diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 13:00:47 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 13:00:47 +0000 |
commit | 2cb7e0aaedad73b076ea18c6900b0e86c5760d79 (patch) | |
tree | da68ca54bb79f4080079bf0828acda937593a4e1 /src/shared | |
parent | Initial commit. (diff) | |
download | systemd-2cb7e0aaedad73b076ea18c6900b0e86c5760d79.tar.xz systemd-2cb7e0aaedad73b076ea18c6900b0e86c5760d79.zip |
Adding upstream version 247.3.upstream/247.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/shared')
278 files changed, 80898 insertions, 0 deletions
diff --git a/src/shared/acl-util.c b/src/shared/acl-util.c new file mode 100644 index 0000000..ef4b883 --- /dev/null +++ b/src/shared/acl-util.c @@ -0,0 +1,437 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "acl-util.h" +#include "alloc-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" +#include "util.h" + +int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *ret_entry) { + acl_entry_t i; + int r; + + assert(acl); + assert(uid_is_valid(uid)); + assert(ret_entry); + + for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) { + + acl_tag_t tag; + uid_t *u; + bool b; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (tag != ACL_USER) + continue; + + u = acl_get_qualifier(i); + if (!u) + return -errno; + + b = *u == uid; + acl_free(u); + + if (b) { + *ret_entry = i; + return 1; + } + } + if (r < 0) + return -errno; + + *ret_entry = NULL; + return 0; +} + +int calc_acl_mask_if_needed(acl_t *acl_p) { + acl_entry_t i; + int r; + bool need = false; + + assert(acl_p); + + for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) { + acl_tag_t tag; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (tag == ACL_MASK) + return 0; + + if (IN_SET(tag, ACL_USER, ACL_GROUP)) + need = true; + } + if (r < 0) + return -errno; + + if (need && acl_calc_mask(acl_p) < 0) + return -errno; + + return need; +} + +int add_base_acls_if_needed(acl_t *acl_p, const char *path) { + acl_entry_t i; + int r; + bool have_user_obj = false, have_group_obj = false, have_other = false; + struct stat st; + _cleanup_(acl_freep) acl_t basic = NULL; + + assert(acl_p); + + for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) { + acl_tag_t tag; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (tag == ACL_USER_OBJ) + have_user_obj = true; + else if (tag == ACL_GROUP_OBJ) + have_group_obj = true; + else if (tag == ACL_OTHER) + have_other = true; + if (have_user_obj && have_group_obj && have_other) + return 0; + } + if (r < 0) + return -errno; + + r = stat(path, &st); + if (r < 0) + return -errno; + + basic = acl_from_mode(st.st_mode); + if (!basic) + return -errno; + + for (r = acl_get_entry(basic, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(basic, ACL_NEXT_ENTRY, &i)) { + acl_tag_t tag; + acl_entry_t dst; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if ((tag == ACL_USER_OBJ && have_user_obj) || + (tag == ACL_GROUP_OBJ && have_group_obj) || + (tag == ACL_OTHER && have_other)) + continue; + + r = acl_create_entry(acl_p, &dst); + if (r < 0) + return -errno; + + r = acl_copy_entry(dst, i); + if (r < 0) + return -errno; + } + if (r < 0) + return -errno; + return 0; +} + +int acl_search_groups(const char *path, char ***ret_groups) { + _cleanup_strv_free_ char **g = NULL; + _cleanup_(acl_freep) acl_t acl = NULL; + bool ret = false; + acl_entry_t entry; + int r; + + assert(path); + + acl = acl_get_file(path, ACL_TYPE_DEFAULT); + if (!acl) + return -errno; + + r = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry); + for (;;) { + _cleanup_(acl_free_gid_tpp) gid_t *gid = NULL; + acl_tag_t tag; + + if (r < 0) + return -errno; + if (r == 0) + break; + + if (acl_get_tag_type(entry, &tag) < 0) + return -errno; + + if (tag != ACL_GROUP) + goto next; + + gid = acl_get_qualifier(entry); + if (!gid) + return -errno; + + if (in_gid(*gid) > 0) { + if (!ret_groups) + return true; + + ret = true; + } + + if (ret_groups) { + char *name; + + name = gid_to_name(*gid); + if (!name) + return -ENOMEM; + + r = strv_consume(&g, name); + if (r < 0) + return r; + } + + next: + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry); + } + + if (ret_groups) + *ret_groups = TAKE_PTR(g); + + return ret; +} + +int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask) { + _cleanup_free_ char **a = NULL, **d = NULL; /* strings are not freed */ + _cleanup_strv_free_ char **split; + char **entry; + int r = -EINVAL; + _cleanup_(acl_freep) acl_t a_acl = NULL, d_acl = NULL; + + split = strv_split(text, ","); + if (!split) + return -ENOMEM; + + STRV_FOREACH(entry, split) { + char *p; + + p = STARTSWITH_SET(*entry, "default:", "d:"); + if (p) + r = strv_push(&d, p); + else + r = strv_push(&a, *entry); + if (r < 0) + return r; + } + + if (!strv_isempty(a)) { + _cleanup_free_ char *join; + + join = strv_join(a, ","); + if (!join) + return -ENOMEM; + + a_acl = acl_from_text(join); + if (!a_acl) + return -errno; + + if (want_mask) { + r = calc_acl_mask_if_needed(&a_acl); + if (r < 0) + return r; + } + } + + if (!strv_isempty(d)) { + _cleanup_free_ char *join; + + join = strv_join(d, ","); + if (!join) + return -ENOMEM; + + d_acl = acl_from_text(join); + if (!d_acl) + return -errno; + + if (want_mask) { + r = calc_acl_mask_if_needed(&d_acl); + if (r < 0) + return r; + } + } + + *acl_access = TAKE_PTR(a_acl); + *acl_default = TAKE_PTR(d_acl); + + return 0; +} + +static int acl_entry_equal(acl_entry_t a, acl_entry_t b) { + acl_tag_t tag_a, tag_b; + + if (acl_get_tag_type(a, &tag_a) < 0) + return -errno; + + if (acl_get_tag_type(b, &tag_b) < 0) + return -errno; + + if (tag_a != tag_b) + return false; + + switch (tag_a) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + /* can have only one of those */ + return true; + case ACL_USER: { + _cleanup_(acl_free_uid_tpp) uid_t *uid_a = NULL, *uid_b = NULL; + + uid_a = acl_get_qualifier(a); + if (!uid_a) + return -errno; + + uid_b = acl_get_qualifier(b); + if (!uid_b) + return -errno; + + return *uid_a == *uid_b; + } + case ACL_GROUP: { + _cleanup_(acl_free_gid_tpp) gid_t *gid_a = NULL, *gid_b = NULL; + + gid_a = acl_get_qualifier(a); + if (!gid_a) + return -errno; + + gid_b = acl_get_qualifier(b); + if (!gid_b) + return -errno; + + return *gid_a == *gid_b; + } + default: + assert_not_reached("Unknown acl tag type"); + } +} + +static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) { + acl_entry_t i; + int r; + + for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) { + + r = acl_entry_equal(i, entry); + if (r < 0) + return r; + if (r > 0) { + *out = i; + return 1; + } + } + if (r < 0) + return -errno; + return 0; +} + +int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) { + _cleanup_(acl_freep) acl_t old; + acl_entry_t i; + int r; + + old = acl_get_file(path, type); + if (!old) + return -errno; + + for (r = acl_get_entry(new, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(new, ACL_NEXT_ENTRY, &i)) { + + acl_entry_t j; + + r = find_acl_entry(old, i, &j); + if (r < 0) + return r; + if (r == 0) + if (acl_create_entry(&old, &j) < 0) + return -errno; + + if (acl_copy_entry(j, i) < 0) + return -errno; + } + if (r < 0) + return -errno; + + *acl = TAKE_PTR(old); + + return 0; +} + +/* POSIX says that ACL_{READ,WRITE,EXECUTE} don't have to be bitmasks. But that is a natural thing to do and + * all extant implementations do it. Let's make sure that we fail verbosely in the (imho unlikely) scenario + * that we get a new implementation that does not satisfy this. */ +assert_cc(!(ACL_READ & ACL_WRITE)); +assert_cc(!(ACL_WRITE & ACL_EXECUTE)); +assert_cc(!(ACL_EXECUTE & ACL_READ)); +assert_cc((unsigned) ACL_READ == ACL_READ); +assert_cc((unsigned) ACL_WRITE == ACL_WRITE); +assert_cc((unsigned) ACL_EXECUTE == ACL_EXECUTE); + +int fd_add_uid_acl_permission( + int fd, + uid_t uid, + unsigned mask) { + + _cleanup_(acl_freep) acl_t acl = NULL; + acl_permset_t permset; + acl_entry_t entry; + int r; + + /* Adds an ACL entry for the specified file to allow the indicated access to the specified + * user. Operates purely incrementally. */ + + assert(fd >= 0); + assert(uid_is_valid(uid)); + + acl = acl_get_fd(fd); + if (!acl) + return -errno; + + r = acl_find_uid(acl, uid, &entry); + if (r <= 0) { + if (acl_create_entry(&acl, &entry) < 0 || + acl_set_tag_type(entry, ACL_USER) < 0 || + acl_set_qualifier(entry, &uid) < 0) + return -errno; + } + + if (acl_get_permset(entry, &permset) < 0) + return -errno; + + if ((mask & ACL_READ) && acl_add_perm(permset, ACL_READ) < 0) + return -errno; + if ((mask & ACL_WRITE) && acl_add_perm(permset, ACL_WRITE) < 0) + return -errno; + if ((mask & ACL_EXECUTE) && acl_add_perm(permset, ACL_EXECUTE) < 0) + return -errno; + + r = calc_acl_mask_if_needed(&acl); + if (r < 0) + return r; + + if (acl_set_fd(fd, acl) < 0) + return -errno; + + return 0; +} diff --git a/src/shared/acl-util.h b/src/shared/acl-util.h new file mode 100644 index 0000000..837e869 --- /dev/null +++ b/src/shared/acl-util.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <errno.h> +#include <unistd.h> + +#if HAVE_ACL +#include <acl/libacl.h> +#include <stdbool.h> +#include <sys/acl.h> + +#include "macro.h" + +int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry); +int calc_acl_mask_if_needed(acl_t *acl_p); +int add_base_acls_if_needed(acl_t *acl_p, const char *path); +int acl_search_groups(const char* path, char ***ret_groups); +int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask); +int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl); +int fd_add_uid_acl_permission(int fd, uid_t uid, unsigned mask); + +/* acl_free takes multiple argument types. + * Multiple cleanup functions are necessary. */ +DEFINE_TRIVIAL_CLEANUP_FUNC(acl_t, acl_free); +#define acl_free_charp acl_free +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, acl_free_charp); +#define acl_free_uid_tp acl_free +DEFINE_TRIVIAL_CLEANUP_FUNC(uid_t*, acl_free_uid_tp); +#define acl_free_gid_tp acl_free +DEFINE_TRIVIAL_CLEANUP_FUNC(gid_t*, acl_free_gid_tp); + +#else +#define ACL_READ 0x04 +#define ACL_WRITE 0x02 +#define ACL_EXECUTE 0x01 + +static inline int fd_add_uid_acl_permission(int fd, uid_t uid, unsigned mask) { + return -EOPNOTSUPP; +} +#endif diff --git a/src/shared/acpi-fpdt.c b/src/shared/acpi-fpdt.c new file mode 100644 index 0000000..1124453 --- /dev/null +++ b/src/shared/acpi-fpdt.c @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +#include "acpi-fpdt.h" +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "time-util.h" + +struct acpi_table_header { + char signature[4]; + uint32_t length; + uint8_t revision; + uint8_t checksum; + char oem_id[6]; + char oem_table_id[8]; + uint32_t oem_revision; + char asl_compiler_id[4]; + uint32_t asl_compiler_revision; +} _packed_; + +enum { + ACPI_FPDT_TYPE_BOOT = 0, + ACPI_FPDT_TYPE_S3PERF = 1, +}; + +struct acpi_fpdt_header { + uint16_t type; + uint8_t length; + uint8_t revision; + uint8_t reserved[4]; + uint64_t ptr; +} _packed_; + +struct acpi_fpdt_boot_header { + char signature[4]; + uint32_t length; +} _packed_; + +enum { + ACPI_FPDT_S3PERF_RESUME_REC = 0, + ACPI_FPDT_S3PERF_SUSPEND_REC = 1, + ACPI_FPDT_BOOT_REC = 2, +}; + +struct acpi_fpdt_boot { + uint16_t type; + uint8_t length; + uint8_t revision; + uint8_t reserved[4]; + uint64_t reset_end; + uint64_t load_start; + uint64_t startup_start; + uint64_t exit_services_entry; + uint64_t exit_services_exit; +} _packed; + +int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit) { + _cleanup_free_ char *buf = NULL; + struct acpi_table_header *tbl; + size_t l = 0; + struct acpi_fpdt_header *rec; + int r; + uint64_t ptr = 0; + _cleanup_close_ int fd = -1; + struct acpi_fpdt_boot_header hbrec; + struct acpi_fpdt_boot brec; + + r = read_full_file("/sys/firmware/acpi/tables/FPDT", &buf, &l); + if (r < 0) + return r; + + if (l < sizeof(struct acpi_table_header) + sizeof(struct acpi_fpdt_header)) + return -EINVAL; + + tbl = (struct acpi_table_header *)buf; + if (l != tbl->length) + return -EINVAL; + + if (memcmp(tbl->signature, "FPDT", 4) != 0) + return -EINVAL; + + /* find Firmware Basic Boot Performance Pointer Record */ + for (rec = (struct acpi_fpdt_header *)(buf + sizeof(struct acpi_table_header)); + (char *)rec < buf + l; + rec = (struct acpi_fpdt_header *)((char *)rec + rec->length)) { + if (rec->length <= 0) + break; + if (rec->type != ACPI_FPDT_TYPE_BOOT) + continue; + if (rec->length != sizeof(struct acpi_fpdt_header)) + continue; + + ptr = rec->ptr; + break; + } + + if (ptr == 0) + return -ENODATA; + + /* read Firmware Basic Boot Performance Data Record */ + fd = open("/dev/mem", O_CLOEXEC|O_RDONLY); + if (fd < 0) + return -errno; + + l = pread(fd, &hbrec, sizeof(struct acpi_fpdt_boot_header), ptr); + if (l != sizeof(struct acpi_fpdt_boot_header)) + return -EINVAL; + + if (memcmp(hbrec.signature, "FBPT", 4) != 0) + return -EINVAL; + + if (hbrec.length < sizeof(struct acpi_fpdt_boot_header) + sizeof(struct acpi_fpdt_boot)) + return -EINVAL; + + l = pread(fd, &brec, sizeof(struct acpi_fpdt_boot), ptr + sizeof(struct acpi_fpdt_boot_header)); + if (l != sizeof(struct acpi_fpdt_boot)) + return -EINVAL; + + if (brec.length != sizeof(struct acpi_fpdt_boot)) + return -EINVAL; + + if (brec.type != ACPI_FPDT_BOOT_REC) + return -EINVAL; + + if (brec.exit_services_exit == 0) + /* Non-UEFI compatible boot. */ + return -ENODATA; + + if (brec.startup_start == 0 || brec.exit_services_exit < brec.startup_start) + return -EINVAL; + if (brec.exit_services_exit > NSEC_PER_HOUR) + return -EINVAL; + + if (loader_start) + *loader_start = brec.startup_start / 1000; + if (loader_exit) + *loader_exit = brec.exit_services_exit / 1000; + + return 0; +} diff --git a/src/shared/acpi-fpdt.h b/src/shared/acpi-fpdt.h new file mode 100644 index 0000000..9eef92b --- /dev/null +++ b/src/shared/acpi-fpdt.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <time-util.h> + +int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit); diff --git a/src/shared/apparmor-util.c b/src/shared/apparmor-util.c new file mode 100644 index 0000000..68e1c55 --- /dev/null +++ b/src/shared/apparmor-util.c @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stddef.h> + +#include "alloc-util.h" +#include "apparmor-util.h" +#include "fileio.h" +#include "parse-util.h" + +bool mac_apparmor_use(void) { + static int cached_use = -1; + + if (cached_use < 0) { + _cleanup_free_ char *p = NULL; + + cached_use = + read_one_line_file("/sys/module/apparmor/parameters/enabled", &p) >= 0 && + parse_boolean(p) > 0; + } + + return cached_use; +} diff --git a/src/shared/apparmor-util.h b/src/shared/apparmor-util.h new file mode 100644 index 0000000..8007aeb --- /dev/null +++ b/src/shared/apparmor-util.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +bool mac_apparmor_use(void); diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c new file mode 100644 index 0000000..8d66f9f --- /dev/null +++ b/src/shared/ask-password-api.c @@ -0,0 +1,1050 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/inotify.h> +#include <sys/signalfd.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/uio.h> +#include <sys/un.h> +#include <termios.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "ask-password-api.h" +#include "def.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "fs-util.h" +#include "io-util.h" +#include "locale-util.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "missing_syscall.h" +#include "mkdir.h" +#include "process-util.h" +#include "random-util.h" +#include "signal-util.h" +#include "socket-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "umask-util.h" +#include "utf8.h" + +#define KEYRING_TIMEOUT_USEC ((5 * USEC_PER_MINUTE) / 2) + +static int lookup_key(const char *keyname, key_serial_t *ret) { + key_serial_t serial; + + assert(keyname); + assert(ret); + + serial = request_key("user", keyname, NULL, 0); + if (serial == -1) + return negative_errno(); + + *ret = serial; + return 0; +} + +static int retrieve_key(key_serial_t serial, char ***ret) { + size_t nfinal, m = 100; + char **l; + _cleanup_(erase_and_freep) char *pfinal = NULL; + + assert(ret); + + for (;;) { + _cleanup_(erase_and_freep) char *p = NULL; + long n; + + p = new(char, m); + if (!p) + return -ENOMEM; + + n = keyctl(KEYCTL_READ, (unsigned long) serial, (unsigned long) p, (unsigned long) m, 0); + if (n < 0) + return -errno; + if ((size_t) n <= m) { + nfinal = (size_t) n; + pfinal = TAKE_PTR(p); + break; + } + + if (m > LONG_MAX / 2) /* overflow check */ + return -ENOMEM; + m *= 2; + } + + l = strv_parse_nulstr(pfinal, nfinal); + if (!l) + return -ENOMEM; + + *ret = l; + return 0; +} + +static int add_to_keyring(const char *keyname, AskPasswordFlags flags, char **passwords) { + _cleanup_strv_free_erase_ char **l = NULL; + _cleanup_(erase_and_freep) char *p = NULL; + key_serial_t serial; + size_t n; + int r; + + assert(keyname); + + if (!(flags & ASK_PASSWORD_PUSH_CACHE)) + return 0; + if (strv_isempty(passwords)) + return 0; + + r = lookup_key(keyname, &serial); + if (r >= 0) { + r = retrieve_key(serial, &l); + if (r < 0) + return r; + } else if (r != -ENOKEY) + return r; + + r = strv_extend_strv(&l, passwords, true); + if (r <= 0) + return r; + + r = strv_make_nulstr(l, &p, &n); + if (r < 0) + return r; + + serial = add_key("user", keyname, p, n, KEY_SPEC_USER_KEYRING); + if (serial == -1) + return -errno; + + if (keyctl(KEYCTL_SET_TIMEOUT, + (unsigned long) serial, + (unsigned long) DIV_ROUND_UP(KEYRING_TIMEOUT_USEC, USEC_PER_SEC), 0, 0) < 0) + log_debug_errno(errno, "Failed to adjust kernel keyring key timeout: %m"); + + /* Tell everyone to check the keyring */ + (void) touch("/run/systemd/ask-password"); + + log_debug("Added key to kernel keyring as %" PRIi32 ".", serial); + + return 1; +} + +static int add_to_keyring_and_log(const char *keyname, AskPasswordFlags flags, char **passwords) { + int r; + + assert(keyname); + + r = add_to_keyring(keyname, flags, passwords); + if (r < 0) + return log_debug_errno(r, "Failed to add password to kernel keyring: %m"); + + return 0; +} + +static int ask_password_keyring(const char *keyname, AskPasswordFlags flags, char ***ret) { + + key_serial_t serial; + int r; + + assert(keyname); + assert(ret); + + if (!(flags & ASK_PASSWORD_ACCEPT_CACHED)) + return -EUNATCH; + + r = lookup_key(keyname, &serial); + if (ERRNO_IS_NOT_SUPPORTED(r) || r == -EPERM) /* when retrieving the distinction between "kernel or + * container manager don't support or allow this" and + * "no matching key known" doesn't matter. Note that we + * propagate EACCESS here (even if EPERM not) since + * that is used if the keyring is available but we lack + * access to the key. */ + return -ENOKEY; + if (r < 0) + return r; + + return retrieve_key(serial, ret); +} + +static int backspace_chars(int ttyfd, size_t p) { + if (ttyfd < 0) + return 0; + + _cleanup_free_ char *buf = malloc_multiply(3, p); + if (!buf) + return log_oom(); + + for (size_t i = 0; i < p; i++) + memcpy(buf + 3 * i, "\b \b", 3); + + return loop_write(ttyfd, buf, 3*p, false); +} + +static int backspace_string(int ttyfd, const char *str) { + assert(str); + + /* Backspaces through enough characters to entirely undo printing of the specified string. */ + + if (ttyfd < 0) + return 0; + + size_t m = utf8_n_codepoints(str); + if (m == (size_t) -1) + m = strlen(str); /* Not a valid UTF-8 string? If so, let's backspace the number of bytes + * output. Most likely this happened because we are not in an UTF-8 locale, + * and in that case that is the correct thing to do. And even if it's not, + * terminals tend to stop backspacing at the leftmost column, hence + * backspacing too much should be mostly OK. */ + + return backspace_chars(ttyfd, m); +} + +int ask_password_plymouth( + const char *message, + usec_t until, + AskPasswordFlags flags, + const char *flag_file, + char ***ret) { + + static const union sockaddr_union sa = PLYMOUTH_SOCKET; + _cleanup_close_ int fd = -1, notify = -1; + _cleanup_free_ char *packet = NULL; + ssize_t k; + int r, n; + struct pollfd pollfd[2] = {}; + char buffer[LINE_MAX]; + size_t p = 0; + enum { + POLL_SOCKET, + POLL_INOTIFY + }; + + assert(ret); + + if (!message) + message = "Password:"; + + if (flag_file) { + notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK); + if (notify < 0) + return -errno; + + r = inotify_add_watch(notify, flag_file, IN_ATTRIB); /* for the link count */ + if (r < 0) + return -errno; + } + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return -errno; + + r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)); + if (r < 0) + return -errno; + + if (flags & ASK_PASSWORD_ACCEPT_CACHED) { + packet = strdup("c"); + n = 1; + } else if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0) + packet = NULL; + if (!packet) + return -ENOMEM; + + r = loop_write(fd, packet, n + 1, true); + if (r < 0) + return r; + + pollfd[POLL_SOCKET].fd = fd; + pollfd[POLL_SOCKET].events = POLLIN; + pollfd[POLL_INOTIFY].fd = notify; + pollfd[POLL_INOTIFY].events = POLLIN; + + for (;;) { + int sleep_for = -1, j; + + if (until > 0) { + usec_t y; + + y = now(CLOCK_MONOTONIC); + + if (y > until) { + r = -ETIME; + goto finish; + } + + sleep_for = (int) ((until - y) / USEC_PER_MSEC); + } + + if (flag_file && access(flag_file, F_OK) < 0) { + r = -errno; + goto finish; + } + + j = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for); + if (j < 0) { + if (errno == EINTR) + continue; + + r = -errno; + goto finish; + } else if (j == 0) { + r = -ETIME; + goto finish; + } + + if (pollfd[POLL_SOCKET].revents & POLLNVAL || + (notify >= 0 && pollfd[POLL_INOTIFY].revents & POLLNVAL)) { + r = -EBADF; + goto finish; + } + + if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0) + (void) flush_fd(notify); + + if (pollfd[POLL_SOCKET].revents == 0) + continue; + + k = read(fd, buffer + p, sizeof(buffer) - p); + if (k < 0) { + if (IN_SET(errno, EINTR, EAGAIN)) + continue; + + r = -errno; + goto finish; + } else if (k == 0) { + r = -EIO; + goto finish; + } + + p += k; + + if (buffer[0] == 5) { + + if (flags & ASK_PASSWORD_ACCEPT_CACHED) { + /* Hmm, first try with cached + * passwords failed, so let's retry + * with a normal password request */ + packet = mfree(packet); + + if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0) { + r = -ENOMEM; + goto finish; + } + + r = loop_write(fd, packet, n+1, true); + if (r < 0) + goto finish; + + flags &= ~ASK_PASSWORD_ACCEPT_CACHED; + p = 0; + continue; + } + + /* No password, because UI not shown */ + r = -ENOENT; + goto finish; + + } else if (IN_SET(buffer[0], 2, 9)) { + uint32_t size; + char **l; + + /* One or more answers */ + if (p < 5) + continue; + + memcpy(&size, buffer+1, sizeof(size)); + size = le32toh(size); + if (size + 5 > sizeof(buffer)) { + r = -EIO; + goto finish; + } + + if (p-5 < size) + continue; + + l = strv_parse_nulstr(buffer + 5, size); + if (!l) { + r = -ENOMEM; + goto finish; + } + + *ret = l; + break; + + } else { + /* Unknown packet */ + r = -EIO; + goto finish; + } + } + + r = 0; + +finish: + explicit_bzero_safe(buffer, sizeof(buffer)); + return r; +} + +#define NO_ECHO "(no echo) " +#define PRESS_TAB "(press TAB for no echo) " +#define SKIPPED "(skipped)" + +int ask_password_tty( + int ttyfd, + const char *message, + const char *keyname, + usec_t until, + AskPasswordFlags flags, + const char *flag_file, + char ***ret) { + + enum { + POLL_TTY, + POLL_INOTIFY, + _POLL_MAX, + }; + + bool reset_tty = false, dirty = false, use_color = false, press_tab_visible = false; + _cleanup_close_ int cttyfd = -1, notify = -1; + struct termios old_termios, new_termios; + char passphrase[LINE_MAX + 1] = {}, *x; + _cleanup_strv_free_erase_ char **l = NULL; + struct pollfd pollfd[_POLL_MAX]; + size_t p = 0, codepoint = 0; + int r; + + assert(ret); + + if (flags & ASK_PASSWORD_NO_TTY) + return -EUNATCH; + + if (!message) + message = "Password:"; + + if (emoji_enabled()) + message = strjoina(special_glyph(SPECIAL_GLYPH_LOCK_AND_KEY), " ", message); + + if (flag_file || ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname)) { + notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK); + if (notify < 0) + return -errno; + } + if (flag_file) { + if (inotify_add_watch(notify, flag_file, IN_ATTRIB /* for the link count */) < 0) + return -errno; + } + if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) { + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) + return 0; + else if (r != -ENOKEY) + return r; + + if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) + return -errno; + } + + /* If the caller didn't specify a TTY, then use the controlling tty, if we can. */ + if (ttyfd < 0) + ttyfd = cttyfd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC); + + if (ttyfd >= 0) { + if (tcgetattr(ttyfd, &old_termios) < 0) + return -errno; + + if (flags & ASK_PASSWORD_CONSOLE_COLOR) + use_color = dev_console_colors_enabled(); + else + use_color = colors_enabled(); + + if (use_color) + (void) loop_write(ttyfd, ANSI_HIGHLIGHT, STRLEN(ANSI_HIGHLIGHT), false); + + (void) loop_write(ttyfd, message, strlen(message), false); + (void) loop_write(ttyfd, " ", 1, false); + + if (!(flags & ASK_PASSWORD_SILENT) && !(flags & ASK_PASSWORD_ECHO)) { + if (use_color) + (void) loop_write(ttyfd, ANSI_GREY, STRLEN(ANSI_GREY), false); + (void) loop_write(ttyfd, PRESS_TAB, strlen(PRESS_TAB), false); + press_tab_visible = true; + } + + if (use_color) + (void) loop_write(ttyfd, ANSI_NORMAL, STRLEN(ANSI_NORMAL), false); + + new_termios = old_termios; + new_termios.c_lflag &= ~(ICANON|ECHO); + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + + if (tcsetattr(ttyfd, TCSADRAIN, &new_termios) < 0) { + r = -errno; + goto finish; + } + + reset_tty = true; + } + + pollfd[POLL_TTY] = (struct pollfd) { + .fd = ttyfd >= 0 ? ttyfd : STDIN_FILENO, + .events = POLLIN, + }; + pollfd[POLL_INOTIFY] = (struct pollfd) { + .fd = notify, + .events = POLLIN, + }; + + for (;;) { + _cleanup_(erase_char) char c; + int sleep_for = -1, k; + ssize_t n; + + if (until > 0) { + usec_t y; + + y = now(CLOCK_MONOTONIC); + + if (y > until) { + r = -ETIME; + goto finish; + } + + sleep_for = (int) DIV_ROUND_UP(until - y, USEC_PER_MSEC); + } + + if (flag_file) + if (access(flag_file, F_OK) < 0) { + r = -errno; + goto finish; + } + + k = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for); + if (k < 0) { + if (errno == EINTR) + continue; + + r = -errno; + goto finish; + } else if (k == 0) { + r = -ETIME; + goto finish; + } + + if ((pollfd[POLL_TTY].revents & POLLNVAL) || + (notify >= 0 && (pollfd[POLL_INOTIFY].revents & POLLNVAL))) { + r = -EBADF; + goto finish; + } + + if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0 && keyname) { + (void) flush_fd(notify); + + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) { + r = 0; + goto finish; + } else if (r != -ENOKEY) + goto finish; + } + + if (pollfd[POLL_TTY].revents == 0) + continue; + + n = read(ttyfd >= 0 ? ttyfd : STDIN_FILENO, &c, 1); + if (n < 0) { + if (IN_SET(errno, EINTR, EAGAIN)) + continue; + + r = -errno; + goto finish; + + } + + if (press_tab_visible) { + assert(ttyfd >= 0); + backspace_chars(ttyfd, strlen(PRESS_TAB)); + press_tab_visible = false; + } + + /* We treat EOF, newline and NUL byte all as valid end markers */ + if (n == 0 || c == '\n' || c == 0) + break; + + if (c == 4) { /* C-d also known as EOT */ + if (ttyfd >= 0) + (void) loop_write(ttyfd, SKIPPED, strlen(SKIPPED), false); + + goto skipped; + } + + if (c == 21) { /* C-u */ + + if (!(flags & ASK_PASSWORD_SILENT)) + (void) backspace_string(ttyfd, passphrase); + + explicit_bzero_safe(passphrase, sizeof(passphrase)); + p = codepoint = 0; + + } else if (IN_SET(c, '\b', 127)) { + + if (p > 0) { + size_t q; + + if (!(flags & ASK_PASSWORD_SILENT)) + (void) backspace_chars(ttyfd, 1); + + /* Remove a full UTF-8 codepoint from the end. For that, figure out where the + * last one begins */ + q = 0; + for (;;) { + size_t z; + + z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1); + if (z == 0) { + q = (size_t) -1; /* Invalid UTF8! */ + break; + } + + if (q + z >= p) /* This one brings us over the edge */ + break; + + q += z; + } + + p = codepoint = q == (size_t) -1 ? p - 1 : q; + explicit_bzero_safe(passphrase + p, sizeof(passphrase) - p); + + } else if (!dirty && !(flags & ASK_PASSWORD_SILENT)) { + + flags |= ASK_PASSWORD_SILENT; + + /* There are two ways to enter silent mode. Either by pressing backspace as + * first key (and only as first key), or ... */ + + if (ttyfd >= 0) + (void) loop_write(ttyfd, NO_ECHO, strlen(NO_ECHO), false); + + } else if (ttyfd >= 0) + (void) loop_write(ttyfd, "\a", 1, false); + + } else if (c == '\t' && !(flags & ASK_PASSWORD_SILENT)) { + + (void) backspace_string(ttyfd, passphrase); + flags |= ASK_PASSWORD_SILENT; + + /* ... or by pressing TAB at any time. */ + + if (ttyfd >= 0) + (void) loop_write(ttyfd, NO_ECHO, strlen(NO_ECHO), false); + + } else if (p >= sizeof(passphrase)-1) { + + /* Reached the size limit */ + if (ttyfd >= 0) + (void) loop_write(ttyfd, "\a", 1, false); + + } else { + passphrase[p++] = c; + + if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) { + /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */ + n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1); + if (n >= 0) { + if (flags & ASK_PASSWORD_ECHO) + (void) loop_write(ttyfd, passphrase + codepoint, n, false); + else + (void) loop_write(ttyfd, "*", 1, false); + codepoint = p; + } + } + + dirty = true; + } + } + + x = strndup(passphrase, p); + explicit_bzero_safe(passphrase, sizeof(passphrase)); + if (!x) { + r = -ENOMEM; + goto finish; + } + + r = strv_consume(&l, x); + if (r < 0) + goto finish; + +skipped: + if (strv_isempty(l)) + r = log_debug_errno(SYNTHETIC_ERRNO(ECANCELED), "Password query was cancelled."); + else { + if (keyname) + (void) add_to_keyring_and_log(keyname, flags, l); + + *ret = TAKE_PTR(l); + r = 0; + } + +finish: + if (ttyfd >= 0 && reset_tty) { + (void) loop_write(ttyfd, "\n", 1, false); + (void) tcsetattr(ttyfd, TCSADRAIN, &old_termios); + } + + return r; +} + +static int create_socket(char **ret) { + _cleanup_free_ char *path = NULL; + union sockaddr_union sa; + socklen_t sa_len; + _cleanup_close_ int fd = -1; + int r; + + assert(ret); + + fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return -errno; + + if (asprintf(&path, "/run/systemd/ask-password/sck.%" PRIx64, random_u64()) < 0) + return -ENOMEM; + + r = sockaddr_un_set_path(&sa.un, path); + if (r < 0) + return r; + sa_len = r; + + RUN_WITH_UMASK(0177) + if (bind(fd, &sa.sa, sa_len) < 0) + return -errno; + + r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return r; + + *ret = TAKE_PTR(path); + return TAKE_FD(fd); +} + +int ask_password_agent( + const char *message, + const char *icon, + const char *id, + const char *keyname, + usec_t until, + AskPasswordFlags flags, + char ***ret) { + + enum { + FD_SOCKET, + FD_SIGNAL, + FD_INOTIFY, + _FD_MAX + }; + + _cleanup_close_ int socket_fd = -1, signal_fd = -1, notify = -1, fd = -1; + char temp[] = "/run/systemd/ask-password/tmp.XXXXXX"; + char final[sizeof(temp)] = ""; + _cleanup_free_ char *socket_name = NULL; + _cleanup_strv_free_erase_ char **l = NULL; + _cleanup_fclose_ FILE *f = NULL; + struct pollfd pollfd[_FD_MAX]; + sigset_t mask, oldmask; + int r; + + assert(ret); + + if (flags & ASK_PASSWORD_NO_AGENT) + return -EUNATCH; + + assert_se(sigemptyset(&mask) >= 0); + assert_se(sigset_add_many(&mask, SIGINT, SIGTERM, -1) >= 0); + assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) >= 0); + + (void) mkdir_p_label("/run/systemd/ask-password", 0755); + + if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) { + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) { + r = 0; + goto finish; + } else if (r != -ENOKEY) + goto finish; + + notify = inotify_init1(IN_CLOEXEC | IN_NONBLOCK); + if (notify < 0) { + r = -errno; + goto finish; + } + if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) { + r = -errno; + goto finish; + } + } + + fd = mkostemp_safe(temp); + if (fd < 0) { + r = fd; + goto finish; + } + + (void) fchmod(fd, 0644); + + f = take_fdopen(&fd, "w"); + if (!f) { + r = -errno; + goto finish; + } + + signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); + if (signal_fd < 0) { + r = -errno; + goto finish; + } + + socket_fd = create_socket(&socket_name); + if (socket_fd < 0) { + r = socket_fd; + goto finish; + } + + fprintf(f, + "[Ask]\n" + "PID="PID_FMT"\n" + "Socket=%s\n" + "AcceptCached=%i\n" + "Echo=%i\n" + "NotAfter="USEC_FMT"\n", + getpid_cached(), + socket_name, + (flags & ASK_PASSWORD_ACCEPT_CACHED) ? 1 : 0, + (flags & ASK_PASSWORD_ECHO) ? 1 : 0, + until); + + if (message) + fprintf(f, "Message=%s\n", message); + + if (icon) + fprintf(f, "Icon=%s\n", icon); + + if (id) + fprintf(f, "Id=%s\n", id); + + r = fflush_and_check(f); + if (r < 0) + goto finish; + + memcpy(final, temp, sizeof(temp)); + + final[sizeof(final)-11] = 'a'; + final[sizeof(final)-10] = 's'; + final[sizeof(final)-9] = 'k'; + + if (rename(temp, final) < 0) { + r = -errno; + goto finish; + } + + zero(pollfd); + pollfd[FD_SOCKET].fd = socket_fd; + pollfd[FD_SOCKET].events = POLLIN; + pollfd[FD_SIGNAL].fd = signal_fd; + pollfd[FD_SIGNAL].events = POLLIN; + pollfd[FD_INOTIFY].fd = notify; + pollfd[FD_INOTIFY].events = POLLIN; + + for (;;) { + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control; + char passphrase[LINE_MAX+1]; + struct iovec iovec; + struct ucred *ucred; + ssize_t n; + int k; + usec_t t; + + t = now(CLOCK_MONOTONIC); + + if (until > 0 && until <= t) { + r = -ETIME; + goto finish; + } + + k = poll(pollfd, notify >= 0 ? _FD_MAX : _FD_MAX - 1, until > 0 ? (int) ((until-t)/USEC_PER_MSEC) : -1); + if (k < 0) { + if (errno == EINTR) + continue; + + r = -errno; + goto finish; + } + + if (k <= 0) { + r = -ETIME; + goto finish; + } + + if (pollfd[FD_SOCKET].revents & POLLNVAL || + pollfd[FD_SIGNAL].revents & POLLNVAL || + (notify >= 0 && pollfd[FD_INOTIFY].revents & POLLNVAL)) { + r = -EBADF; + goto finish; + } + + if (pollfd[FD_SIGNAL].revents & POLLIN) { + r = -EINTR; + goto finish; + } + + if (notify >= 0 && pollfd[FD_INOTIFY].revents != 0) { + (void) flush_fd(notify); + + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) { + r = 0; + goto finish; + } else if (r != -ENOKEY) + goto finish; + } + + if (pollfd[FD_SOCKET].revents == 0) + continue; + + if (pollfd[FD_SOCKET].revents != POLLIN) { + r = -EIO; + goto finish; + } + + iovec = IOVEC_MAKE(passphrase, sizeof(passphrase)); + + struct msghdr msghdr = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + + n = recvmsg_safe(socket_fd, &msghdr, 0); + if (IN_SET(n, -EAGAIN, -EINTR)) + continue; + if (n == -EXFULL) { + log_debug("Got message with truncated control data, ignoring."); + continue; + } + if (n < 0) { + r = (int) n; + goto finish; + } + + cmsg_close_all(&msghdr); + + if (n <= 0) { + log_debug("Message too short"); + continue; + } + + ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred); + if (!ucred) { + log_debug("Received message without credentials. Ignoring."); + continue; + } + + if (ucred->uid != 0) { + log_debug("Got request from unprivileged user. Ignoring."); + continue; + } + + if (passphrase[0] == '+') { + /* An empty message refers to the empty password */ + if (n == 1) + l = strv_new(""); + else + l = strv_parse_nulstr(passphrase+1, n-1); + explicit_bzero_safe(passphrase, n); + if (!l) { + r = -ENOMEM; + goto finish; + } + + if (strv_isempty(l)) { + l = strv_free(l); + log_debug("Invalid packet"); + continue; + } + + break; + } + + if (passphrase[0] == '-') { + r = -ECANCELED; + goto finish; + } + + log_debug("Invalid packet"); + } + + if (keyname) + (void) add_to_keyring_and_log(keyname, flags, l); + + *ret = TAKE_PTR(l); + r = 0; + +finish: + if (socket_name) + (void) unlink(socket_name); + + (void) unlink(temp); + + if (final[0]) + (void) unlink(final); + + assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0); + return r; +} + +int ask_password_auto( + const char *message, + const char *icon, + const char *id, + const char *keyname, + usec_t until, + AskPasswordFlags flags, + char ***ret) { + + int r; + + assert(ret); + + if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && + keyname && + ((flags & ASK_PASSWORD_NO_TTY) || !isatty(STDIN_FILENO)) && + (flags & ASK_PASSWORD_NO_AGENT)) { + r = ask_password_keyring(keyname, flags, ret); + if (r != -ENOKEY) + return r; + } + + if (!(flags & ASK_PASSWORD_NO_TTY) && isatty(STDIN_FILENO)) + return ask_password_tty(-1, message, keyname, until, flags, NULL, ret); + + if (!(flags & ASK_PASSWORD_NO_AGENT)) + return ask_password_agent(message, icon, id, keyname, until, flags, ret); + + return -EUNATCH; +} diff --git a/src/shared/ask-password-api.h b/src/shared/ask-password-api.h new file mode 100644 index 0000000..7aac5e5 --- /dev/null +++ b/src/shared/ask-password-api.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "time-util.h" + +typedef enum AskPasswordFlags { + ASK_PASSWORD_ACCEPT_CACHED = 1 << 0, + ASK_PASSWORD_PUSH_CACHE = 1 << 1, + ASK_PASSWORD_ECHO = 1 << 2, /* show the password literally while reading, instead of "*" */ + ASK_PASSWORD_SILENT = 1 << 3, /* do no show any password at all while reading */ + ASK_PASSWORD_NO_TTY = 1 << 4, + ASK_PASSWORD_NO_AGENT = 1 << 5, + ASK_PASSWORD_CONSOLE_COLOR = 1 << 6, /* Use color if /dev/console points to a console that supports color */ +} AskPasswordFlags; + +int ask_password_tty(int tty_fd, const char *message, const char *keyname, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret); +int ask_password_plymouth(const char *message, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret); +int ask_password_agent(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret); +int ask_password_auto(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret); diff --git a/src/shared/barrier.c b/src/shared/barrier.c new file mode 100644 index 0000000..9c93d61 --- /dev/null +++ b/src/shared/barrier.c @@ -0,0 +1,398 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/eventfd.h> +#include <sys/types.h> +#include <unistd.h> + +#include "barrier.h" +#include "fd-util.h" +#include "macro.h" + +/** + * Barriers + * This barrier implementation provides a simple synchronization method based + * on file-descriptors that can safely be used between threads and processes. A + * barrier object contains 2 shared counters based on eventfd. Both processes + * can now place barriers and wait for the other end to reach a random or + * specific barrier. + * Barriers are numbered, so you can either wait for the other end to reach any + * barrier or the last barrier that you placed. This way, you can use barriers + * for one-way *and* full synchronization. Note that even-though barriers are + * numbered, these numbers are internal and recycled once both sides reached the + * same barrier (implemented as a simple signed counter). It is thus not + * possible to address barriers by their ID. + * + * Barrier-API: Both ends can place as many barriers via barrier_place() as + * they want and each pair of barriers on both sides will be implicitly linked. + * Each side can use the barrier_wait/sync_*() family of calls to wait for the + * other side to place a specific barrier. barrier_wait_next() waits until the + * other side calls barrier_place(). No links between the barriers are + * considered and this simply serves as most basic asynchronous barrier. + * barrier_sync_next() is like barrier_wait_next() and waits for the other side + * to place their next barrier via barrier_place(). However, it only waits for + * barriers that are linked to a barrier we already placed. If the other side + * already placed more barriers than we did, barrier_sync_next() returns + * immediately. + * barrier_sync() extends barrier_sync_next() and waits until the other end + * placed as many barriers via barrier_place() as we did. If they already placed + * as many as we did (or more), it returns immediately. + * + * Additionally to basic barriers, an abortion event is available. + * barrier_abort() places an abortion event that cannot be undone. An abortion + * immediately cancels all placed barriers and replaces them. Any running and + * following wait/sync call besides barrier_wait_abortion() will immediately + * return false on both sides (otherwise, they always return true). + * barrier_abort() can be called multiple times on both ends and will be a + * no-op if already called on this side. + * barrier_wait_abortion() can be used to wait for the other side to call + * barrier_abort() and is the only wait/sync call that does not return + * immediately if we aborted outself. It only returns once the other side + * called barrier_abort(). + * + * Barriers can be used for in-process and inter-process synchronization. + * However, for in-process synchronization you could just use mutexes. + * Therefore, main target is IPC and we require both sides to *not* share the FD + * table. If that's given, barriers provide target tracking: If the remote side + * exit()s, an abortion event is implicitly queued on the other side. This way, + * a sync/wait call will be woken up if the remote side crashed or exited + * unexpectedly. However, note that these abortion events are only queued if the + * barrier-queue has been drained. Therefore, it is safe to place a barrier and + * exit. The other side can safely wait on the barrier even though the exit + * queued an abortion event. Usually, the abortion event would overwrite the + * barrier, however, that's not true for exit-abortion events. Those are only + * queued if the barrier-queue is drained (thus, the receiving side has placed + * more barriers than the remote side). + */ + +/** + * barrier_create() - Initialize a barrier object + * @obj: barrier to initialize + * + * This initializes a barrier object. The caller is responsible of allocating + * the memory and keeping it valid. The memory does not have to be zeroed + * beforehand. + * Two eventfd objects are allocated for each barrier. If allocation fails, an + * error is returned. + * + * If this function fails, the barrier is reset to an invalid state so it is + * safe to call barrier_destroy() on the object regardless whether the + * initialization succeeded or not. + * + * The caller is responsible to destroy the object via barrier_destroy() before + * releasing the underlying memory. + * + * Returns: 0 on success, negative error code on failure. + */ +int barrier_create(Barrier *b) { + _cleanup_(barrier_destroyp) Barrier *staging = b; + int r; + + assert(b); + + b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (b->me < 0) + return -errno; + + b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (b->them < 0) + return -errno; + + r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK); + if (r < 0) + return -errno; + + staging = NULL; + return 0; +} + +/** + * barrier_destroy() - Destroy a barrier object + * @b: barrier to destroy or NULL + * + * This destroys a barrier object that has previously been passed to + * barrier_create(). The object is released and reset to invalid + * state. Therefore, it is safe to call barrier_destroy() multiple + * times or even if barrier_create() failed. However, barrier must be + * always initialized with BARRIER_NULL. + * + * If @b is NULL, this is a no-op. + */ +void barrier_destroy(Barrier *b) { + if (!b) + return; + + b->me = safe_close(b->me); + b->them = safe_close(b->them); + safe_close_pair(b->pipe); + b->barriers = 0; +} + +/** + * barrier_set_role() - Set the local role of the barrier + * @b: barrier to operate on + * @role: role to set on the barrier + * + * This sets the roles on a barrier object. This is needed to know + * which side of the barrier you're on. Usually, the parent creates + * the barrier via barrier_create() and then calls fork() or clone(). + * Therefore, the FDs are duplicated and the child retains the same + * barrier object. + * + * Both sides need to call barrier_set_role() after fork() or clone() + * are done. If this is not done, barriers will not work correctly. + * + * Note that barriers could be supported without fork() or clone(). However, + * this is currently not needed so it hasn't been implemented. + */ +void barrier_set_role(Barrier *b, unsigned role) { + assert(b); + assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD)); + /* make sure this is only called once */ + assert(b->pipe[0] >= 0 && b->pipe[1] >= 0); + + if (role == BARRIER_PARENT) + b->pipe[1] = safe_close(b->pipe[1]); + else { + b->pipe[0] = safe_close(b->pipe[0]); + + /* swap me/them for children */ + SWAP_TWO(b->me, b->them); + } +} + +/* places barrier; returns false if we aborted, otherwise true */ +static bool barrier_write(Barrier *b, uint64_t buf) { + ssize_t len; + + /* prevent new sync-points if we already aborted */ + if (barrier_i_aborted(b)) + return false; + + assert(b->me >= 0); + do { + len = write(b->me, &buf, sizeof(buf)); + } while (len < 0 && IN_SET(errno, EAGAIN, EINTR)); + + if (len != sizeof(buf)) + goto error; + + /* lock if we aborted */ + if (buf >= (uint64_t)BARRIER_ABORTION) { + if (barrier_they_aborted(b)) + b->barriers = BARRIER_WE_ABORTED; + else + b->barriers = BARRIER_I_ABORTED; + } else if (!barrier_is_aborted(b)) + b->barriers += buf; + + return !barrier_i_aborted(b); + +error: + /* If there is an unexpected error, we have to make this fatal. There + * is no way we can recover from sync-errors. Therefore, we close the + * pipe-ends and treat this as abortion. The other end will notice the + * pipe-close and treat it as abortion, too. */ + + safe_close_pair(b->pipe); + b->barriers = BARRIER_WE_ABORTED; + return false; +} + +/* waits for barriers; returns false if they aborted, otherwise true */ +static bool barrier_read(Barrier *b, int64_t comp) { + if (barrier_they_aborted(b)) + return false; + + while (b->barriers > comp) { + struct pollfd pfd[2] = { + { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1], + .events = POLLHUP }, + { .fd = b->them, + .events = POLLIN }}; + uint64_t buf; + int r; + + r = poll(pfd, ELEMENTSOF(pfd), -1); + if (r < 0) { + if (IN_SET(errno, EAGAIN, EINTR)) + continue; + goto error; + } + if (pfd[0].revents & POLLNVAL || + pfd[1].revents & POLLNVAL) + goto error; + + if (pfd[1].revents) { + ssize_t len; + + /* events on @them signal new data for us */ + len = read(b->them, &buf, sizeof(buf)); + if (len < 0 && IN_SET(errno, EAGAIN, EINTR)) + continue; + + if (len != sizeof(buf)) + goto error; + } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL)) + /* POLLHUP on the pipe tells us the other side exited. + * We treat this as implicit abortion. But we only + * handle it if there's no event on the eventfd. This + * guarantees that exit-abortions do not overwrite real + * barriers. */ + buf = BARRIER_ABORTION; + else + continue; + + /* lock if they aborted */ + if (buf >= (uint64_t)BARRIER_ABORTION) { + if (barrier_i_aborted(b)) + b->barriers = BARRIER_WE_ABORTED; + else + b->barriers = BARRIER_THEY_ABORTED; + } else if (!barrier_is_aborted(b)) + b->barriers -= buf; + } + + return !barrier_they_aborted(b); + +error: + /* If there is an unexpected error, we have to make this fatal. There + * is no way we can recover from sync-errors. Therefore, we close the + * pipe-ends and treat this as abortion. The other end will notice the + * pipe-close and treat it as abortion, too. */ + + safe_close_pair(b->pipe); + b->barriers = BARRIER_WE_ABORTED; + return false; +} + +/** + * barrier_place() - Place a new barrier + * @b: barrier object + * + * This places a new barrier on the barrier object. If either side already + * aborted, this is a no-op and returns "false". Otherwise, the barrier is + * placed and this returns "true". + * + * Returns: true if barrier was placed, false if either side aborted. + */ +bool barrier_place(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_write(b, BARRIER_SINGLE); + return true; +} + +/** + * barrier_abort() - Abort the synchronization + * @b: barrier object to abort + * + * This aborts the barrier-synchronization. If barrier_abort() was already + * called on this side, this is a no-op. Otherwise, the barrier is put into the + * ABORT-state and will stay there. The other side is notified about the + * abortion. Any following attempt to place normal barriers or to wait on normal + * barriers will return immediately as "false". + * + * You can wait for the other side to call barrier_abort(), too. Use + * barrier_wait_abortion() for that. + * + * Returns: false if the other side already aborted, true otherwise. + */ +bool barrier_abort(Barrier *b) { + assert(b); + + barrier_write(b, BARRIER_ABORTION); + return !barrier_they_aborted(b); +} + +/** + * barrier_wait_next() - Wait for the next barrier of the other side + * @b: barrier to operate on + * + * This waits until the other side places its next barrier. This is independent + * of any barrier-links and just waits for any next barrier of the other side. + * + * If either side aborted, this returns false. + * + * Returns: false if either side aborted, true otherwise. + */ +bool barrier_wait_next(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_read(b, b->barriers - 1); + return !barrier_is_aborted(b); +} + +/** + * barrier_wait_abortion() - Wait for the other side to abort + * @b: barrier to operate on + * + * This waits until the other side called barrier_abort(). This can be called + * regardless whether the local side already called barrier_abort() or not. + * + * If the other side has already aborted, this returns immediately. + * + * Returns: false if the local side aborted, true otherwise. + */ +bool barrier_wait_abortion(Barrier *b) { + assert(b); + + barrier_read(b, BARRIER_THEY_ABORTED); + return !barrier_i_aborted(b); +} + +/** + * barrier_sync_next() - Wait for the other side to place a next linked barrier + * @b: barrier to operate on + * + * This is like barrier_wait_next() and waits for the other side to call + * barrier_place(). However, this only waits for linked barriers. That means, if + * the other side already placed more barriers than (or as much as) we did, this + * returns immediately instead of waiting. + * + * If either side aborted, this returns false. + * + * Returns: false if either side aborted, true otherwise. + */ +bool barrier_sync_next(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_read(b, MAX((int64_t)0, b->barriers - 1)); + return !barrier_is_aborted(b); +} + +/** + * barrier_sync() - Wait for the other side to place as many barriers as we did + * @b: barrier to operate on + * + * This is like barrier_sync_next() but waits for the other side to call + * barrier_place() as often as we did (in total). If they already placed as much + * as we did (or more), this returns immediately instead of waiting. + * + * If either side aborted, this returns false. + * + * Returns: false if either side aborted, true otherwise. + */ +bool barrier_sync(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_read(b, 0); + return !barrier_is_aborted(b); +} diff --git a/src/shared/barrier.h b/src/shared/barrier.h new file mode 100644 index 0000000..b11dce4 --- /dev/null +++ b/src/shared/barrier.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +#include "macro.h" + +/* See source file for an API description. */ + +typedef struct Barrier Barrier; + +enum { + BARRIER_SINGLE = 1LL, + BARRIER_ABORTION = INT64_MAX, + + /* bias values to store state; keep @WE < @THEY < @I */ + BARRIER_BIAS = INT64_MIN, + BARRIER_WE_ABORTED = BARRIER_BIAS + 1LL, + BARRIER_THEY_ABORTED = BARRIER_BIAS + 2LL, + BARRIER_I_ABORTED = BARRIER_BIAS + 3LL, +}; + +enum { + BARRIER_PARENT, + BARRIER_CHILD, +}; + +struct Barrier { + int me; + int them; + int pipe[2]; + int64_t barriers; +}; + +#define BARRIER_NULL {-1, -1, {-1, -1}, 0} + +int barrier_create(Barrier *obj); +void barrier_destroy(Barrier *b); + +DEFINE_TRIVIAL_CLEANUP_FUNC(Barrier*, barrier_destroy); + +void barrier_set_role(Barrier *b, unsigned role); + +bool barrier_place(Barrier *b); +bool barrier_abort(Barrier *b); + +bool barrier_wait_next(Barrier *b); +bool barrier_wait_abortion(Barrier *b); +bool barrier_sync_next(Barrier *b); +bool barrier_sync(Barrier *b); + +static inline bool barrier_i_aborted(Barrier *b) { + return IN_SET(b->barriers, BARRIER_I_ABORTED, BARRIER_WE_ABORTED); +} + +static inline bool barrier_they_aborted(Barrier *b) { + return IN_SET(b->barriers, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED); +} + +static inline bool barrier_we_aborted(Barrier *b) { + return b->barriers == BARRIER_WE_ABORTED; +} + +static inline bool barrier_is_aborted(Barrier *b) { + return IN_SET(b->barriers, + BARRIER_I_ABORTED, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED); +} + +static inline bool barrier_place_and_sync(Barrier *b) { + (void) barrier_place(b); + return barrier_sync(b); +} diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c new file mode 100644 index 0000000..1d05409 --- /dev/null +++ b/src/shared/base-filesystem.c @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <syslog.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "base-filesystem.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "nulstr-util.h" +#include "path-util.h" +#include "string-util.h" +#include "umask-util.h" +#include "user-util.h" + +typedef struct BaseFilesystem { + const char *dir; + mode_t mode; + const char *target; + const char *exists; + bool ignore_failure; +} BaseFilesystem; + +static const BaseFilesystem table[] = { + { "bin", 0, "usr/bin\0", NULL }, + { "lib", 0, "usr/lib\0", NULL }, + { "root", 0755, NULL, NULL, true }, + { "sbin", 0, "usr/sbin\0", NULL }, + { "usr", 0755, NULL, NULL }, + { "var", 0755, NULL, NULL }, + { "etc", 0755, NULL, NULL }, + { "proc", 0755, NULL, NULL, true }, + { "sys", 0755, NULL, NULL, true }, + { "dev", 0755, NULL, NULL, true }, +#if defined(__i386__) || defined(__x86_64__) + { "lib64", 0, "usr/lib/x86_64-linux-gnu\0" + "usr/lib64\0", "ld-linux-x86-64.so.2" }, +#endif +}; + +int base_filesystem_create(const char *root, uid_t uid, gid_t gid) { + _cleanup_close_ int fd = -1; + size_t i; + int r; + + fd = open(root, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (fd < 0) + return log_error_errno(errno, "Failed to open root file system: %m"); + + for (i = 0; i < ELEMENTSOF(table); i ++) { + if (faccessat(fd, table[i].dir, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) + continue; + + if (table[i].target) { + const char *target = NULL, *s; + + /* check if one of the targets exists */ + NULSTR_FOREACH(s, table[i].target) { + if (faccessat(fd, s, F_OK, AT_SYMLINK_NOFOLLOW) < 0) + continue; + + /* check if a specific file exists at the target path */ + if (table[i].exists) { + _cleanup_free_ char *p = NULL; + + p = path_join(s, table[i].exists); + if (!p) + return log_oom(); + + if (faccessat(fd, p, F_OK, AT_SYMLINK_NOFOLLOW) < 0) + continue; + } + + target = s; + break; + } + + if (!target) + continue; + + if (symlinkat(target, fd, table[i].dir) < 0) { + log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno, + "Failed to create symlink at %s/%s: %m", root, table[i].dir); + + if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure) + continue; + + return -errno; + } + + if (uid_is_valid(uid) || gid_is_valid(gid)) { + if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0) + return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir); + } + + continue; + } + + RUN_WITH_UMASK(0000) + r = mkdirat(fd, table[i].dir, table[i].mode); + if (r < 0) { + log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno, + "Failed to create directory at %s/%s: %m", root, table[i].dir); + + if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure) + continue; + + return -errno; + } + + if (uid != UID_INVALID || gid != UID_INVALID) { + if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0) + return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, table[i].dir); + } + } + + return 0; +} diff --git a/src/shared/base-filesystem.h b/src/shared/base-filesystem.h new file mode 100644 index 0000000..a33975f --- /dev/null +++ b/src/shared/base-filesystem.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/types.h> + +int base_filesystem_create(const char *root, uid_t uid, gid_t gid); diff --git a/src/shared/binfmt-util.c b/src/shared/binfmt-util.c new file mode 100644 index 0000000..724d7f2 --- /dev/null +++ b/src/shared/binfmt-util.c @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <sys/vfs.h> + +#include "binfmt-util.h" +#include "fileio.h" +#include "missing_magic.h" +#include "stat-util.h" + +int disable_binfmt(void) { + int r; + + /* Flush out all rules. This is important during shutdown to cover for rules using "F", since those + * might pin a file and thus block us from unmounting stuff cleanly. + * + * We are a bit careful here, since binfmt_misc might still be an autofs which we don't want to + * trigger. */ + + r = path_is_fs_type("/proc/sys/fs/binfmt_misc", BINFMTFS_MAGIC); + if (r == 0 || r == -ENOENT) { + log_debug("binfmt_misc is not mounted, not detaching entries."); + return 0; + } + if (r < 0) + return log_warning_errno(r, "Failed to determine whether binfmt_misc is mounted: %m"); + + r = write_string_file("/proc/sys/fs/binfmt_misc/status", "-1", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return log_warning_errno(r, "Failed to unregister binfmt_misc entries: %m"); + + log_debug("Unregistered all remaining binfmt_misc entries."); + return 0; +} diff --git a/src/shared/binfmt-util.h b/src/shared/binfmt-util.h new file mode 100644 index 0000000..2f008d1 --- /dev/null +++ b/src/shared/binfmt-util.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +int disable_binfmt(void); diff --git a/src/shared/bitmap.c b/src/shared/bitmap.c new file mode 100644 index 0000000..5d450c8 --- /dev/null +++ b/src/shared/bitmap.c @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "bitmap.h" +#include "hashmap.h" +#include "macro.h" +#include "memory-util.h" + +/* Bitmaps are only meant to store relatively small numbers + * (corresponding to, say, an enum), so it is ok to limit + * the max entry. 64k should be plenty. */ +#define BITMAPS_MAX_ENTRY 0xffff + +/* This indicates that we reached the end of the bitmap */ +#define BITMAP_END ((unsigned) -1) + +#define BITMAP_NUM_TO_OFFSET(n) ((n) / (sizeof(uint64_t) * 8)) +#define BITMAP_NUM_TO_REM(n) ((n) % (sizeof(uint64_t) * 8)) +#define BITMAP_OFFSET_TO_NUM(offset, rem) ((offset) * sizeof(uint64_t) * 8 + (rem)) + +Bitmap *bitmap_new(void) { + return new0(Bitmap, 1); +} + +Bitmap *bitmap_copy(Bitmap *b) { + Bitmap *ret; + + ret = bitmap_new(); + if (!ret) + return NULL; + + ret->bitmaps = newdup(uint64_t, b->bitmaps, b->n_bitmaps); + if (!ret->bitmaps) + return mfree(ret); + + ret->n_bitmaps = ret->bitmaps_allocated = b->n_bitmaps; + return ret; +} + +void bitmap_free(Bitmap *b) { + if (!b) + return; + + free(b->bitmaps); + free(b); +} + +int bitmap_ensure_allocated(Bitmap **b) { + Bitmap *a; + + assert(b); + + if (*b) + return 0; + + a = bitmap_new(); + if (!a) + return -ENOMEM; + + *b = a; + + return 0; +} + +int bitmap_set(Bitmap *b, unsigned n) { + uint64_t bitmask; + unsigned offset; + + assert(b); + + /* we refuse to allocate huge bitmaps */ + if (n > BITMAPS_MAX_ENTRY) + return -ERANGE; + + offset = BITMAP_NUM_TO_OFFSET(n); + + if (offset >= b->n_bitmaps) { + if (!GREEDY_REALLOC0(b->bitmaps, b->bitmaps_allocated, offset + 1)) + return -ENOMEM; + + b->n_bitmaps = offset + 1; + } + + bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n); + + b->bitmaps[offset] |= bitmask; + + return 0; +} + +void bitmap_unset(Bitmap *b, unsigned n) { + uint64_t bitmask; + unsigned offset; + + if (!b) + return; + + offset = BITMAP_NUM_TO_OFFSET(n); + + if (offset >= b->n_bitmaps) + return; + + bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n); + + b->bitmaps[offset] &= ~bitmask; +} + +bool bitmap_isset(const Bitmap *b, unsigned n) { + uint64_t bitmask; + unsigned offset; + + if (!b) + return false; + + offset = BITMAP_NUM_TO_OFFSET(n); + + if (offset >= b->n_bitmaps) + return false; + + bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n); + + return !!(b->bitmaps[offset] & bitmask); +} + +bool bitmap_isclear(const Bitmap *b) { + unsigned i; + + if (!b) + return true; + + for (i = 0; i < b->n_bitmaps; i++) + if (b->bitmaps[i] != 0) + return false; + + return true; +} + +void bitmap_clear(Bitmap *b) { + if (!b) + return; + + b->bitmaps = mfree(b->bitmaps); + b->n_bitmaps = 0; + b->bitmaps_allocated = 0; +} + +bool bitmap_iterate(const Bitmap *b, Iterator *i, unsigned *n) { + uint64_t bitmask; + unsigned offset, rem; + + assert(i); + assert(n); + + if (!b || i->idx == BITMAP_END) + return false; + + offset = BITMAP_NUM_TO_OFFSET(i->idx); + rem = BITMAP_NUM_TO_REM(i->idx); + bitmask = UINT64_C(1) << rem; + + for (; offset < b->n_bitmaps; offset ++) { + if (b->bitmaps[offset]) { + for (; bitmask; bitmask <<= 1, rem ++) { + if (b->bitmaps[offset] & bitmask) { + *n = BITMAP_OFFSET_TO_NUM(offset, rem); + i->idx = *n + 1; + + return true; + } + } + } + + rem = 0; + bitmask = 1; + } + + i->idx = BITMAP_END; + + return false; +} + +bool bitmap_equal(const Bitmap *a, const Bitmap *b) { + size_t common_n_bitmaps; + const Bitmap *c; + unsigned i; + + if (a == b) + return true; + + if (!a != !b) + return false; + + if (!a) + return true; + + common_n_bitmaps = MIN(a->n_bitmaps, b->n_bitmaps); + if (memcmp_safe(a->bitmaps, b->bitmaps, sizeof(uint64_t) * common_n_bitmaps) != 0) + return false; + + c = a->n_bitmaps > b->n_bitmaps ? a : b; + for (i = common_n_bitmaps; i < c->n_bitmaps; i++) + if (c->bitmaps[i] != 0) + return false; + + return true; +} diff --git a/src/shared/bitmap.h b/src/shared/bitmap.h new file mode 100644 index 0000000..1c305a2 --- /dev/null +++ b/src/shared/bitmap.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" + +typedef struct Bitmap { + uint64_t *bitmaps; + size_t n_bitmaps; + size_t bitmaps_allocated; +} Bitmap; + +Bitmap *bitmap_new(void); +Bitmap *bitmap_copy(Bitmap *b); +int bitmap_ensure_allocated(Bitmap **b); +void bitmap_free(Bitmap *b); + +int bitmap_set(Bitmap *b, unsigned n); +void bitmap_unset(Bitmap *b, unsigned n); +bool bitmap_isset(const Bitmap *b, unsigned n); +bool bitmap_isclear(const Bitmap *b); +void bitmap_clear(Bitmap *b); + +bool bitmap_iterate(const Bitmap *b, Iterator *i, unsigned *n); + +bool bitmap_equal(const Bitmap *a, const Bitmap *b); + +#define _BITMAP_FOREACH(n, b, i) \ + for (Iterator i = {}; bitmap_iterate((b), &i, (unsigned*)&(n)); ) +#define BITMAP_FOREACH(n, b) \ + _BITMAP_FOREACH(n, b, UNIQ_T(i, UNIQ)) + +DEFINE_TRIVIAL_CLEANUP_FUNC(Bitmap*, bitmap_free); + +#define _cleanup_bitmap_free_ _cleanup_(bitmap_freep) diff --git a/src/shared/blkid-util.h b/src/shared/blkid-util.h new file mode 100644 index 0000000..3f38e9b --- /dev/null +++ b/src/shared/blkid-util.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#if HAVE_BLKID +# include <blkid.h> + +# include "macro.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(blkid_probe, blkid_free_probe); +#endif diff --git a/src/shared/bond-util.c b/src/shared/bond-util.c new file mode 100644 index 0000000..e04b201 --- /dev/null +++ b/src/shared/bond-util.c @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bond-util.h" +#include "string-table.h" + +static const char* const bond_mode_table[_NETDEV_BOND_MODE_MAX] = { + [NETDEV_BOND_MODE_BALANCE_RR] = "balance-rr", + [NETDEV_BOND_MODE_ACTIVE_BACKUP] = "active-backup", + [NETDEV_BOND_MODE_BALANCE_XOR] = "balance-xor", + [NETDEV_BOND_MODE_BROADCAST] = "broadcast", + [NETDEV_BOND_MODE_802_3AD] = "802.3ad", + [NETDEV_BOND_MODE_BALANCE_TLB] = "balance-tlb", + [NETDEV_BOND_MODE_BALANCE_ALB] = "balance-alb", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_mode, BondMode); + +static const char* const bond_xmit_hash_policy_table[_NETDEV_BOND_XMIT_HASH_POLICY_MAX] = { + [NETDEV_BOND_XMIT_HASH_POLICY_LAYER2] = "layer2", + [NETDEV_BOND_XMIT_HASH_POLICY_LAYER34] = "layer3+4", + [NETDEV_BOND_XMIT_HASH_POLICY_LAYER23] = "layer2+3", + [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23] = "encap2+3", + [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34] = "encap3+4", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_xmit_hash_policy, BondXmitHashPolicy); + +static const char* const bond_lacp_rate_table[_NETDEV_BOND_LACP_RATE_MAX] = { + [NETDEV_BOND_LACP_RATE_SLOW] = "slow", + [NETDEV_BOND_LACP_RATE_FAST] = "fast", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_lacp_rate, BondLacpRate); + +static const char* const bond_ad_select_table[_NETDEV_BOND_AD_SELECT_MAX] = { + [NETDEV_BOND_AD_SELECT_STABLE] = "stable", + [NETDEV_BOND_AD_SELECT_BANDWIDTH] = "bandwidth", + [NETDEV_BOND_AD_SELECT_COUNT] = "count", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_ad_select, BondAdSelect); + +static const char* const bond_fail_over_mac_table[_NETDEV_BOND_FAIL_OVER_MAC_MAX] = { + [NETDEV_BOND_FAIL_OVER_MAC_NONE] = "none", + [NETDEV_BOND_FAIL_OVER_MAC_ACTIVE] = "active", + [NETDEV_BOND_FAIL_OVER_MAC_FOLLOW] = "follow", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_fail_over_mac, BondFailOverMac); + +static const char *const bond_arp_validate_table[_NETDEV_BOND_ARP_VALIDATE_MAX] = { + [NETDEV_BOND_ARP_VALIDATE_NONE] = "none", + [NETDEV_BOND_ARP_VALIDATE_ACTIVE]= "active", + [NETDEV_BOND_ARP_VALIDATE_BACKUP]= "backup", + [NETDEV_BOND_ARP_VALIDATE_ALL]= "all", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_arp_validate, BondArpValidate); + +static const char *const bond_arp_all_targets_table[_NETDEV_BOND_ARP_ALL_TARGETS_MAX] = { + [NETDEV_BOND_ARP_ALL_TARGETS_ANY] = "any", + [NETDEV_BOND_ARP_ALL_TARGETS_ALL] = "all", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_arp_all_targets, BondArpAllTargets); + +static const char *const bond_primary_reselect_table[_NETDEV_BOND_PRIMARY_RESELECT_MAX] = { + [NETDEV_BOND_PRIMARY_RESELECT_ALWAYS] = "always", + [NETDEV_BOND_PRIMARY_RESELECT_BETTER]= "better", + [NETDEV_BOND_PRIMARY_RESELECT_FAILURE]= "failure", +}; + +DEFINE_STRING_TABLE_LOOKUP(bond_primary_reselect, BondPrimaryReselect); diff --git a/src/shared/bond-util.h b/src/shared/bond-util.h new file mode 100644 index 0000000..a8f9ecb --- /dev/null +++ b/src/shared/bond-util.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <netinet/in.h> +#include <linux/if_bonding.h> + +#include "macro.h" + +/* + * Maximum number of targets supported by the kernel for a single + * bond netdev. + */ +#define NETDEV_BOND_ARP_TARGETS_MAX 16 + +typedef enum BondMode { + NETDEV_BOND_MODE_BALANCE_RR = BOND_MODE_ROUNDROBIN, + NETDEV_BOND_MODE_ACTIVE_BACKUP = BOND_MODE_ACTIVEBACKUP, + NETDEV_BOND_MODE_BALANCE_XOR = BOND_MODE_XOR, + NETDEV_BOND_MODE_BROADCAST = BOND_MODE_BROADCAST, + NETDEV_BOND_MODE_802_3AD = BOND_MODE_8023AD, + NETDEV_BOND_MODE_BALANCE_TLB = BOND_MODE_TLB, + NETDEV_BOND_MODE_BALANCE_ALB = BOND_MODE_ALB, + _NETDEV_BOND_MODE_MAX, + _NETDEV_BOND_MODE_INVALID = -1 +} BondMode; + +typedef enum BondXmitHashPolicy { + NETDEV_BOND_XMIT_HASH_POLICY_LAYER2 = BOND_XMIT_POLICY_LAYER2, + NETDEV_BOND_XMIT_HASH_POLICY_LAYER34 = BOND_XMIT_POLICY_LAYER34, + NETDEV_BOND_XMIT_HASH_POLICY_LAYER23 = BOND_XMIT_POLICY_LAYER23, + NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23 = BOND_XMIT_POLICY_ENCAP23, + NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34 = BOND_XMIT_POLICY_ENCAP34, + _NETDEV_BOND_XMIT_HASH_POLICY_MAX, + _NETDEV_BOND_XMIT_HASH_POLICY_INVALID = -1 +} BondXmitHashPolicy; + +typedef enum BondLacpRate { + NETDEV_BOND_LACP_RATE_SLOW, + NETDEV_BOND_LACP_RATE_FAST, + _NETDEV_BOND_LACP_RATE_MAX, + _NETDEV_BOND_LACP_RATE_INVALID = -1, +} BondLacpRate; + +typedef enum BondAdSelect { + NETDEV_BOND_AD_SELECT_STABLE, + NETDEV_BOND_AD_SELECT_BANDWIDTH, + NETDEV_BOND_AD_SELECT_COUNT, + _NETDEV_BOND_AD_SELECT_MAX, + _NETDEV_BOND_AD_SELECT_INVALID = -1, +} BondAdSelect; + +typedef enum BondFailOverMac { + NETDEV_BOND_FAIL_OVER_MAC_NONE, + NETDEV_BOND_FAIL_OVER_MAC_ACTIVE, + NETDEV_BOND_FAIL_OVER_MAC_FOLLOW, + _NETDEV_BOND_FAIL_OVER_MAC_MAX, + _NETDEV_BOND_FAIL_OVER_MAC_INVALID = -1, +} BondFailOverMac; + +typedef enum BondArpValidate { + NETDEV_BOND_ARP_VALIDATE_NONE, + NETDEV_BOND_ARP_VALIDATE_ACTIVE, + NETDEV_BOND_ARP_VALIDATE_BACKUP, + NETDEV_BOND_ARP_VALIDATE_ALL, + _NETDEV_BOND_ARP_VALIDATE_MAX, + _NETDEV_BOND_ARP_VALIDATE_INVALID = -1, +} BondArpValidate; + +typedef enum BondArpAllTargets { + NETDEV_BOND_ARP_ALL_TARGETS_ANY, + NETDEV_BOND_ARP_ALL_TARGETS_ALL, + _NETDEV_BOND_ARP_ALL_TARGETS_MAX, + _NETDEV_BOND_ARP_ALL_TARGETS_INVALID = -1, +} BondArpAllTargets; + +typedef enum BondPrimaryReselect { + NETDEV_BOND_PRIMARY_RESELECT_ALWAYS, + NETDEV_BOND_PRIMARY_RESELECT_BETTER, + NETDEV_BOND_PRIMARY_RESELECT_FAILURE, + _NETDEV_BOND_PRIMARY_RESELECT_MAX, + _NETDEV_BOND_PRIMARY_RESELECT_INVALID = -1, +} BondPrimaryReselect; + +const char *bond_mode_to_string(BondMode d) _const_; +BondMode bond_mode_from_string(const char *d) _pure_; + +const char *bond_xmit_hash_policy_to_string(BondXmitHashPolicy d) _const_; +BondXmitHashPolicy bond_xmit_hash_policy_from_string(const char *d) _pure_; + +const char *bond_lacp_rate_to_string(BondLacpRate d) _const_; +BondLacpRate bond_lacp_rate_from_string(const char *d) _pure_; + +const char *bond_fail_over_mac_to_string(BondFailOverMac d) _const_; +BondFailOverMac bond_fail_over_mac_from_string(const char *d) _pure_; + +const char *bond_ad_select_to_string(BondAdSelect d) _const_; +BondAdSelect bond_ad_select_from_string(const char *d) _pure_; + +const char *bond_arp_validate_to_string(BondArpValidate d) _const_; +BondArpValidate bond_arp_validate_from_string(const char *d) _pure_; + +const char *bond_arp_all_targets_to_string(BondArpAllTargets d) _const_; +BondArpAllTargets bond_arp_all_targets_from_string(const char *d) _pure_; + +const char *bond_primary_reselect_to_string(BondPrimaryReselect d) _const_; +BondPrimaryReselect bond_primary_reselect_from_string(const char *d) _pure_; diff --git a/src/shared/boot-timestamps.c b/src/shared/boot-timestamps.c new file mode 100644 index 0000000..8786e89 --- /dev/null +++ b/src/shared/boot-timestamps.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "acpi-fpdt.h" +#include "boot-timestamps.h" +#include "efi-loader.h" +#include "macro.h" +#include "time-util.h" + +int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader) { + usec_t x = 0, y = 0, a; + int r; + dual_timestamp _n; + + assert(firmware); + assert(loader); + + if (!n) { + dual_timestamp_get(&_n); + n = &_n; + } + + r = acpi_get_boot_usec(&x, &y); + if (r < 0) { + r = efi_loader_get_boot_usec(&x, &y); + if (r < 0) + return r; + } + + /* Let's convert this to timestamps where the firmware + * began/loader began working. To make this more confusing: + * since usec_t is unsigned and the kernel's monotonic clock + * begins at kernel initialization we'll actually initialize + * the monotonic timestamps here as negative of the actual + * value. */ + + firmware->monotonic = y; + loader->monotonic = y - x; + + a = n->monotonic + firmware->monotonic; + firmware->realtime = n->realtime > a ? n->realtime - a : 0; + + a = n->monotonic + loader->monotonic; + loader->realtime = n->realtime > a ? n->realtime - a : 0; + + return 0; +} diff --git a/src/shared/boot-timestamps.h b/src/shared/boot-timestamps.h new file mode 100644 index 0000000..55b7ad1 --- /dev/null +++ b/src/shared/boot-timestamps.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <time-util.h> + +int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader); diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c new file mode 100644 index 0000000..e50408a --- /dev/null +++ b/src/shared/bootspec.c @@ -0,0 +1,1432 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> +#include <linux/magic.h> +#include <unistd.h> + +#include "sd-device.h" +#include "sd-id128.h" + +#include "alloc-util.h" +#include "blkid-util.h" +#include "bootspec.h" +#include "conf-files.h" +#include "def.h" +#include "device-nodes.h" +#include "dirent-util.h" +#include "efivars.h" +#include "efi-loader.h" +#include "env-file.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "parse-util.h" +#include "path-util.h" +#include "pe-header.h" +#include "sort-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "unaligned.h" +#include "util.h" +#include "virt.h" + +static void boot_entry_free(BootEntry *entry) { + assert(entry); + + free(entry->id); + free(entry->id_old); + free(entry->path); + free(entry->root); + free(entry->title); + free(entry->show_title); + free(entry->version); + free(entry->machine_id); + free(entry->architecture); + strv_free(entry->options); + free(entry->kernel); + free(entry->efi); + strv_free(entry->initrd); + free(entry->device_tree); +} + +static int boot_entry_load( + const char *root, + const char *path, + BootEntry *entry) { + + _cleanup_(boot_entry_free) BootEntry tmp = { + .type = BOOT_ENTRY_CONF, + }; + + _cleanup_fclose_ FILE *f = NULL; + unsigned line = 1; + char *b, *c; + int r; + + assert(root); + assert(path); + assert(entry); + + c = endswith_no_case(path, ".conf"); + if (!c) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry file suffix: %s", path); + + b = basename(path); + tmp.id = strdup(b); + tmp.id_old = strndup(b, c - b); + if (!tmp.id || !tmp.id_old) + return log_oom(); + + if (!efi_loader_entry_name_valid(tmp.id)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry name: %s", tmp.id); + + tmp.path = strdup(path); + if (!tmp.path) + return log_oom(); + + tmp.root = strdup(root); + if (!tmp.root) + return log_oom(); + + f = fopen(path, "re"); + if (!f) + return log_error_errno(errno, "Failed to open \"%s\": %m", path); + + for (;;) { + _cleanup_free_ char *buf = NULL, *field = NULL; + const char *p; + + r = read_line(f, LONG_LINE_MAX, &buf); + if (r == 0) + break; + if (r == -ENOBUFS) + return log_error_errno(r, "%s:%u: Line too long", path, line); + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + + line++; + + if (IN_SET(*strstrip(buf), '#', '\0')) + continue; + + p = buf; + r = extract_first_word(&p, &field, " \t", 0); + if (r < 0) { + log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line); + continue; + } + if (r == 0) { + log_warning("%s:%u: Bad syntax", path, line); + continue; + } + + if (streq(field, "title")) + r = free_and_strdup(&tmp.title, p); + else if (streq(field, "version")) + r = free_and_strdup(&tmp.version, p); + else if (streq(field, "machine-id")) + r = free_and_strdup(&tmp.machine_id, p); + else if (streq(field, "architecture")) + r = free_and_strdup(&tmp.architecture, p); + else if (streq(field, "options")) + r = strv_extend(&tmp.options, p); + else if (streq(field, "linux")) + r = free_and_strdup(&tmp.kernel, p); + else if (streq(field, "efi")) + r = free_and_strdup(&tmp.efi, p); + else if (streq(field, "initrd")) + r = strv_extend(&tmp.initrd, p); + else if (streq(field, "devicetree")) + r = free_and_strdup(&tmp.device_tree, p); + else { + log_notice("%s:%u: Unknown line \"%s\", ignoring.", path, line, field); + continue; + } + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + } + + *entry = tmp; + tmp = (BootEntry) {}; + return 0; +} + +void boot_config_free(BootConfig *config) { + size_t i; + + assert(config); + + free(config->default_pattern); + free(config->timeout); + free(config->editor); + free(config->auto_entries); + free(config->auto_firmware); + free(config->console_mode); + free(config->random_seed_mode); + + free(config->entry_oneshot); + free(config->entry_default); + + for (i = 0; i < config->n_entries; i++) + boot_entry_free(config->entries + i); + free(config->entries); +} + +static int boot_loader_read_conf(const char *path, BootConfig *config) { + _cleanup_fclose_ FILE *f = NULL; + unsigned line = 1; + int r; + + assert(path); + assert(config); + + f = fopen(path, "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_error_errno(errno, "Failed to open \"%s\": %m", path); + } + + for (;;) { + _cleanup_free_ char *buf = NULL, *field = NULL; + const char *p; + + r = read_line(f, LONG_LINE_MAX, &buf); + if (r == 0) + break; + if (r == -ENOBUFS) + return log_error_errno(r, "%s:%u: Line too long", path, line); + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + + line++; + + if (IN_SET(*strstrip(buf), '#', '\0')) + continue; + + p = buf; + r = extract_first_word(&p, &field, " \t", 0); + if (r < 0) { + log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line); + continue; + } + if (r == 0) { + log_warning("%s:%u: Bad syntax", path, line); + continue; + } + + if (streq(field, "default")) + r = free_and_strdup(&config->default_pattern, p); + else if (streq(field, "timeout")) + r = free_and_strdup(&config->timeout, p); + else if (streq(field, "editor")) + r = free_and_strdup(&config->editor, p); + else if (streq(field, "auto-entries")) + r = free_and_strdup(&config->auto_entries, p); + else if (streq(field, "auto-firmware")) + r = free_and_strdup(&config->auto_firmware, p); + else if (streq(field, "console-mode")) + r = free_and_strdup(&config->console_mode, p); + else if (streq(field, "random-seed-mode")) + r = free_and_strdup(&config->random_seed_mode, p); + else { + log_notice("%s:%u: Unknown line \"%s\", ignoring.", path, line, field); + continue; + } + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + } + + return 1; +} + +static int boot_entry_compare(const BootEntry *a, const BootEntry *b) { + return str_verscmp(a->id, b->id); +} + +static int boot_entries_find( + const char *root, + const char *dir, + BootEntry **entries, + size_t *n_entries) { + + _cleanup_strv_free_ char **files = NULL; + size_t n_allocated = *n_entries; + char **f; + int r; + + assert(root); + assert(dir); + assert(entries); + assert(n_entries); + + r = conf_files_list(&files, ".conf", NULL, 0, dir); + if (r < 0) + return log_error_errno(r, "Failed to list files in \"%s\": %m", dir); + + STRV_FOREACH(f, files) { + if (!GREEDY_REALLOC0(*entries, n_allocated, *n_entries + 1)) + return log_oom(); + + r = boot_entry_load(root, *f, *entries + *n_entries); + if (r < 0) + continue; + + (*n_entries) ++; + } + + return 0; +} + +static int boot_entry_load_unified( + const char *root, + const char *path, + const char *osrelease, + const char *cmdline, + BootEntry *ret) { + + _cleanup_free_ char *os_pretty_name = NULL, *os_id = NULL, *version_id = NULL, *build_id = NULL; + _cleanup_(boot_entry_free) BootEntry tmp = { + .type = BOOT_ENTRY_UNIFIED, + }; + _cleanup_fclose_ FILE *f = NULL; + const char *k; + char *b; + int r; + + assert(root); + assert(path); + assert(osrelease); + + k = path_startswith(path, root); + if (!k) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not below root: %s", path); + + f = fmemopen_unlocked((void*) osrelease, strlen(osrelease), "r"); + if (!f) + return log_error_errno(errno, "Failed to open os-release buffer: %m"); + + r = parse_env_file(f, "os-release", + "PRETTY_NAME", &os_pretty_name, + "ID", &os_id, + "VERSION_ID", &version_id, + "BUILD_ID", &build_id); + if (r < 0) + return log_error_errno(r, "Failed to parse os-release data from unified kernel image %s: %m", path); + + if (!os_pretty_name || !os_id || !(version_id || build_id)) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Missing fields in os-release data from unified kernel image %s, refusing.", path); + + b = basename(path); + tmp.id = strdup(b); + tmp.id_old = strjoin(os_id, "-", version_id ?: build_id); + if (!tmp.id || !tmp.id_old) + return log_oom(); + + if (!efi_loader_entry_name_valid(tmp.id)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry name: %s", tmp.id); + + tmp.path = strdup(path); + if (!tmp.path) + return log_oom(); + + tmp.root = strdup(root); + if (!tmp.root) + return log_oom(); + + tmp.kernel = strdup(skip_leading_chars(k, "/")); + if (!tmp.kernel) + return log_oom(); + + tmp.options = strv_new(skip_leading_chars(cmdline, WHITESPACE)); + if (!tmp.options) + return log_oom(); + + delete_trailing_chars(tmp.options[0], WHITESPACE); + + tmp.title = TAKE_PTR(os_pretty_name); + + *ret = tmp; + tmp = (BootEntry) {}; + return 0; +} + +/* Maximum PE section we are willing to load (Note that sections we are not interested in may be larger, but + * the ones we do care about and we are willing to load into memory have this size limit.) */ +#define PE_SECTION_SIZE_MAX (4U*1024U*1024U) + +static int find_sections( + int fd, + char **ret_osrelease, + char **ret_cmdline) { + + _cleanup_free_ struct PeSectionHeader *sections = NULL; + _cleanup_free_ char *osrelease = NULL, *cmdline = NULL; + size_t i, n_sections; + struct DosFileHeader dos; + struct PeHeader pe; + uint64_t start; + ssize_t n; + + n = pread(fd, &dos, sizeof(dos), 0); + if (n < 0) + return log_error_errno(errno, "Failed read DOS header: %m"); + if (n != sizeof(dos)) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading DOS header, refusing."); + + if (dos.Magic[0] != 'M' || dos.Magic[1] != 'Z') + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "DOS executable magic missing, refusing."); + + start = unaligned_read_le32(&dos.ExeHeader); + n = pread(fd, &pe, sizeof(pe), start); + if (n < 0) + return log_error_errno(errno, "Failed to read PE header: %m"); + if (n != sizeof(pe)) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading PE header, refusing."); + + if (pe.Magic[0] != 'P' || pe.Magic[1] != 'E' || pe.Magic[2] != 0 || pe.Magic[3] != 0) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "PE executable magic missing, refusing."); + + n_sections = unaligned_read_le16(&pe.FileHeader.NumberOfSections); + if (n_sections > 96) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "PE header has too many sections, refusing."); + + sections = new(struct PeSectionHeader, n_sections); + if (!sections) + return log_oom(); + + n = pread(fd, sections, + n_sections * sizeof(struct PeSectionHeader), + start + sizeof(pe) + unaligned_read_le16(&pe.FileHeader.SizeOfOptionalHeader)); + if (n < 0) + return log_error_errno(errno, "Failed to read section data: %m"); + if ((size_t) n != n_sections * sizeof(struct PeSectionHeader)) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading sections, refusing."); + + for (i = 0; i < n_sections; i++) { + _cleanup_free_ char *k = NULL; + uint32_t offset, size; + char **b; + + if (strneq((char*) sections[i].Name, ".osrel", sizeof(sections[i].Name))) + b = &osrelease; + else if (strneq((char*) sections[i].Name, ".cmdline", sizeof(sections[i].Name))) + b = &cmdline; + else + continue; + + if (*b) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Duplicate section %s, refusing.", sections[i].Name); + + offset = unaligned_read_le32(§ions[i].PointerToRawData); + size = unaligned_read_le32(§ions[i].VirtualSize); + + if (size > PE_SECTION_SIZE_MAX) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Section %s too large, refusing.", sections[i].Name); + + k = new(char, size+1); + if (!k) + return log_oom(); + + n = pread(fd, k, size, offset); + if (n < 0) + return log_error_errno(errno, "Failed to read section payload: %m"); + if ((size_t) n != size) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading section payload, refusing:"); + + /* Allow one trailing NUL byte, but nothing more. */ + if (size > 0 && memchr(k, 0, size - 1)) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Section contains embedded NUL byte: %m"); + + k[size] = 0; + *b = TAKE_PTR(k); + } + + if (!osrelease) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Image lacks .osrel section, refusing."); + + if (ret_osrelease) + *ret_osrelease = TAKE_PTR(osrelease); + if (ret_cmdline) + *ret_cmdline = TAKE_PTR(cmdline); + + return 0; +} + +static int boot_entries_find_unified( + const char *root, + const char *dir, + BootEntry **entries, + size_t *n_entries) { + + _cleanup_(closedirp) DIR *d = NULL; + size_t n_allocated = *n_entries; + struct dirent *de; + int r; + + assert(root); + assert(dir); + assert(entries); + assert(n_entries); + + d = opendir(dir); + if (!d) { + if (errno == ENOENT) + return 0; + + return log_error_errno(errno, "Failed to open %s: %m", dir); + } + + FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s: %m", dir)) { + _cleanup_free_ char *j = NULL, *osrelease = NULL, *cmdline = NULL; + _cleanup_close_ int fd = -1; + + dirent_ensure_type(d, de); + if (!dirent_is_file(de)) + continue; + + if (!endswith_no_case(de->d_name, ".efi")) + continue; + + if (!GREEDY_REALLOC0(*entries, n_allocated, *n_entries + 1)) + return log_oom(); + + fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) { + log_warning_errno(errno, "Failed to open %s/%s, ignoring: %m", dir, de->d_name); + continue; + } + + r = fd_verify_regular(fd); + if (r < 0) { + log_warning_errno(r, "File %s/%s is not regular, ignoring: %m", dir, de->d_name); + continue; + } + + r = find_sections(fd, &osrelease, &cmdline); + if (r < 0) + continue; + + j = path_join(dir, de->d_name); + if (!j) + return log_oom(); + + r = boot_entry_load_unified(root, j, osrelease, cmdline, *entries + *n_entries); + if (r < 0) + continue; + + (*n_entries) ++; + } + + return 0; +} + +static bool find_nonunique(BootEntry *entries, size_t n_entries, bool *arr) { + size_t i, j; + bool non_unique = false; + + assert(entries || n_entries == 0); + assert(arr || n_entries == 0); + + for (i = 0; i < n_entries; i++) + arr[i] = false; + + for (i = 0; i < n_entries; i++) + for (j = 0; j < n_entries; j++) + if (i != j && streq(boot_entry_title(entries + i), + boot_entry_title(entries + j))) + non_unique = arr[i] = arr[j] = true; + + return non_unique; +} + +static int boot_entries_uniquify(BootEntry *entries, size_t n_entries) { + char *s; + size_t i; + int r; + bool arr[n_entries]; + + assert(entries || n_entries == 0); + + /* Find _all_ non-unique titles */ + if (!find_nonunique(entries, n_entries, arr)) + return 0; + + /* Add version to non-unique titles */ + for (i = 0; i < n_entries; i++) + if (arr[i] && entries[i].version) { + r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].version); + if (r < 0) + return -ENOMEM; + + free_and_replace(entries[i].show_title, s); + } + + if (!find_nonunique(entries, n_entries, arr)) + return 0; + + /* Add machine-id to non-unique titles */ + for (i = 0; i < n_entries; i++) + if (arr[i] && entries[i].machine_id) { + r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].machine_id); + if (r < 0) + return -ENOMEM; + + free_and_replace(entries[i].show_title, s); + } + + if (!find_nonunique(entries, n_entries, arr)) + return 0; + + /* Add file name to non-unique titles */ + for (i = 0; i < n_entries; i++) + if (arr[i]) { + r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].id); + if (r < 0) + return -ENOMEM; + + free_and_replace(entries[i].show_title, s); + } + + return 0; +} + +static int boot_entries_select_default(const BootConfig *config) { + int i; + + assert(config); + assert(config->entries || config->n_entries == 0); + + if (config->n_entries == 0) { + log_debug("Found no default boot entry :("); + return -1; /* -1 means "no default" */ + } + + if (config->entry_oneshot) + for (i = config->n_entries - 1; i >= 0; i--) + if (streq(config->entry_oneshot, config->entries[i].id)) { + log_debug("Found default: id \"%s\" is matched by LoaderEntryOneShot", + config->entries[i].id); + return i; + } + + if (config->entry_default) + for (i = config->n_entries - 1; i >= 0; i--) + if (streq(config->entry_default, config->entries[i].id)) { + log_debug("Found default: id \"%s\" is matched by LoaderEntryDefault", + config->entries[i].id); + return i; + } + + if (config->default_pattern) + for (i = config->n_entries - 1; i >= 0; i--) + if (fnmatch(config->default_pattern, config->entries[i].id, FNM_CASEFOLD) == 0) { + log_debug("Found default: id \"%s\" is matched by pattern \"%s\"", + config->entries[i].id, config->default_pattern); + return i; + } + + log_debug("Found default: last entry \"%s\"", config->entries[config->n_entries - 1].id); + return config->n_entries - 1; +} + +int boot_entries_load_config( + const char *esp_path, + const char *xbootldr_path, + BootConfig *config) { + + const char *p; + int r; + + assert(config); + + if (esp_path) { + p = strjoina(esp_path, "/loader/loader.conf"); + r = boot_loader_read_conf(p, config); + if (r < 0) + return r; + + p = strjoina(esp_path, "/loader/entries"); + r = boot_entries_find(esp_path, p, &config->entries, &config->n_entries); + if (r < 0) + return r; + + p = strjoina(esp_path, "/EFI/Linux/"); + r = boot_entries_find_unified(esp_path, p, &config->entries, &config->n_entries); + if (r < 0) + return r; + } + + if (xbootldr_path) { + p = strjoina(xbootldr_path, "/loader/entries"); + r = boot_entries_find(xbootldr_path, p, &config->entries, &config->n_entries); + if (r < 0) + return r; + + p = strjoina(xbootldr_path, "/EFI/Linux/"); + r = boot_entries_find_unified(xbootldr_path, p, &config->entries, &config->n_entries); + if (r < 0) + return r; + } + + typesafe_qsort(config->entries, config->n_entries, boot_entry_compare); + + r = boot_entries_uniquify(config->entries, config->n_entries); + if (r < 0) + return log_error_errno(r, "Failed to uniquify boot entries: %m"); + + if (is_efi_boot()) { + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &config->entry_oneshot); + if (r < 0 && !IN_SET(r, -ENOENT, -ENODATA)) { + log_warning_errno(r, "Failed to read EFI variable \"LoaderEntryOneShot\": %m"); + if (r == -ENOMEM) + return r; + } + + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryDefault", &config->entry_default); + if (r < 0 && !IN_SET(r, -ENOENT, -ENODATA)) { + log_warning_errno(r, "Failed to read EFI variable \"LoaderEntryDefault\": %m"); + if (r == -ENOMEM) + return r; + } + } + + config->default_entry = boot_entries_select_default(config); + return 0; +} + +int boot_entries_load_config_auto( + const char *override_esp_path, + const char *override_xbootldr_path, + BootConfig *config) { + + _cleanup_free_ char *esp_where = NULL, *xbootldr_where = NULL; + int r; + + assert(config); + + /* This function is similar to boot_entries_load_config(), however we automatically search for the + * ESP and the XBOOTLDR partition unless it is explicitly specified. Also, if the user did not pass + * an ESP or XBOOTLDR path directly, let's see if /run/boot-loader-entries/ exists. If so, let's + * read data from there, as if it was an ESP (i.e. loading both entries and loader.conf data from + * it). This allows other boot loaders to pass boot loader entry information to our tools if they + * want to. */ + + if (!override_esp_path && !override_xbootldr_path) { + if (access("/run/boot-loader-entries/", F_OK) >= 0) + return boot_entries_load_config("/run/boot-loader-entries/", NULL, config); + + if (errno != ENOENT) + return log_error_errno(errno, + "Failed to determine whether /run/boot-loader-entries/ exists: %m"); + } + + r = find_esp_and_warn(override_esp_path, false, &esp_where, NULL, NULL, NULL, NULL); + if (r < 0) /* we don't log about ENOKEY here, but propagate it, leaving it to the caller to log */ + return r; + + r = find_xbootldr_and_warn(override_xbootldr_path, false, &xbootldr_where, NULL); + if (r < 0 && r != -ENOKEY) + return r; /* It's fine if the XBOOTLDR partition doesn't exist, hence we ignore ENOKEY here */ + + return boot_entries_load_config(esp_where, xbootldr_where, config); +} + +int boot_entries_augment_from_loader( + BootConfig *config, + char **found_by_loader, + bool only_auto) { + + static const char *const title_table[] = { + /* Pretty names for a few well-known automatically discovered entries. */ + "auto-osx", "macOS", + "auto-windows", "Windows Boot Manager", + "auto-efi-shell", "EFI Shell", + "auto-efi-default", "EFI Default Loader", + "auto-reboot-to-firmware-setup", "Reboot Into Firmware Interface", + }; + + size_t n_allocated; + char **i; + + assert(config); + + /* Let's add the entries discovered by the boot loader to the end of our list, unless they are + * already included there. */ + + n_allocated = config->n_entries; + + STRV_FOREACH(i, found_by_loader) { + _cleanup_free_ char *c = NULL, *t = NULL, *p = NULL; + char **a, **b; + + if (boot_config_has_entry(config, *i)) + continue; + + if (only_auto && !startswith(*i, "auto-")) + continue; + + c = strdup(*i); + if (!c) + return log_oom(); + + STRV_FOREACH_PAIR(a, b, (char**) title_table) + if (streq(*a, *i)) { + t = strdup(*b); + if (!t) + return log_oom(); + break; + } + + p = efi_variable_path(EFI_VENDOR_LOADER, "LoaderEntries"); + if (!p) + return log_oom(); + + if (!GREEDY_REALLOC0(config->entries, n_allocated, config->n_entries + 1)) + return log_oom(); + + config->entries[config->n_entries++] = (BootEntry) { + .type = BOOT_ENTRY_LOADER, + .id = TAKE_PTR(c), + .title = TAKE_PTR(t), + .path = TAKE_PTR(p), + }; + } + + return 0; +} + +/********************************************************************************/ + +static int verify_esp_blkid( + dev_t devid, + bool searching, + uint32_t *ret_part, + uint64_t *ret_pstart, + uint64_t *ret_psize, + sd_id128_t *ret_uuid) { + + sd_id128_t uuid = SD_ID128_NULL; + uint64_t pstart = 0, psize = 0; + uint32_t part = 0; + +#if HAVE_BLKID + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + _cleanup_free_ char *node = NULL; + const char *v; + int r; + + r = device_path_make_major_minor(S_IFBLK, devid, &node); + if (r < 0) + return log_error_errno(r, "Failed to format major/minor device path: %m"); + + errno = 0; + b = blkid_new_probe_from_filename(node); + if (!b) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(ENOMEM), "Failed to open file system \"%s\": %m", node); + + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); + blkid_probe_enable_partitions(b, 1); + blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); + + errno = 0; + r = blkid_do_safeprobe(b); + if (r == -2) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" is ambiguous.", node); + else if (r == 1) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" does not contain a label.", node); + else if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe file system \"%s\": %m", node); + + r = blkid_probe_lookup_value(b, "TYPE", &v, NULL); + if (r != 0) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "No filesystem found on \"%s\": %m", node); + if (!streq(v, "vfat")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" is not FAT.", node); + + r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL); + if (r != 0) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" is not located on a partitioned block device.", node); + if (!streq(v, "gpt")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" is not on a GPT partition table.", node); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe partition type UUID of \"%s\": %m", node); + if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" has wrong type for an EFI System Partition (ESP).", node); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition entry UUID of \"%s\": %m", node); + r = sd_id128_from_string(v, &uuid); + if (r < 0) + return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_NUMBER", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition number of \"%s\": %m", node); + r = safe_atou32(v, &part); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field."); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_OFFSET", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition offset of \"%s\": %m", node); + r = safe_atou64(v, &pstart); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field."); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_SIZE", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition size of \"%s\": %m", node); + r = safe_atou64(v, &psize); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field."); +#endif + + if (ret_part) + *ret_part = part; + if (ret_pstart) + *ret_pstart = pstart; + if (ret_psize) + *ret_psize = psize; + if (ret_uuid) + *ret_uuid = uuid; + + return 0; +} + +static int verify_esp_udev( + dev_t devid, + bool searching, + uint32_t *ret_part, + uint64_t *ret_pstart, + uint64_t *ret_psize, + sd_id128_t *ret_uuid) { + + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + _cleanup_free_ char *node = NULL; + sd_id128_t uuid = SD_ID128_NULL; + uint64_t pstart = 0, psize = 0; + uint32_t part = 0; + const char *v; + int r; + + r = device_path_make_major_minor(S_IFBLK, devid, &node); + if (r < 0) + return log_error_errno(r, "Failed to format major/minor device path: %m"); + + r = sd_device_new_from_devnum(&d, 'b', devid); + if (r < 0) + return log_error_errno(r, "Failed to get device from device number: %m"); + + r = sd_device_get_property_value(d, "ID_FS_TYPE", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + if (!streq(v, "vfat")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" is not FAT.", node ); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_SCHEME", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + if (!streq(v, "gpt")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" is not on a GPT partition table.", node); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" has wrong type for an EFI System Partition (ESP).", node); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_UUID", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + r = sd_id128_from_string(v, &uuid); + if (r < 0) + return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_NUMBER", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + r = safe_atou32(v, &part); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field."); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_OFFSET", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + r = safe_atou64(v, &pstart); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field."); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_SIZE", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + r = safe_atou64(v, &psize); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field."); + + if (ret_part) + *ret_part = part; + if (ret_pstart) + *ret_pstart = pstart; + if (ret_psize) + *ret_psize = psize; + if (ret_uuid) + *ret_uuid = uuid; + + return 0; +} + +static int verify_fsroot_dir( + const char *path, + bool searching, + bool unprivileged_mode, + dev_t *ret_dev) { + + struct stat st, st2; + const char *t2, *trigger; + int r; + + assert(path); + assert(ret_dev); + + /* So, the ESP and XBOOTLDR partition are commonly located on an autofs mount. stat() on the + * directory won't trigger it, if it is not mounted yet. Let's hence explicitly trigger it here, + * before stat()ing */ + trigger = strjoina(path, "/trigger"); /* Filename doesn't matter... */ + (void) access(trigger, F_OK); + + if (stat(path, &st) < 0) + return log_full_errno((searching && errno == ENOENT) || + (unprivileged_mode && errno == EACCES) ? LOG_DEBUG : LOG_ERR, errno, + "Failed to determine block device node of \"%s\": %m", path); + + if (major(st.st_dev) == 0) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "Block device node of \"%s\" is invalid.", path); + + t2 = strjoina(path, "/.."); + if (stat(t2, &st2) < 0) { + if (errno != EACCES) + r = -errno; + else { + _cleanup_free_ char *parent = NULL; + + /* If going via ".." didn't work due to EACCESS, then let's determine the parent path + * directly instead. It's not as good, due to symlinks and such, but we can't do + * anything better here. */ + + parent = dirname_malloc(path); + if (!parent) + return log_oom(); + + if (stat(parent, &st2) < 0) + r = -errno; + else + r = 0; + } + + if (r < 0) + return log_full_errno(unprivileged_mode && r == -EACCES ? LOG_DEBUG : LOG_ERR, r, + "Failed to determine block device node of parent of \"%s\": %m", path); + } + + if (st.st_dev == st2.st_dev) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "Directory \"%s\" is not the root of the file system.", path); + + if (ret_dev) + *ret_dev = st.st_dev; + + return 0; +} + +static int verify_esp( + const char *p, + bool searching, + bool unprivileged_mode, + uint32_t *ret_part, + uint64_t *ret_pstart, + uint64_t *ret_psize, + sd_id128_t *ret_uuid) { + + bool relax_checks; + dev_t devid; + int r; + + assert(p); + + /* This logs about all errors, except: + * + * -ENOENT → if 'searching' is set, and the dir doesn't exist + * -EADDRNOTAVAIL → if 'searching' is set, and the dir doesn't look like an ESP + * -EACESS → if 'unprivileged_mode' is set, and we have trouble accessing the thing + */ + + relax_checks = getenv_bool("SYSTEMD_RELAX_ESP_CHECKS") > 0; + + /* Non-root user can only check the status, so if an error occurred in the following, it does not cause any + * issues. Let's also, silence the error messages. */ + + if (!relax_checks) { + struct statfs sfs; + + if (statfs(p, &sfs) < 0) + /* If we are searching for the mount point, don't generate a log message if we can't find the path */ + return log_full_errno((searching && errno == ENOENT) || + (unprivileged_mode && errno == EACCES) ? LOG_DEBUG : LOG_ERR, errno, + "Failed to check file system type of \"%s\": %m", p); + + if (!F_TYPE_EQUAL(sfs.f_type, MSDOS_SUPER_MAGIC)) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV), + "File system \"%s\" is not a FAT EFI System Partition (ESP) file system.", p); + } + + r = verify_fsroot_dir(p, searching, unprivileged_mode, &devid); + if (r < 0) + return r; + + /* In a container we don't have access to block devices, skip this part of the verification, we trust + * the container manager set everything up correctly on its own. */ + if (detect_container() > 0 || relax_checks) + goto finish; + + /* If we are unprivileged we ask udev for the metadata about the partition. If we are privileged we + * use blkid instead. Why? Because this code is called from 'bootctl' which is pretty much an + * emergency recovery tool that should also work when udev isn't up (i.e. from the emergency shell), + * however blkid can't work if we have no privileges to access block devices directly, which is why + * we use udev in that case. */ + if (unprivileged_mode) + return verify_esp_udev(devid, searching, ret_part, ret_pstart, ret_psize, ret_uuid); + else + return verify_esp_blkid(devid, searching, ret_part, ret_pstart, ret_psize, ret_uuid); + +finish: + if (ret_part) + *ret_part = 0; + if (ret_pstart) + *ret_pstart = 0; + if (ret_psize) + *ret_psize = 0; + if (ret_uuid) + *ret_uuid = SD_ID128_NULL; + + return 0; +} + +int find_esp_and_warn( + const char *path, + bool unprivileged_mode, + char **ret_path, + uint32_t *ret_part, + uint64_t *ret_pstart, + uint64_t *ret_psize, + sd_id128_t *ret_uuid) { + + int r; + + /* This logs about all errors except: + * + * -ENOKEY → when we can't find the partition + * -EACCESS → when unprivileged_mode is true, and we can't access something + */ + + if (path) { + r = verify_esp(path, false, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid); + if (r < 0) + return r; + + goto found; + } + + path = getenv("SYSTEMD_ESP_PATH"); + if (path) { + if (!path_is_valid(path) || !path_is_absolute(path)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "$SYSTEMD_ESP_PATH does not refer to absolute path, refusing to use it: %s", + path); + + /* Note: when the user explicitly configured things with an env var we won't validate the mount + * point. After all we want this to be useful for testing. */ + goto found; + } + + FOREACH_STRING(path, "/efi", "/boot", "/boot/efi") { + + r = verify_esp(path, true, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid); + if (r >= 0) + goto found; + if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */ + return r; + } + + /* No logging here */ + return -ENOKEY; + +found: + if (ret_path) { + char *c; + + c = strdup(path); + if (!c) + return log_oom(); + + *ret_path = c; + } + + return 0; +} + +static int verify_xbootldr_blkid( + dev_t devid, + bool searching, + sd_id128_t *ret_uuid) { + + sd_id128_t uuid = SD_ID128_NULL; + +#if HAVE_BLKID + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + _cleanup_free_ char *node = NULL; + const char *v; + int r; + + r = device_path_make_major_minor(S_IFBLK, devid, &node); + if (r < 0) + return log_error_errno(r, "Failed to format major/minor device path: %m"); + errno = 0; + b = blkid_new_probe_from_filename(node); + if (!b) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(ENOMEM), "Failed to open file system \"%s\": %m", node); + + blkid_probe_enable_partitions(b, 1); + blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); + + errno = 0; + r = blkid_do_safeprobe(b); + if (r == -2) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" is ambiguous.", node); + else if (r == 1) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" does not contain a label.", node); + else if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe file system \"%s\": %m", node); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition scheme of \"%s\": %m", node); + if (streq(v, "gpt")) { + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition type UUID of \"%s\": %m", node); + if (!streq(v, "bc13c2ff-59e6-4262-a352-b275fd6f7172")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), + "File system \"%s\" has wrong type for extended boot loader partition.", node); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition entry UUID of \"%s\": %m", node); + r = sd_id128_from_string(v, &uuid); + if (r < 0) + return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v); + + } else if (streq(v, "dos")) { + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition type UUID of \"%s\": %m", node); + if (!streq(v, "0xea")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), + "File system \"%s\" has wrong type for extended boot loader partition.", node); + + } else + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), + "File system \"%s\" is not on a GPT or DOS partition table.", node); +#endif + + if (ret_uuid) + *ret_uuid = uuid; + + return 0; +} + +static int verify_xbootldr_udev( + dev_t devid, + bool searching, + sd_id128_t *ret_uuid) { + + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + _cleanup_free_ char *node = NULL; + sd_id128_t uuid = SD_ID128_NULL; + const char *v; + int r; + + r = device_path_make_major_minor(S_IFBLK, devid, &node); + if (r < 0) + return log_error_errno(r, "Failed to format major/minor device path: %m"); + + r = sd_device_new_from_devnum(&d, 'b', devid); + if (r < 0) + return log_error_errno(r, "Failed to get device from device number: %m"); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_SCHEME", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + + if (streq(v, "gpt")) { + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + if (!streq(v, "bc13c2ff-59e6-4262-a352-b275fd6f7172")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), + "File system \"%s\" has wrong type for extended boot loader partition.", node); + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_UUID", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + r = sd_id128_from_string(v, &uuid); + if (r < 0) + return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v); + + } else if (streq(v, "dos")) { + + r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v); + if (r < 0) + return log_error_errno(r, "Failed to get device property: %m"); + if (!streq(v, "0xea")) + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), + "File system \"%s\" has wrong type for extended boot loader partition.", node); + } else + return log_full_errno(searching ? LOG_DEBUG : LOG_ERR, + searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV), + "File system \"%s\" is not on a GPT or DOS partition table.", node); + + if (ret_uuid) + *ret_uuid = uuid; + + return 0; +} + +static int verify_xbootldr( + const char *p, + bool searching, + bool unprivileged_mode, + sd_id128_t *ret_uuid) { + + bool relax_checks; + dev_t devid; + int r; + + assert(p); + + relax_checks = getenv_bool("SYSTEMD_RELAX_XBOOTLDR_CHECKS") > 0; + + r = verify_fsroot_dir(p, searching, unprivileged_mode, &devid); + if (r < 0) + return r; + + if (detect_container() > 0 || relax_checks) + goto finish; + + if (unprivileged_mode) + return verify_xbootldr_udev(devid, searching, ret_uuid); + else + return verify_xbootldr_blkid(devid, searching, ret_uuid); + +finish: + if (ret_uuid) + *ret_uuid = SD_ID128_NULL; + + return 0; +} + +int find_xbootldr_and_warn( + const char *path, + bool unprivileged_mode, + char **ret_path, + sd_id128_t *ret_uuid) { + + int r; + + /* Similar to find_esp_and_warn(), but finds the XBOOTLDR partition. Returns the same errors. */ + + if (path) { + r = verify_xbootldr(path, false, unprivileged_mode, ret_uuid); + if (r < 0) + return r; + + goto found; + } + + path = getenv("SYSTEMD_XBOOTLDR_PATH"); + if (path) { + if (!path_is_valid(path) || !path_is_absolute(path)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "$SYSTEMD_XBOOTLDR_PATH does not refer to absolute path, refusing to use it: %s", + path); + + goto found; + } + + r = verify_xbootldr("/boot", true, unprivileged_mode, ret_uuid); + if (r >= 0) { + path = "/boot"; + goto found; + } + if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */ + return r; + + return -ENOKEY; + +found: + if (ret_path) { + char *c; + + c = strdup(path); + if (!c) + return log_oom(); + + *ret_path = c; + } + + return 0; +} diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h new file mode 100644 index 0000000..1557bd0 --- /dev/null +++ b/src/shared/bootspec.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include <inttypes.h> +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-id128.h" + +#include "string-util.h" + +typedef enum BootEntryType { + BOOT_ENTRY_CONF, /* Type #1 entries: *.conf files */ + BOOT_ENTRY_UNIFIED, /* Type #2 entries: *.efi files */ + BOOT_ENTRY_LOADER, /* Additional entries augmented from LoaderEntries EFI var */ + _BOOT_ENTRY_MAX, + _BOOT_ENTRY_INVALID = -1, +} BootEntryType; + +typedef struct BootEntry { + BootEntryType type; + char *id; /* This is the file basename without extension */ + char *id_old; /* Old-style ID, for deduplication purposes. */ + char *path; /* This is the full path to the drop-in file */ + char *root; /* The root path in which the drop-in was found, i.e. to which 'kernel', 'efi' and 'initrd' are relative */ + char *title; + char *show_title; + char *version; + char *machine_id; + char *architecture; + char **options; + char *kernel; /* linux is #defined to 1, yikes! */ + char *efi; + char **initrd; + char *device_tree; +} BootEntry; + +typedef struct BootConfig { + char *default_pattern; + char *timeout; + char *editor; + char *auto_entries; + char *auto_firmware; + char *console_mode; + char *random_seed_mode; + + char *entry_oneshot; + char *entry_default; + + BootEntry *entries; + size_t n_entries; + ssize_t default_entry; +} BootConfig; + +static inline bool boot_config_has_entry(BootConfig *config, const char *id) { + size_t j; + + for (j = 0; j < config->n_entries; j++) { + const char* entry_id_old = config->entries[j].id_old; + if (streq(config->entries[j].id, id) || + (entry_id_old && streq(entry_id_old, id))) + return true; + } + + return false; +} + +static inline BootEntry* boot_config_default_entry(BootConfig *config) { + if (config->default_entry < 0) + return NULL; + + return config->entries + config->default_entry; +} + +void boot_config_free(BootConfig *config); +int boot_entries_load_config(const char *esp_path, const char *xbootldr_path, BootConfig *config); +int boot_entries_load_config_auto(const char *override_esp_path, const char *override_xbootldr_path, BootConfig *config); +int boot_entries_augment_from_loader(BootConfig *config, char **list, bool only_auto); + +static inline const char* boot_entry_title(const BootEntry *entry) { + return entry->show_title ?: entry->title ?: entry->id; +} + +int find_esp_and_warn(const char *path, bool unprivileged_mode, char **ret_path, uint32_t *ret_part, uint64_t *ret_pstart, uint64_t *ret_psize, sd_id128_t *ret_uuid); +int find_xbootldr_and_warn(const char *path, bool unprivileged_mode, char **ret_path,sd_id128_t *ret_uuid); diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c new file mode 100644 index 0000000..1023914 --- /dev/null +++ b/src/shared/bpf-program.c @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "bpf-program.h" +#include "fd-util.h" +#include "memory-util.h" +#include "missing_syscall.h" +#include "path-util.h" + +int bpf_program_new(uint32_t prog_type, BPFProgram **ret) { + _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL; + + p = new0(BPFProgram, 1); + if (!p) + return -ENOMEM; + + p->n_ref = 1; + p->prog_type = prog_type; + p->kernel_fd = -1; + + *ret = TAKE_PTR(p); + + return 0; +} + +static BPFProgram *bpf_program_free(BPFProgram *p) { + assert(p); + + /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last + * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated + * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with + * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in + * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during + * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To + * counter this, we track closely to which cgroup a program was attached to and will detach it on our own + * whenever we close the BPF fd. */ + (void) bpf_program_cgroup_detach(p); + + safe_close(p->kernel_fd); + free(p->instructions); + free(p->attached_path); + + return mfree(p); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free); + +int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) { + + assert(p); + + if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */ + return -EBUSY; + + if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count)) + return -ENOMEM; + + memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count); + p->n_instructions += count; + + return 0; +} + +int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) { + union bpf_attr attr; + + assert(p); + + if (p->kernel_fd >= 0) { /* make this idempotent */ + memzero(log_buf, log_size); + return 0; + } + + // FIXME: Clang doesn't 0-pad with structured initialization, causing + // the kernel to reject the bpf_attr as invalid. See: + // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65 + // Ideally it should behave like GCC, so that we can remove these workarounds. + zero(attr); + attr.prog_type = p->prog_type; + attr.insns = PTR_TO_UINT64(p->instructions); + attr.insn_cnt = p->n_instructions; + attr.license = PTR_TO_UINT64("GPL"); + attr.log_buf = PTR_TO_UINT64(log_buf); + attr.log_level = !!log_buf; + attr.log_size = log_size; + + p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + if (p->kernel_fd < 0) + return -errno; + + return 0; +} + +int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) { + union bpf_attr attr; + + assert(p); + + if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */ + return -EBUSY; + + zero(attr); + attr.pathname = PTR_TO_UINT64(path); + + p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr)); + if (p->kernel_fd < 0) + return -errno; + + return 0; +} + +int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) { + _cleanup_free_ char *copy = NULL; + _cleanup_close_ int fd = -1; + union bpf_attr attr; + int r; + + assert(p); + assert(type >= 0); + assert(path); + + if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI)) + return -EINVAL; + + /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's + * refuse this early. */ + if (p->attached_path) { + if (!path_equal(p->attached_path, path)) + return -EBUSY; + if (p->attached_type != type) + return -EBUSY; + if (p->attached_flags != flags) + return -EBUSY; + + /* Here's a shortcut: if we previously attached this program already, then we don't have to do so + * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have + * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags + * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags + * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours + * would remain in effect. */ + if (flags != BPF_F_ALLOW_OVERRIDE) + return 0; + } + + /* Ensure we have a kernel object for this. */ + r = bpf_program_load_kernel(p, NULL, 0); + if (r < 0) + return r; + + copy = strdup(path); + if (!copy) + return -ENOMEM; + + fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + zero(attr); + attr.attach_type = type; + attr.target_fd = fd; + attr.attach_bpf_fd = p->kernel_fd; + attr.attach_flags = flags; + + if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) + return -errno; + + free_and_replace(p->attached_path, copy); + p->attached_type = type; + p->attached_flags = flags; + + return 0; +} + +int bpf_program_cgroup_detach(BPFProgram *p) { + _cleanup_close_ int fd = -1; + + assert(p); + + if (!p->attached_path) + return -EUNATCH; + + fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) + return -errno; + + /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached + * implicitly by the removal, hence don't complain */ + + } else { + union bpf_attr attr; + + zero(attr); + attr.attach_type = p->attached_type; + attr.target_fd = fd; + attr.attach_bpf_fd = p->kernel_fd; + + if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) + return -errno; + } + + p->attached_path = mfree(p->attached_path); + + return 0; +} + +int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) { + union bpf_attr attr; + int fd; + + zero(attr); + attr.map_type = type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + attr.map_flags = flags; + + fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + if (fd < 0) + return -errno; + + return fd; +} + +int bpf_map_update_element(int fd, const void *key, void *value) { + union bpf_attr attr; + + zero(attr); + attr.map_fd = fd; + attr.key = PTR_TO_UINT64(key); + attr.value = PTR_TO_UINT64(value); + + if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0) + return -errno; + + return 0; +} + +int bpf_map_lookup_element(int fd, const void *key, void *value) { + union bpf_attr attr; + + zero(attr); + attr.map_fd = fd; + attr.key = PTR_TO_UINT64(key); + attr.value = PTR_TO_UINT64(value); + + if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0) + return -errno; + + return 0; +} diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h new file mode 100644 index 0000000..eef77f9 --- /dev/null +++ b/src/shared/bpf-program.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <linux/bpf.h> +#include <stdint.h> +#include <sys/syscall.h> + +#include "list.h" +#include "macro.h" + +typedef struct BPFProgram BPFProgram; + +struct BPFProgram { + unsigned n_ref; + + int kernel_fd; + uint32_t prog_type; + + size_t n_instructions; + size_t allocated; + struct bpf_insn *instructions; + + char *attached_path; + int attached_type; + uint32_t attached_flags; +}; + +int bpf_program_new(uint32_t prog_type, BPFProgram **ret); +BPFProgram *bpf_program_unref(BPFProgram *p); +BPFProgram *bpf_program_ref(BPFProgram *p); + +int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count); +int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size); +int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path); + +int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags); +int bpf_program_cgroup_detach(BPFProgram *p); + +int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags); +int bpf_map_update_element(int fd, const void *key, void *value); +int bpf_map_lookup_element(int fd, const void *key, void *value); + +DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref); diff --git a/src/shared/bridge-util.c b/src/shared/bridge-util.c new file mode 100644 index 0000000..e1a8bcb --- /dev/null +++ b/src/shared/bridge-util.c @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bridge-util.h" +#include "string-table.h" + +static const char* const bridge_state_table[_NETDEV_BRIDGE_STATE_MAX] = { + [NETDEV_BRIDGE_STATE_DISABLED] = "disabled", + [NETDEV_BRIDGE_STATE_LISTENING] = "listening", + [NETDEV_BRIDGE_STATE_LEARNING] = "learning", + [NETDEV_BRIDGE_STATE_FORWARDING] = "forwarding", +}; + +DEFINE_STRING_TABLE_LOOKUP(bridge_state, BridgeState); diff --git a/src/shared/bridge-util.h b/src/shared/bridge-util.h new file mode 100644 index 0000000..c9b02d8 --- /dev/null +++ b/src/shared/bridge-util.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <netinet/in.h> +#include <linux/if_bridge.h> + +#include "conf-parser.h" + +typedef enum BridgeState { + NETDEV_BRIDGE_STATE_DISABLED = BR_STATE_DISABLED, + NETDEV_BRIDGE_STATE_LISTENING = BR_STATE_LISTENING, + NETDEV_BRIDGE_STATE_LEARNING = BR_STATE_LEARNING, + NETDEV_BRIDGE_STATE_FORWARDING = BR_STATE_FORWARDING, + NETDEV_BRIDGE_STATE_BLOCKING = BR_STATE_BLOCKING, + _NETDEV_BRIDGE_STATE_MAX, + _NETDEV_BRIDGE_STATE_INVALID = -1, +} BridgeState; + +const char *bridge_state_to_string(BridgeState d) _const_; +BridgeState bridge_state_from_string(const char *d) _pure_; diff --git a/src/shared/bus-get-properties.c b/src/shared/bus-get-properties.c new file mode 100644 index 0000000..32f68d5 --- /dev/null +++ b/src/shared/bus-get-properties.c @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-get-properties.h" +#include "rlimit-util.h" +#include "stdio-util.h" +#include "string-util.h" + +int bus_property_get_bool( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + int b = *(bool*) userdata; + + return sd_bus_message_append_basic(reply, 'b', &b); +} + +int bus_property_set_bool( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *value, + void *userdata, + sd_bus_error *error) { + + int b, r; + + r = sd_bus_message_read(value, "b", &b); + if (r < 0) + return r; + + *(bool*) userdata = b; + return 0; +} + +int bus_property_get_id128( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + sd_id128_t *id = userdata; + + if (sd_id128_is_null(*id)) /* Add an empty array if the ID is zero */ + return sd_bus_message_append(reply, "ay", 0); + else + return sd_bus_message_append_array(reply, 'y', id->bytes, 16); +} + +int bus_property_get_percent( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + char pstr[DECIMAL_STR_MAX(int) + 2]; + int p = *(int*) userdata; + + xsprintf(pstr, "%d%%", p); + + return sd_bus_message_append_basic(reply, 's', pstr); +} + +#if __SIZEOF_SIZE_T__ != 8 +int bus_property_get_size( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t sz = *(size_t*) userdata; + + return sd_bus_message_append_basic(reply, 't', &sz); +} +#endif + +#if __SIZEOF_LONG__ != 8 +int bus_property_get_long( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + int64_t l = *(long*) userdata; + + return sd_bus_message_append_basic(reply, 'x', &l); +} + +int bus_property_get_ulong( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t ul = *(unsigned long*) userdata; + + return sd_bus_message_append_basic(reply, 't', &ul); +} +#endif + +int bus_property_get_rlimit( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + const char *is_soft; + struct rlimit *rl; + uint64_t u; + rlim_t x; + + assert(bus); + assert(reply); + assert(userdata); + + is_soft = endswith(property, "Soft"); + + rl = *(struct rlimit**) userdata; + if (rl) + x = is_soft ? rl->rlim_cur : rl->rlim_max; + else { + struct rlimit buf = {}; + const char *s, *p; + int z; + + /* Chop off "Soft" suffix */ + s = is_soft ? strndupa(property, is_soft - property) : property; + + /* Skip over any prefix, such as "Default" */ + assert_se(p = strstr(s, "Limit")); + + z = rlimit_from_string(p + 5); + assert(z >= 0); + + (void) getrlimit(z, &buf); + x = is_soft ? buf.rlim_cur : buf.rlim_max; + } + + /* rlim_t might have different sizes, let's map RLIMIT_INFINITY to (uint64_t) -1, so that it is the same on all + * archs */ + u = x == RLIM_INFINITY ? (uint64_t) -1 : (uint64_t) x; + + return sd_bus_message_append(reply, "t", u); +} diff --git a/src/shared/bus-get-properties.h b/src/shared/bus-get-properties.h new file mode 100644 index 0000000..9832c0d --- /dev/null +++ b/src/shared/bus-get-properties.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "macro.h" + +int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error); +int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +int bus_property_get_percent(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); + +#define bus_property_get_usec ((sd_bus_property_get_t) NULL) +#define bus_property_set_usec ((sd_bus_property_set_t) NULL) + +assert_cc(sizeof(int) == sizeof(int32_t)); +#define bus_property_get_int ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(unsigned) == sizeof(uint32_t)); +#define bus_property_get_unsigned ((sd_bus_property_get_t) NULL) + +/* On 64bit machines we can use the default serializer for size_t and + * friends, otherwise we need to cast this manually */ +#if __SIZEOF_SIZE_T__ == 8 +#define bus_property_get_size ((sd_bus_property_get_t) NULL) +#else +int bus_property_get_size(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +#endif + +#if __SIZEOF_LONG__ == 8 +#define bus_property_get_long ((sd_bus_property_get_t) NULL) +#define bus_property_get_ulong ((sd_bus_property_get_t) NULL) +#else +int bus_property_get_long(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +int bus_property_get_ulong(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +#endif + +/* uid_t and friends on Linux 32 bit. This means we can just use the + * default serializer for 32bit unsigned, for serializing it, and map + * it to NULL here */ +assert_cc(sizeof(uid_t) == sizeof(uint32_t)); +#define bus_property_get_uid ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(gid_t) == sizeof(uint32_t)); +#define bus_property_get_gid ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(pid_t) == sizeof(uint32_t)); +#define bus_property_get_pid ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(mode_t) == sizeof(uint32_t)); +#define bus_property_get_mode ((sd_bus_property_get_t) NULL) + +int bus_property_get_rlimit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); + +#define BUS_DEFINE_PROPERTY_GET_GLOBAL(function, bus_type, val) \ + int function(sd_bus *bus, \ + const char *path, \ + const char *interface, \ + const char *property, \ + sd_bus_message *reply, \ + void *userdata, \ + sd_bus_error *error) { \ + \ + assert(bus); \ + assert(reply); \ + \ + return sd_bus_message_append(reply, bus_type, val); \ + } + +#define BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, get2) \ + int function(sd_bus *bus, \ + const char *path, \ + const char *interface, \ + const char *property, \ + sd_bus_message *reply, \ + void *userdata, \ + sd_bus_error *error) { \ + \ + data_type *data = userdata; \ + \ + assert(bus); \ + assert(reply); \ + assert(data); \ + \ + return sd_bus_message_append(reply, bus_type, \ + get2(get1(data))); \ + } + +#define ident(x) (x) +#define BUS_DEFINE_PROPERTY_GET(function, bus_type, data_type, get1) \ + BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, ident) + +#define ref(x) (*(x)) +#define BUS_DEFINE_PROPERTY_GET_REF(function, bus_type, data_type, get) \ + BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, ref, get) + +#define BUS_DEFINE_PROPERTY_GET_ENUM(function, name, type) \ + BUS_DEFINE_PROPERTY_GET_REF(function, "s", type, name##_to_string) + +#define BUS_PROPERTY_DUAL_TIMESTAMP(name, offset, flags) \ + SD_BUS_PROPERTY(name, "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, realtime), (flags)), \ + SD_BUS_PROPERTY(name "Monotonic", "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, monotonic), (flags)) diff --git a/src/shared/bus-locator.c b/src/shared/bus-locator.c new file mode 100644 index 0000000..3754d1d --- /dev/null +++ b/src/shared/bus-locator.c @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-locator.h" +#include "macro.h" + +const BusLocator* const bus_home_mgr = &(BusLocator){ + .destination = "org.freedesktop.home1", + .path = "/org/freedesktop/home1", + .interface = "org.freedesktop.home1.Manager", +}; + +const BusLocator* const bus_import_mgr = &(BusLocator){ + .destination ="org.freedesktop.import1", + .path = "/org/freedesktop/import1", + .interface = "org.freedesktop.import1.Manager" +}; + +const BusLocator* const bus_locale = &(BusLocator){ + .destination = "org.freedesktop.locale1", + .path = "/org/freedesktop/locale1", + .interface = "org.freedesktop.locale1" +}; + +const BusLocator* const bus_login_mgr = &(BusLocator){ + .destination = "org.freedesktop.login1", + .path = "/org/freedesktop/login1", + .interface = "org.freedesktop.login1.Manager" +}; + +const BusLocator* const bus_machine_mgr = &(BusLocator){ + .destination ="org.freedesktop.machine1", + .path = "/org/freedesktop/machine1", + .interface = "org.freedesktop.machine1.Manager" +}; + +const BusLocator* const bus_network_mgr = &(BusLocator){ + .destination = "org.freedesktop.network1", + .path = "/org/freedesktop/network1", + .interface = "org.freedesktop.network1.Manager" +}; + +const BusLocator* const bus_portable_mgr = &(BusLocator){ + .destination = "org.freedesktop.portable1", + .path = "/org/freedesktop/portable1", + .interface = "org.freedesktop.portable1.Manager" +}; + +const BusLocator* const bus_resolve_mgr = &(BusLocator){ + .destination = "org.freedesktop.resolve1", + .path = "/org/freedesktop/resolve1", + .interface = "org.freedesktop.resolve1.Manager" +}; + +const BusLocator* const bus_systemd_mgr = &(BusLocator){ + .destination = "org.freedesktop.systemd1", + .path = "/org/freedesktop/systemd1", + .interface = "org.freedesktop.systemd1.Manager" +}; + +const BusLocator* const bus_timedate = &(BusLocator){ + .destination = "org.freedesktop.timedate1", + .path = "/org/freedesktop/timedate1", + .interface = "org.freedesktop.timedate1" +}; + +/* Shorthand flavors of the sd-bus convenience helpers with destination,path,interface strings encapsulated + * within a single struct. */ +int bus_call_method_async( + sd_bus *bus, + sd_bus_slot **slot, + const BusLocator *locator, + const char *member, + sd_bus_message_handler_t callback, + void *userdata, + const char *types, ...) { + + va_list ap; + int r; + + assert(locator); + + va_start(ap, types); + r = sd_bus_call_method_asyncv(bus, slot, locator->destination, locator->path, locator->interface, member, callback, userdata, types, ap); + va_end(ap); + + return r; +} + +int bus_call_method( + sd_bus *bus, + const BusLocator *locator, + const char *member, + sd_bus_error *error, + sd_bus_message **reply, + const char *types, ...) { + + va_list ap; + int r; + + assert(locator); + + va_start(ap, types); + r = sd_bus_call_methodv(bus, locator->destination, locator->path, locator->interface, member, error, reply, types, ap); + va_end(ap); + + return r; +} + +int bus_get_property( + sd_bus *bus, + const BusLocator *locator, + const char *member, + sd_bus_error *error, + sd_bus_message **reply, + const char *type) { + + assert(locator); + + return sd_bus_get_property(bus, locator->destination, locator->path, locator->interface, member, error, reply, type); +} + +int bus_get_property_trivial( + sd_bus *bus, + const BusLocator *locator, + const char *member, + sd_bus_error *error, + char type, void *ptr) { + + assert(locator); + + return sd_bus_get_property_trivial(bus, locator->destination, locator->path, locator->interface, member, error, type, ptr); +} + +int bus_get_property_string( + sd_bus *bus, + const BusLocator *locator, + const char *member, + sd_bus_error *error, + char **ret) { + + assert(locator); + + return sd_bus_get_property_string(bus, locator->destination, locator->path, locator->interface, member, error, ret); +} + +int bus_get_property_strv( + sd_bus *bus, + const BusLocator *locator, + const char *member, + sd_bus_error *error, + char ***ret) { + + assert(locator); + + return sd_bus_get_property_strv(bus, locator->destination, locator->path, locator->interface, member, error, ret); +} + +int bus_set_property( + sd_bus *bus, + const BusLocator *locator, + const char *member, + sd_bus_error *error, + const char *type, ...) { + + va_list ap; + int r; + + assert(locator); + + va_start(ap, type); + r = sd_bus_set_propertyv(bus, locator->destination, locator->path, locator->interface, member, error, type, ap); + va_end(ap); + + return r; +} + +int bus_match_signal( + sd_bus *bus, + sd_bus_slot **ret, + const BusLocator *locator, + const char *member, + sd_bus_message_handler_t callback, + void *userdata) { + + assert(locator); + + return sd_bus_match_signal(bus, ret, locator->destination, locator->path, locator->interface, member, callback, userdata); +} + +int bus_match_signal_async( + sd_bus *bus, + sd_bus_slot **ret, + const BusLocator *locator, + const char *member, + sd_bus_message_handler_t callback, + sd_bus_message_handler_t install_callback, + void *userdata) { + + assert(locator); + + return sd_bus_match_signal_async(bus, ret, locator->destination, locator->path, locator->interface, member, callback, install_callback, userdata); +} + +int bus_message_new_method_call( + sd_bus *bus, + sd_bus_message **m, + const BusLocator *locator, + const char *member) { + + assert(locator); + + return sd_bus_message_new_method_call(bus, m, locator->destination, locator->path, locator->interface, member); +} diff --git a/src/shared/bus-locator.h b/src/shared/bus-locator.h new file mode 100644 index 0000000..fe3b876 --- /dev/null +++ b/src/shared/bus-locator.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +typedef struct BusLocator { + const char *destination; + const char *path; + const char *interface; +} BusLocator; + +extern const BusLocator* const bus_home_mgr; +extern const BusLocator* const bus_import_mgr; +extern const BusLocator* const bus_locale; +extern const BusLocator* const bus_login_mgr; +extern const BusLocator* const bus_machine_mgr; +extern const BusLocator* const bus_network_mgr; +extern const BusLocator* const bus_portable_mgr; +extern const BusLocator* const bus_resolve_mgr; +extern const BusLocator* const bus_systemd_mgr; +extern const BusLocator* const bus_timedate; + +/* Shorthand flavors of the sd-bus convenience helpers with destination,path,interface strings encapsulated + * within a single struct. */ +int bus_call_method_async(sd_bus *bus, sd_bus_slot **slot, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, void *userdata, const char *types, ...); +int bus_call_method(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, sd_bus_message **reply, const char *types, ...); +int bus_get_property(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, sd_bus_message **reply, const char *type); +int bus_get_property_trivial(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char type, void *ptr); +int bus_get_property_string(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char **ret); +int bus_get_property_strv(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char ***ret); +int bus_set_property(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, const char *type, ...); +int bus_match_signal(sd_bus *bus, sd_bus_slot **ret, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, void *userdata); +int bus_match_signal_async(sd_bus *bus, sd_bus_slot **ret, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, sd_bus_message_handler_t install_callback, void *userdata); +int bus_message_new_method_call(sd_bus *bus, sd_bus_message **m, const BusLocator *locator, const char *member); diff --git a/src/shared/bus-log-control-api.c b/src/shared/bus-log-control-api.c new file mode 100644 index 0000000..06e6697 --- /dev/null +++ b/src/shared/bus-log-control-api.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-get-properties.h" +#include "bus-log-control-api.h" +#include "bus-util.h" +#include "log.h" +#include "sd-bus.h" +#include "syslog-util.h" + +int bus_property_get_log_level( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + _cleanup_free_ char *t = NULL; + int r; + + assert(bus); + assert(reply); + + r = log_level_to_string_alloc(log_get_max_level(), &t); + if (r < 0) + return r; + + return sd_bus_message_append(reply, "s", t); +} + +int bus_property_set_log_level( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *value, + void *userdata, + sd_bus_error *error) { + + const char *t; + int r; + + assert(bus); + assert(value); + + r = sd_bus_message_read(value, "s", &t); + if (r < 0) + return r; + + r = log_level_from_string(t); + if (r < 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log level '%s'", t); + + log_info("Setting log level to %s.", t); + log_set_max_level(r); + + return 0; +} + +BUS_DEFINE_PROPERTY_GET_GLOBAL(bus_property_get_log_target, "s", log_target_to_string(log_get_target())); + +int bus_property_set_log_target( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *value, + void *userdata, + sd_bus_error *error) { + + LogTarget target; + const char *t; + int r; + + assert(bus); + assert(value); + + r = sd_bus_message_read(value, "s", &t); + if (r < 0) + return r; + + target = log_target_from_string(t); + if (target < 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log target '%s'", t); + + log_info("Setting log target to %s.", log_target_to_string(target)); + log_set_target(target); + log_open(); + + return 0; +} + +BUS_DEFINE_PROPERTY_GET_GLOBAL(bus_property_get_syslog_identifier, "s", program_invocation_short_name); + +static const sd_bus_vtable log_control_vtable[] = { + SD_BUS_VTABLE_START(0), + + SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", bus_property_get_log_level, bus_property_set_log_level, 0, 0), + SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", bus_property_get_log_target, bus_property_set_log_target, 0, 0), + SD_BUS_PROPERTY("SyslogIdentifier", "s", bus_property_get_syslog_identifier, 0, 0), + + /* One of those days we might want to add a similar, second interface to cover common service + * operations such as Reload(), Reexecute(), Exit() … and maybe some properties exposing version + * number and other meta-data of the service. */ + + SD_BUS_VTABLE_END, +}; + +const BusObjectImplementation log_control_object = { + "/org/freedesktop/LogControl1", + "org.freedesktop.LogControl1", + .vtables = BUS_VTABLES(log_control_vtable), +}; diff --git a/src/shared/bus-log-control-api.h b/src/shared/bus-log-control-api.h new file mode 100644 index 0000000..85f60a7 --- /dev/null +++ b/src/shared/bus-log-control-api.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "bus-object.h" + +extern const BusObjectImplementation log_control_object; +static inline int bus_log_control_api_register(sd_bus *bus) { + return bus_add_implementation(bus, &log_control_object, NULL); +} + +int bus_property_get_log_level(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +int bus_property_set_log_level(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error); + +int bus_property_get_log_target(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +int bus_property_set_log_target(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); + +int bus_property_get_syslog_identifier(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); diff --git a/src/shared/bus-map-properties.c b/src/shared/bus-map-properties.c new file mode 100644 index 0000000..8460856 --- /dev/null +++ b/src/shared/bus-map-properties.c @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-map-properties.h" +#include "alloc-util.h" +#include "strv.h" +#include "bus-message.h" + +int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) { + sd_id128_t *p = userdata; + const void *v; + size_t n; + int r; + + r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, &v, &n); + if (r < 0) + return r; + + if (n == 0) + *p = SD_ID128_NULL; + else if (n == 16) + memcpy((*p).bytes, v, n); + else + return -EINVAL; + + return 0; +} + +int bus_map_strv_sort(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) { + _cleanup_strv_free_ char **l = NULL; + char ***p = userdata; + int r; + + r = bus_message_read_strv_extend(m, &l); + if (r < 0) + return r; + + r = strv_extend_strv(p, l, false); + if (r < 0) + return r; + + strv_sort(*p); + return 0; +} + +static int map_basic(sd_bus *bus, const char *member, sd_bus_message *m, unsigned flags, sd_bus_error *error, void *userdata) { + char type; + int r; + + r = sd_bus_message_peek_type(m, &type, NULL); + if (r < 0) + return r; + + switch (type) { + + case SD_BUS_TYPE_STRING: + case SD_BUS_TYPE_OBJECT_PATH: { + const char **p = userdata; + const char *s; + + r = sd_bus_message_read_basic(m, type, &s); + if (r < 0) + return r; + + if (isempty(s)) + s = NULL; + + if (flags & BUS_MAP_STRDUP) + return free_and_strdup((char **) userdata, s); + + *p = s; + return 0; + } + + case SD_BUS_TYPE_ARRAY: { + _cleanup_strv_free_ char **l = NULL; + char ***p = userdata; + + r = bus_message_read_strv_extend(m, &l); + if (r < 0) + return r; + + return strv_extend_strv(p, l, false); + } + + case SD_BUS_TYPE_BOOLEAN: { + int b; + + r = sd_bus_message_read_basic(m, type, &b); + if (r < 0) + return r; + + if (flags & BUS_MAP_BOOLEAN_AS_BOOL) + *(bool*) userdata = b; + else + *(int*) userdata = b; + + return 0; + } + + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: { + uint32_t u, *p = userdata; + + r = sd_bus_message_read_basic(m, type, &u); + if (r < 0) + return r; + + *p = u; + return 0; + } + + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: { + uint64_t t, *p = userdata; + + r = sd_bus_message_read_basic(m, type, &t); + if (r < 0) + return r; + + *p = t; + return 0; + } + + case SD_BUS_TYPE_DOUBLE: { + double d, *p = userdata; + + r = sd_bus_message_read_basic(m, type, &d); + if (r < 0) + return r; + + *p = d; + return 0; + }} + + return -EOPNOTSUPP; +} + +int bus_message_map_all_properties( + sd_bus_message *m, + const struct bus_properties_map *map, + unsigned flags, + sd_bus_error *error, + void *userdata) { + + int r; + + assert(m); + assert(map); + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}"); + if (r < 0) + return r; + + while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) { + const struct bus_properties_map *prop; + const char *member; + const char *contents; + void *v; + unsigned i; + + r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member); + if (r < 0) + return r; + + for (i = 0, prop = NULL; map[i].member; i++) + if (streq(map[i].member, member)) { + prop = &map[i]; + break; + } + + if (prop) { + r = sd_bus_message_peek_type(m, NULL, &contents); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents); + if (r < 0) + return r; + + v = (uint8_t *)userdata + prop->offset; + if (map[i].set) + r = prop->set(sd_bus_message_get_bus(m), member, m, error, v); + else + r = map_basic(sd_bus_message_get_bus(m), member, m, flags, error, v); + if (r < 0) + return r; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } else { + r = sd_bus_message_skip(m, "v"); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } + if (r < 0) + return r; + + return sd_bus_message_exit_container(m); +} + +int bus_map_all_properties( + sd_bus *bus, + const char *destination, + const char *path, + const struct bus_properties_map *map, + unsigned flags, + sd_bus_error *error, + sd_bus_message **reply, + void *userdata) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert(bus); + assert(destination); + assert(path); + assert(map); + assert(reply || (flags & BUS_MAP_STRDUP)); + + r = sd_bus_call_method( + bus, + destination, + path, + "org.freedesktop.DBus.Properties", + "GetAll", + error, + &m, + "s", ""); + if (r < 0) + return r; + + r = bus_message_map_all_properties(m, map, flags, error, userdata); + if (r < 0) + return r; + + if (reply) + *reply = sd_bus_message_ref(m); + + return r; +} diff --git a/src/shared/bus-map-properties.h b/src/shared/bus-map-properties.h new file mode 100644 index 0000000..2a766e3 --- /dev/null +++ b/src/shared/bus-map-properties.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +typedef int (*bus_property_set_t) (sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata); + +struct bus_properties_map { + const char *member; + const char *signature; + bus_property_set_t set; + size_t offset; +}; + +enum { + BUS_MAP_STRDUP = 1 << 0, /* If set, each "s" message is duplicated. Thus, each pointer needs to be freed. */ + BUS_MAP_BOOLEAN_AS_BOOL = 1 << 1, /* If set, each "b" message is written to a bool pointer. If not set, "b" is written to a int pointer. */ +}; + +int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata); +int bus_map_strv_sort(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata); + +int bus_message_map_all_properties(sd_bus_message *m, const struct bus_properties_map *map, unsigned flags, sd_bus_error *error, void *userdata); +int bus_map_all_properties(sd_bus *bus, const char *destination, const char *path, const struct bus_properties_map *map, + unsigned flags, sd_bus_error *error, sd_bus_message **reply, void *userdata); diff --git a/src/shared/bus-message-util.c b/src/shared/bus-message-util.c new file mode 100644 index 0000000..19500a5 --- /dev/null +++ b/src/shared/bus-message-util.c @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-message-util.h" + +#include "resolve-util.h" + +int bus_message_read_ifindex(sd_bus_message *message, sd_bus_error *error, int *ret) { + int ifindex, r; + + assert(message); + assert(ret); + + assert_cc(sizeof(int) == sizeof(int32_t)); + + r = sd_bus_message_read(message, "i", &ifindex); + if (r < 0) + return r; + + if (ifindex <= 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index"); + + *ret = ifindex; + + return 0; +} + +int bus_message_read_family(sd_bus_message *message, sd_bus_error *error, int *ret) { + int family, r; + + assert(message); + assert(ret); + + assert_cc(sizeof(int) == sizeof(int32_t)); + + r = sd_bus_message_read(message, "i", &family); + if (r < 0) + return r; + + if (!IN_SET(family, AF_INET, AF_INET6)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family); + + *ret = family; + return 0; +} + +int bus_message_read_in_addr_auto(sd_bus_message *message, sd_bus_error *error, int *ret_family, union in_addr_union *ret_addr) { + int family, r; + const void *d; + size_t sz; + + assert(message); + + r = sd_bus_message_read(message, "i", &family); + if (r < 0) + return r; + + r = sd_bus_message_read_array(message, 'y', &d, &sz); + if (r < 0) + return r; + + if (!IN_SET(family, AF_INET, AF_INET6)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family); + + if (sz != FAMILY_ADDRESS_SIZE(family)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address size"); + + if (ret_family) + *ret_family = family; + if (ret_addr) + memcpy(ret_addr, d, sz); + return 0; +} + +static int bus_message_read_dns_one( + sd_bus_message *message, + sd_bus_error *error, + bool extended, + int *ret_family, + union in_addr_union *ret_address, + uint16_t *ret_port, + const char **ret_server_name) { + const char *server_name = NULL; + union in_addr_union a; + uint16_t port = 0; + int family, r; + + assert(message); + assert(ret_family); + assert(ret_address); + assert(ret_port); + assert(ret_server_name); + + r = sd_bus_message_enter_container(message, 'r', extended ? "iayqs" : "iay"); + if (r <= 0) + return r; + + r = bus_message_read_in_addr_auto(message, error, &family, &a); + if (r < 0) + return r; + + if (!dns_server_address_valid(family, &a)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNS server address"); + + if (extended) { + r = sd_bus_message_read(message, "q", &port); + if (r < 0) + return r; + + if (IN_SET(port, 53, 853)) + port = 0; + + r = sd_bus_message_read(message, "s", &server_name); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + *ret_family = family; + *ret_address = a; + *ret_port = port; + *ret_server_name = server_name; + + return 1; +} + +int bus_message_read_dns_servers( + sd_bus_message *message, + sd_bus_error *error, + bool extended, + struct in_addr_full ***ret_dns, + size_t *ret_n_dns) { + + struct in_addr_full **dns = NULL; + size_t n = 0, allocated = 0; + int r; + + assert(message); + assert(ret_dns); + assert(ret_n_dns); + + r = sd_bus_message_enter_container(message, 'a', extended ? "(iayqs)" : "(iay)"); + if (r < 0) + return r; + + for (;;) { + const char *server_name; + union in_addr_union a; + uint16_t port; + int family; + + r = bus_message_read_dns_one(message, error, extended, &family, &a, &port, &server_name); + if (r < 0) + goto clear; + if (r == 0) + break; + + if (!GREEDY_REALLOC(dns, allocated, n+1)) { + r = -ENOMEM; + goto clear; + } + + r = in_addr_full_new(family, &a, port, 0, server_name, dns + n); + if (r < 0) + goto clear; + + n++; + } + + *ret_dns = TAKE_PTR(dns); + *ret_n_dns = n; + return 0; + +clear: + for (size_t i = 0; i < n; i++) + in_addr_full_free(dns[i]); + free(dns); + + return r; +} diff --git a/src/shared/bus-message-util.h b/src/shared/bus-message-util.h new file mode 100644 index 0000000..b82c083 --- /dev/null +++ b/src/shared/bus-message-util.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "in-addr-util.h" +#include "socket-netlink.h" + +int bus_message_read_ifindex(sd_bus_message *message, sd_bus_error *error, int *ret); +int bus_message_read_family(sd_bus_message *message, sd_bus_error *error, int *ret); +int bus_message_read_in_addr_auto(sd_bus_message *message, sd_bus_error *error, int *ret_family, union in_addr_union *ret_addr); + +int bus_message_read_dns_servers( + sd_bus_message *message, + sd_bus_error *error, + bool extended, + struct in_addr_full ***ret_dns, + size_t *ret_n_dns); diff --git a/src/shared/bus-object.c b/src/shared/bus-object.c new file mode 100644 index 0000000..f2e5391 --- /dev/null +++ b/src/shared/bus-object.c @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-introspect.h" +#include "bus-object.h" +#include "macro.h" +#include "string-util.h" +#include "strv.h" + +int bus_add_implementation(sd_bus *bus, const BusObjectImplementation *impl, void *userdata) { + int r; + + log_debug("Registering bus object implementation for path=%s iface=%s", impl->path, impl->interface); + + for (const sd_bus_vtable **p = impl->vtables; p && *p; p++) { + r = sd_bus_add_object_vtable(bus, NULL, + impl->path, + impl->interface, + *p, + userdata); + if (r < 0) + return log_error_errno(r, "Failed to register bus path %s with interface %s: %m", + impl->path, + impl->interface); + } + + for (const BusObjectVtablePair *p = impl->fallback_vtables; p && p->vtable; p++) { + r = sd_bus_add_fallback_vtable(bus, NULL, + impl->path, + impl->interface, + p->vtable, + p->object_find, + userdata); + if (r < 0) + return log_error_errno(r, "Failed to register bus path %s with interface %s: %m", + impl->path, + impl->interface); + } + + if (impl->node_enumerator) { + r = sd_bus_add_node_enumerator(bus, NULL, + impl->path, + impl->node_enumerator, + userdata); + if (r < 0) + return log_error_errno(r, "Failed to add node enumerator for %s: %m", + impl->path); + } + + if (impl->manager) { + r = sd_bus_add_object_manager(bus, NULL, impl->path); + if (r < 0) + return log_error_errno(r, "Failed to add object manager for %s: %m", impl->path); + } + + for (size_t i = 0; impl->children && impl->children[i]; i++) { + r = bus_add_implementation(bus, impl->children[i], userdata); + if (r < 0) + return r; + } + + return 0; +} + +static const BusObjectImplementation* find_implementation( + const char *pattern, + const BusObjectImplementation* const* bus_objects) { + + for (size_t i = 0; bus_objects && bus_objects[i]; i++) { + const BusObjectImplementation *impl = bus_objects[i]; + + if (STR_IN_SET(pattern, impl->path, impl->interface)) + return impl; + + impl = find_implementation(pattern, impl->children); + if (impl) + return impl; + } + + return NULL; +} + +static int bus_introspect_implementation( + struct introspect *intro, + const BusObjectImplementation *impl) { + int r; + + for (const sd_bus_vtable **p = impl->vtables; p && *p; p++) { + r = introspect_write_interface(intro, impl->interface, *p); + if (r < 0) + return log_error_errno(r, "Failed to write introspection data: %m"); + } + + for (const BusObjectVtablePair *p = impl->fallback_vtables; p && p->vtable; p++) { + r = introspect_write_interface(intro, impl->interface, p->vtable); + if (r < 0) + return log_error_errno(r, "Failed to write introspection data: %m"); + } + + return 0; +} + +static void list_paths( + FILE *out, + const BusObjectImplementation* const* bus_objects) { + + for (size_t i = 0; bus_objects[i]; i++) { + fprintf(out, "%s\t%s\n", bus_objects[i]->path, bus_objects[i]->interface); + if (bus_objects[i]->children) + list_paths(out, bus_objects[i]->children); + } +} + +int bus_introspect_implementations( + FILE *out, + const char *pattern, + const BusObjectImplementation* const* bus_objects) { + + const BusObjectImplementation *impl, *main_impl = NULL; + _cleanup_free_ char *s = NULL; + int r; + + if (streq(pattern, "list")) { + list_paths(out, bus_objects); + return 0; + } + + struct introspect intro = {}; + bool is_interface = sd_bus_interface_name_is_valid(pattern); + + impl = find_implementation(pattern, bus_objects); + if (!impl) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), + "%s %s not found", + is_interface ? "Interface" : "Object path", + pattern); + + /* We use trusted=false here to get all the @org.freedesktop.systemd1.Privileged annotations. */ + r = introspect_begin(&intro, false); + if (r < 0) + return log_error_errno(r, "Failed to write introspection data: %m"); + + r = introspect_write_default_interfaces(&intro, impl->manager); + if (r < 0) + return log_error_errno(r, "Failed to write introspection data: %m"); + + /* Check if there is a non-fallback path that applies to the given interface, also + * print it. This is useful in the case of units: o.fd.systemd1.Service is declared + * as a fallback vtable for o/fd/systemd1/unit, and we also want to print + * o.fd.systemd1.Unit, which is the non-fallback implementation. */ + if (impl->fallback_vtables && is_interface) + main_impl = find_implementation(impl->path, bus_objects); + + if (main_impl) + bus_introspect_implementation(&intro, main_impl); + + if (impl != main_impl) + bus_introspect_implementation(&intro, impl); + + _cleanup_set_free_ Set *nodes = NULL; + + for (size_t i = 0; impl->children && impl->children[i]; i++) { + r = set_put_strdup(&nodes, impl->children[i]->path); + if (r < 0) + return log_oom(); + } + + r = introspect_write_child_nodes(&intro, nodes, impl->path); + if (r < 0) + return r; + + r = introspect_finish(&intro, &s); + if (r < 0) + return log_error_errno(r, "Failed to write introspection data: %m"); + + fputs(s, out); + return 0; +} diff --git a/src/shared/bus-object.h b/src/shared/bus-object.h new file mode 100644 index 0000000..145bbd2 --- /dev/null +++ b/src/shared/bus-object.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> + +#include "sd-bus.h" + +typedef struct BusObjectImplementation BusObjectImplementation; + +typedef struct BusObjectVtablePair { + const sd_bus_vtable *vtable; + sd_bus_object_find_t object_find; +} BusObjectVtablePair; + +struct BusObjectImplementation { + const char *path; + const char *interface; + const sd_bus_vtable **vtables; + const BusObjectVtablePair *fallback_vtables; + sd_bus_node_enumerator_t node_enumerator; + bool manager; + const BusObjectImplementation **children; +}; + +#define BUS_VTABLES(...) ((const sd_bus_vtable* []){ __VA_ARGS__, NULL }) +#define BUS_FALLBACK_VTABLES(...) ((const BusObjectVtablePair[]) { __VA_ARGS__, {} }) +#define BUS_IMPLEMENTATIONS(...) ((const BusObjectImplementation* []) { __VA_ARGS__, NULL }) + +int bus_add_implementation(sd_bus *bus, const BusObjectImplementation *impl, void *userdata); +int bus_introspect_implementations( + FILE *out, + const char *pattern, + const BusObjectImplementation* const* bus_objects); diff --git a/src/shared/bus-polkit.c b/src/shared/bus-polkit.c new file mode 100644 index 0000000..14122e0 --- /dev/null +++ b/src/shared/bus-polkit.c @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-internal.h" +#include "bus-message.h" +#include "bus-polkit.h" +#include "bus-util.h" +#include "strv.h" +#include "user-util.h" + +static int check_good_user(sd_bus_message *m, uid_t good_user) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + uid_t sender_uid; + int r; + + assert(m); + + if (good_user == UID_INVALID) + return 0; + + r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_EUID, &creds); + if (r < 0) + return r; + + /* Don't trust augmented credentials for authorization */ + assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EUID) == 0, -EPERM); + + r = sd_bus_creds_get_euid(creds, &sender_uid); + if (r < 0) + return r; + + return sender_uid == good_user; +} + +#if ENABLE_POLKIT +static int bus_message_append_strv_key_value( + sd_bus_message *m, + const char **l) { + + const char **k, **v; + int r; + + assert(m); + + r = sd_bus_message_open_container(m, 'a', "{ss}"); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(k, v, l) { + r = sd_bus_message_append(m, "{ss}", *k, *v); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return r; + + return r; +} +#endif + +int bus_test_polkit( + sd_bus_message *call, + int capability, + const char *action, + const char **details, + uid_t good_user, + bool *_challenge, + sd_bus_error *ret_error) { + + int r; + + assert(call); + assert(action); + + /* Tests non-interactively! */ + + r = check_good_user(call, good_user); + if (r != 0) + return r; + + r = sd_bus_query_sender_privilege(call, capability); + if (r < 0) + return r; + else if (r > 0) + return 1; +#if ENABLE_POLKIT + else { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *request = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + int authorized = false, challenge = false; + const char *sender; + + sender = sd_bus_message_get_sender(call); + if (!sender) + return -EBADMSG; + + r = sd_bus_message_new_method_call( + call->bus, + &request, + "org.freedesktop.PolicyKit1", + "/org/freedesktop/PolicyKit1/Authority", + "org.freedesktop.PolicyKit1.Authority", + "CheckAuthorization"); + if (r < 0) + return r; + + r = sd_bus_message_append( + request, + "(sa{sv})s", + "system-bus-name", 1, "name", "s", sender, + action); + if (r < 0) + return r; + + r = bus_message_append_strv_key_value(request, details); + if (r < 0) + return r; + + r = sd_bus_message_append(request, "us", 0, NULL); + if (r < 0) + return r; + + r = sd_bus_call(call->bus, request, 0, ret_error, &reply); + if (r < 0) { + /* Treat no PK available as access denied */ + if (bus_error_is_unknown_service(ret_error)) { + sd_bus_error_free(ret_error); + return -EACCES; + } + + return r; + } + + r = sd_bus_message_enter_container(reply, 'r', "bba{ss}"); + if (r < 0) + return r; + + r = sd_bus_message_read(reply, "bb", &authorized, &challenge); + if (r < 0) + return r; + + if (authorized) + return 1; + + if (_challenge) { + *_challenge = challenge; + return 0; + } + } +#endif + + return -EACCES; +} + +#if ENABLE_POLKIT + +typedef struct AsyncPolkitQuery { + char *action; + char **details; + + sd_bus_message *request, *reply; + sd_bus_slot *slot; + + Hashmap *registry; + sd_event_source *defer_event_source; +} AsyncPolkitQuery; + +static void async_polkit_query_free(AsyncPolkitQuery *q) { + if (!q) + return; + + sd_bus_slot_unref(q->slot); + + if (q->registry && q->request) + hashmap_remove(q->registry, q->request); + + sd_bus_message_unref(q->request); + sd_bus_message_unref(q->reply); + + free(q->action); + strv_free(q->details); + + sd_event_source_disable_unref(q->defer_event_source); + free(q); +} + +static int async_polkit_defer(sd_event_source *s, void *userdata) { + AsyncPolkitQuery *q = userdata; + + assert(s); + + /* This is called as idle event source after we processed the async polkit reply, hopefully after the + * method call we re-enqueued has been properly processed. */ + + async_polkit_query_free(q); + return 0; +} + +static int async_polkit_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) { + AsyncPolkitQuery *q = userdata; + int r; + + assert(reply); + assert(q); + + assert(q->slot); + q->slot = sd_bus_slot_unref(q->slot); + + assert(!q->reply); + q->reply = sd_bus_message_ref(reply); + + /* Now, let's dispatch the original message a second time be re-enqueing. This will then traverse the + * whole message processing again, and thus re-validating and re-retrieving the "userdata" field + * again. + * + * We install an idle event loop event to clean-up the PolicyKit request data when we are idle again, + * i.e. after the second time the message is processed is complete. */ + + assert(!q->defer_event_source); + r = sd_event_add_defer(sd_bus_get_event(sd_bus_message_get_bus(reply)), &q->defer_event_source, async_polkit_defer, q); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(q->defer_event_source, SD_EVENT_PRIORITY_IDLE); + if (r < 0) + goto fail; + + r = sd_event_source_set_enabled(q->defer_event_source, SD_EVENT_ONESHOT); + if (r < 0) + goto fail; + + r = sd_bus_message_rewind(q->request, true); + if (r < 0) + goto fail; + + r = sd_bus_enqueue_for_read(sd_bus_message_get_bus(q->request), q->request); + if (r < 0) + goto fail; + + return 1; + +fail: + log_debug_errno(r, "Processing asynchronous PolicyKit reply failed, ignoring: %m"); + (void) sd_bus_reply_method_errno(q->request, r, NULL); + async_polkit_query_free(q); + return r; +} + +#endif + +int bus_verify_polkit_async( + sd_bus_message *call, + int capability, + const char *action, + const char **details, + bool interactive, + uid_t good_user, + Hashmap **registry, + sd_bus_error *ret_error) { + +#if ENABLE_POLKIT + _cleanup_(sd_bus_message_unrefp) sd_bus_message *pk = NULL; + AsyncPolkitQuery *q; + int c; +#endif + const char *sender; + int r; + + assert(call); + assert(action); + assert(registry); + + r = check_good_user(call, good_user); + if (r != 0) + return r; + +#if ENABLE_POLKIT + q = hashmap_get(*registry, call); + if (q) { + int authorized, challenge; + + /* This is the second invocation of this function, and there's already a response from + * polkit, let's process it */ + assert(q->reply); + + /* If the operation we want to authenticate changed between the first and the second time, + * let's not use this authentication, it might be out of date as the object and context we + * operate on might have changed. */ + if (!streq(q->action, action) || + !strv_equal(q->details, (char**) details)) + return -ESTALE; + + if (sd_bus_message_is_method_error(q->reply, NULL)) { + const sd_bus_error *e; + + e = sd_bus_message_get_error(q->reply); + + /* Treat no PK available as access denied */ + if (bus_error_is_unknown_service(e)) + return -EACCES; + + /* Copy error from polkit reply */ + sd_bus_error_copy(ret_error, e); + return -sd_bus_error_get_errno(e); + } + + r = sd_bus_message_enter_container(q->reply, 'r', "bba{ss}"); + if (r >= 0) + r = sd_bus_message_read(q->reply, "bb", &authorized, &challenge); + if (r < 0) + return r; + + if (authorized) + return 1; + + if (challenge) + return sd_bus_error_set(ret_error, SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, "Interactive authentication required."); + + return -EACCES; + } +#endif + + r = sd_bus_query_sender_privilege(call, capability); + if (r < 0) + return r; + else if (r > 0) + return 1; + + sender = sd_bus_message_get_sender(call); + if (!sender) + return -EBADMSG; + +#if ENABLE_POLKIT + c = sd_bus_message_get_allow_interactive_authorization(call); + if (c < 0) + return c; + if (c > 0) + interactive = true; + + r = hashmap_ensure_allocated(registry, NULL); + if (r < 0) + return r; + + r = sd_bus_message_new_method_call( + call->bus, + &pk, + "org.freedesktop.PolicyKit1", + "/org/freedesktop/PolicyKit1/Authority", + "org.freedesktop.PolicyKit1.Authority", + "CheckAuthorization"); + if (r < 0) + return r; + + r = sd_bus_message_append( + pk, + "(sa{sv})s", + "system-bus-name", 1, "name", "s", sender, + action); + if (r < 0) + return r; + + r = bus_message_append_strv_key_value(pk, details); + if (r < 0) + return r; + + r = sd_bus_message_append(pk, "us", interactive, NULL); + if (r < 0) + return r; + + q = new(AsyncPolkitQuery, 1); + if (!q) + return -ENOMEM; + + *q = (AsyncPolkitQuery) { + .request = sd_bus_message_ref(call), + }; + + q->action = strdup(action); + if (!q->action) { + async_polkit_query_free(q); + return -ENOMEM; + } + + q->details = strv_copy((char**) details); + if (!q->details) { + async_polkit_query_free(q); + return -ENOMEM; + } + + r = hashmap_put(*registry, call, q); + if (r < 0) { + async_polkit_query_free(q); + return r; + } + + q->registry = *registry; + + r = sd_bus_call_async(call->bus, &q->slot, pk, async_polkit_callback, q, 0); + if (r < 0) { + async_polkit_query_free(q); + return r; + } + + return 0; +#endif + + return -EACCES; +} + +void bus_verify_polkit_async_registry_free(Hashmap *registry) { +#if ENABLE_POLKIT + hashmap_free_with_destructor(registry, async_polkit_query_free); +#endif +} diff --git a/src/shared/bus-polkit.h b/src/shared/bus-polkit.h new file mode 100644 index 0000000..91a88a2 --- /dev/null +++ b/src/shared/bus-polkit.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "hashmap.h" + +int bus_test_polkit(sd_bus_message *call, int capability, const char *action, const char **details, uid_t good_user, bool *_challenge, sd_bus_error *e); + +int bus_verify_polkit_async(sd_bus_message *call, int capability, const char *action, const char **details, bool interactive, uid_t good_user, Hashmap **registry, sd_bus_error *error); +void bus_verify_polkit_async_registry_free(Hashmap *registry); diff --git a/src/shared/bus-print-properties.c b/src/shared/bus-print-properties.c new file mode 100644 index 0000000..4cea250 --- /dev/null +++ b/src/shared/bus-print-properties.c @@ -0,0 +1,462 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-print-properties.h" +#include "cap-list.h" +#include "cgroup-util.h" +#include "escape.h" +#include "mountpoint-util.h" +#include "nsflags.h" +#include "parse-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "user-util.h" + +int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *value) { + assert(name); + + if (expected_value && !streq_ptr(expected_value, value)) + return 0; + + if (only_value) + puts(value); + else + printf("%s=%s\n", name, value); + + return 0; +} + +int bus_print_property_valuef(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) { + va_list ap; + int r; + + assert(name); + assert(fmt); + + if (expected_value) { + _cleanup_free_ char *s = NULL; + + va_start(ap, fmt); + r = vasprintf(&s, fmt, ap); + va_end(ap); + if (r < 0) + return -ENOMEM; + + if (streq_ptr(expected_value, s)) { + if (only_value) + puts(s); + else + printf("%s=%s\n", name, s); + } + + return 0; + } + + if (!only_value) + printf("%s=", name); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + puts(""); + + return 0; +} + +static int bus_print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) { + char type; + const char *contents; + int r; + + assert(name); + assert(m); + + r = sd_bus_message_peek_type(m, &type, &contents); + if (r < 0) + return r; + + switch (type) { + + case SD_BUS_TYPE_STRING: { + const char *s; + + r = sd_bus_message_read_basic(m, type, &s); + if (r < 0) + return r; + + if (all || !isempty(s)) { + bool good; + + /* This property has a single value, so we need to take + * care not to print a new line, everything else is OK. */ + good = !strchr(s, '\n'); + bus_print_property_value(name, expected_value, value, good ? s : "[unprintable]"); + } + + return 1; + } + + case SD_BUS_TYPE_BOOLEAN: { + int b; + + r = sd_bus_message_read_basic(m, type, &b); + if (r < 0) + return r; + + if (expected_value && parse_boolean(expected_value) != b) + return 1; + + bus_print_property_value(name, NULL, value, yes_no(b)); + return 1; + } + + case SD_BUS_TYPE_UINT64: { + uint64_t u; + + r = sd_bus_message_read_basic(m, type, &u); + if (r < 0) + return r; + + /* Yes, heuristics! But we can change this check + * should it turn out to not be sufficient */ + + if (endswith(name, "Timestamp") || + STR_IN_SET(name, "NextElapseUSecRealtime", "LastTriggerUSec", "TimeUSec", "RTCTimeUSec")) { + char timestamp[FORMAT_TIMESTAMP_MAX]; + const char *t; + + t = format_timestamp(timestamp, sizeof(timestamp), u); + if (t || all) + bus_print_property_value(name, expected_value, value, strempty(t)); + + } else if (strstr(name, "USec")) { + char timespan[FORMAT_TIMESPAN_MAX]; + + (void) format_timespan(timespan, sizeof(timespan), u, 0); + bus_print_property_value(name, expected_value, value, timespan); + + } else if (streq(name, "CoredumpFilter")) { + char buf[STRLEN("0xFFFFFFFF")]; + + xsprintf(buf, "0x%"PRIx64, u); + bus_print_property_value(name, expected_value, value, buf); + + } else if (streq(name, "RestrictNamespaces")) { + _cleanup_free_ char *s = NULL; + const char *result; + + if ((u & NAMESPACE_FLAGS_ALL) == 0) + result = "yes"; + else if (FLAGS_SET(u, NAMESPACE_FLAGS_ALL)) + result = "no"; + else { + r = namespace_flags_to_string(u, &s); + if (r < 0) + return r; + + result = strempty(s); + } + + bus_print_property_value(name, expected_value, value, result); + + } else if (streq(name, "MountFlags")) { + const char *result; + + result = mount_propagation_flags_to_string(u); + if (!result) + return -EINVAL; + + bus_print_property_value(name, expected_value, value, result); + + } else if (STR_IN_SET(name, "CapabilityBoundingSet", "AmbientCapabilities")) { + _cleanup_free_ char *s = NULL; + + r = capability_set_to_string_alloc(u, &s); + if (r < 0) + return r; + + bus_print_property_value(name, expected_value, value, s); + + } else if ((STR_IN_SET(name, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight") && u == CGROUP_WEIGHT_INVALID) || + (STR_IN_SET(name, "CPUShares", "StartupCPUShares") && u == CGROUP_CPU_SHARES_INVALID) || + (STR_IN_SET(name, "BlockIOWeight", "StartupBlockIOWeight") && u == CGROUP_BLKIO_WEIGHT_INVALID) || + (STR_IN_SET(name, "MemoryCurrent", "TasksCurrent") && u == (uint64_t) -1) || + (endswith(name, "NSec") && u == (uint64_t) -1)) + + bus_print_property_value(name, expected_value, value, "[not set]"); + + else if ((STR_IN_SET(name, "DefaultMemoryLow", "DefaultMemoryMin", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) || + (STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == (uint64_t) -1) || + (startswith(name, "Limit") && u == (uint64_t) -1) || + (startswith(name, "DefaultLimit") && u == (uint64_t) -1)) + + bus_print_property_value(name, expected_value, value, "infinity"); + else if (STR_IN_SET(name, "IPIngressBytes", "IPIngressPackets", "IPEgressBytes", "IPEgressPackets") && u == (uint64_t) -1) + bus_print_property_value(name, expected_value, value, "[no data]"); + else + bus_print_property_valuef(name, expected_value, value, "%"PRIu64, u); + + return 1; + } + + case SD_BUS_TYPE_INT64: { + int64_t i; + + r = sd_bus_message_read_basic(m, type, &i); + if (r < 0) + return r; + + bus_print_property_valuef(name, expected_value, value, "%"PRIi64, i); + return 1; + } + + case SD_BUS_TYPE_UINT32: { + uint32_t u; + + r = sd_bus_message_read_basic(m, type, &u); + if (r < 0) + return r; + + if (strstr(name, "UMask") || strstr(name, "Mode")) + bus_print_property_valuef(name, expected_value, value, "%04o", u); + + else if (streq(name, "UID")) { + if (u == UID_INVALID) + bus_print_property_value(name, expected_value, value, "[not set]"); + else + bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u); + } else if (streq(name, "GID")) { + if (u == GID_INVALID) + bus_print_property_value(name, expected_value, value, "[not set]"); + else + bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u); + } else + bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u); + + return 1; + } + + case SD_BUS_TYPE_INT32: { + int32_t i; + + r = sd_bus_message_read_basic(m, type, &i); + if (r < 0) + return r; + + bus_print_property_valuef(name, expected_value, value, "%"PRIi32, i); + return 1; + } + + case SD_BUS_TYPE_DOUBLE: { + double d; + + r = sd_bus_message_read_basic(m, type, &d); + if (r < 0) + return r; + + bus_print_property_valuef(name, expected_value, value, "%g", d); + return 1; + } + + case SD_BUS_TYPE_ARRAY: + if (streq(contents, "s")) { + bool first = true; + const char *str; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, contents); + if (r < 0) + return r; + + while ((r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &str)) > 0) { + _cleanup_free_ char *e = NULL; + + e = shell_maybe_quote(str, ESCAPE_BACKSLASH_ONELINE); + if (!e) + return -ENOMEM; + + if (first) { + if (!value) + printf("%s=", name); + first = false; + } else + fputs(" ", stdout); + + fputs(e, stdout); + } + if (r < 0) + return r; + + if (first && all && !value) + printf("%s=", name); + if (!first || all) + puts(""); + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + + return 1; + + } else if (streq(contents, "y")) { + const uint8_t *u; + size_t n; + + r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, (const void**) &u, &n); + if (r < 0) + return r; + + if (all || n > 0) { + unsigned i; + + if (!value) + printf("%s=", name); + + for (i = 0; i < n; i++) + printf("%02x", u[i]); + + puts(""); + } + + return 1; + + } else if (streq(contents, "u")) { + uint32_t *u; + size_t n; + + r = sd_bus_message_read_array(m, SD_BUS_TYPE_UINT32, (const void**) &u, &n); + if (r < 0) + return r; + + if (all || n > 0) { + unsigned i; + + if (!value) + printf("%s=", name); + + for (i = 0; i < n; i++) + printf("%08x", u[i]); + + puts(""); + } + + return 1; + } + + break; + } + + return 0; +} + +int bus_message_print_all_properties( + sd_bus_message *m, + bus_message_print_t func, + char **filter, + bool value, + bool all, + Set **found_properties) { + + int r; + + assert(m); + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}"); + if (r < 0) + return r; + + while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) { + _cleanup_free_ char *name_with_equal = NULL; + const char *name, *contents, *expected_value = NULL; + + r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &name); + if (r < 0) + return r; + + if (found_properties) { + r = set_ensure_put(found_properties, &string_hash_ops, name); + if (r < 0) + return log_oom(); + } + + name_with_equal = strjoin(name, "="); + if (!name_with_equal) + return log_oom(); + + if (!filter || strv_find(filter, name) || + (expected_value = strv_find_startswith(filter, name_with_equal))) { + r = sd_bus_message_peek_type(m, NULL, &contents); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents); + if (r < 0) + return r; + + if (func) + r = func(name, expected_value, m, value, all); + if (!func || r == 0) + r = bus_print_property(name, expected_value, m, value, all); + if (r < 0) + return r; + if (r == 0) { + if (all && !expected_value) + printf("%s=[unprintable]\n", name); + /* skip what we didn't read */ + r = sd_bus_message_skip(m, contents); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } else { + r = sd_bus_message_skip(m, "v"); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + + return 0; +} + +int bus_print_all_properties( + sd_bus *bus, + const char *dest, + const char *path, + bus_message_print_t func, + char **filter, + bool value, + bool all, + Set **found_properties) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + assert(bus); + assert(path); + + r = sd_bus_call_method(bus, + dest, + path, + "org.freedesktop.DBus.Properties", + "GetAll", + &error, + &reply, + "s", ""); + if (r < 0) + return r; + + return bus_message_print_all_properties(reply, func, filter, value, all, found_properties); +} diff --git a/src/shared/bus-print-properties.h b/src/shared/bus-print-properties.h new file mode 100644 index 0000000..a457475 --- /dev/null +++ b/src/shared/bus-print-properties.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-bus.h" + +#include "macro.h" +#include "set.h" + +typedef int (*bus_message_print_t) (const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all); + +int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *value); +int bus_print_property_valuef(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) _printf_(4,5); +int bus_message_print_all_properties(sd_bus_message *m, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties); +int bus_print_all_properties(sd_bus *bus, const char *dest, const char *path, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties); diff --git a/src/shared/bus-unit-procs.c b/src/shared/bus-unit-procs.c new file mode 100644 index 0000000..3e97be9 --- /dev/null +++ b/src/shared/bus-unit-procs.c @@ -0,0 +1,407 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-unit-procs.h" +#include "hashmap.h" +#include "list.h" +#include "locale-util.h" +#include "macro.h" +#include "path-util.h" +#include "process-util.h" +#include "sort-util.h" +#include "string-util.h" +#include "terminal-util.h" + +struct CGroupInfo { + char *cgroup_path; + bool is_const; /* If false, cgroup_path should be free()'d */ + + Hashmap *pids; /* PID → process name */ + bool done; + + struct CGroupInfo *parent; + LIST_FIELDS(struct CGroupInfo, siblings); + LIST_HEAD(struct CGroupInfo, children); + size_t n_children; +}; + +static int add_cgroup(Hashmap *cgroups, const char *path, bool is_const, struct CGroupInfo **ret) { + struct CGroupInfo *parent = NULL, *cg; + int r; + + assert(cgroups); + assert(ret); + + path = empty_to_root(path); + + cg = hashmap_get(cgroups, path); + if (cg) { + *ret = cg; + return 0; + } + + if (!empty_or_root(path)) { + const char *e, *pp; + + e = strrchr(path, '/'); + if (!e) + return -EINVAL; + + pp = strndupa(path, e - path); + + r = add_cgroup(cgroups, pp, false, &parent); + if (r < 0) + return r; + } + + cg = new0(struct CGroupInfo, 1); + if (!cg) + return -ENOMEM; + + if (is_const) + cg->cgroup_path = (char*) path; + else { + cg->cgroup_path = strdup(path); + if (!cg->cgroup_path) { + free(cg); + return -ENOMEM; + } + } + + cg->is_const = is_const; + cg->parent = parent; + + r = hashmap_put(cgroups, cg->cgroup_path, cg); + if (r < 0) { + if (!is_const) + free(cg->cgroup_path); + free(cg); + return r; + } + + if (parent) { + LIST_PREPEND(siblings, parent->children, cg); + parent->n_children++; + } + + *ret = cg; + return 1; +} + +static int add_process( + Hashmap *cgroups, + const char *path, + pid_t pid, + const char *name) { + + struct CGroupInfo *cg; + int r; + + assert(cgroups); + assert(name); + assert(pid > 0); + + r = add_cgroup(cgroups, path, true, &cg); + if (r < 0) + return r; + + r = hashmap_ensure_allocated(&cg->pids, &trivial_hash_ops); + if (r < 0) + return r; + + return hashmap_put(cg->pids, PID_TO_PTR(pid), (void*) name); +} + +static void remove_cgroup(Hashmap *cgroups, struct CGroupInfo *cg) { + assert(cgroups); + assert(cg); + + while (cg->children) + remove_cgroup(cgroups, cg->children); + + hashmap_remove(cgroups, cg->cgroup_path); + + if (!cg->is_const) + free(cg->cgroup_path); + + hashmap_free(cg->pids); + + if (cg->parent) + LIST_REMOVE(siblings, cg->parent->children, cg); + + free(cg); +} + +static int cgroup_info_compare_func(struct CGroupInfo * const *a, struct CGroupInfo * const *b) { + return strcmp((*a)->cgroup_path, (*b)->cgroup_path); +} + +static int dump_processes( + Hashmap *cgroups, + const char *cgroup_path, + const char *prefix, + unsigned n_columns, + OutputFlags flags) { + + struct CGroupInfo *cg; + int r; + + assert(prefix); + + cgroup_path = empty_to_root(cgroup_path); + + cg = hashmap_get(cgroups, cgroup_path); + if (!cg) + return 0; + + if (!hashmap_isempty(cg->pids)) { + const char *name; + size_t n = 0, i; + pid_t *pids; + void *pidp; + int width; + + /* Order processes by their PID */ + pids = newa(pid_t, hashmap_size(cg->pids)); + + HASHMAP_FOREACH_KEY(name, pidp, cg->pids) + pids[n++] = PTR_TO_PID(pidp); + + assert(n == hashmap_size(cg->pids)); + typesafe_qsort(pids, n, pid_compare_func); + + width = DECIMAL_STR_WIDTH(pids[n-1]); + + for (i = 0; i < n; i++) { + _cleanup_free_ char *e = NULL; + const char *special; + bool more; + + name = hashmap_get(cg->pids, PID_TO_PTR(pids[i])); + assert(name); + + if (n_columns != 0) { + unsigned k; + + k = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U); + + e = ellipsize(name, k, 100); + if (e) + name = e; + } + + more = i+1 < n || cg->children; + special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT); + + fprintf(stdout, "%s%s%*"PID_PRI" %s\n", + prefix, + special, + width, pids[i], + name); + } + } + + if (cg->children) { + struct CGroupInfo **children, *child; + size_t n = 0, i; + + /* Order subcgroups by their name */ + children = newa(struct CGroupInfo*, cg->n_children); + LIST_FOREACH(siblings, child, cg->children) + children[n++] = child; + assert(n == cg->n_children); + typesafe_qsort(children, n, cgroup_info_compare_func); + + if (n_columns != 0) + n_columns = MAX(LESS_BY(n_columns, 2U), 20U); + + for (i = 0; i < n; i++) { + _cleanup_free_ char *pp = NULL; + const char *name, *special; + bool more; + + child = children[i]; + + name = strrchr(child->cgroup_path, '/'); + if (!name) + return -EINVAL; + name++; + + more = i+1 < n; + special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT); + + fputs(prefix, stdout); + fputs(special, stdout); + fputs(name, stdout); + fputc('\n', stdout); + + special = special_glyph(more ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE); + + pp = strjoin(prefix, special); + if (!pp) + return -ENOMEM; + + r = dump_processes(cgroups, child->cgroup_path, pp, n_columns, flags); + if (r < 0) + return r; + } + } + + cg->done = true; + return 0; +} + +static int dump_extra_processes( + Hashmap *cgroups, + const char *prefix, + unsigned n_columns, + OutputFlags flags) { + + _cleanup_free_ pid_t *pids = NULL; + _cleanup_hashmap_free_ Hashmap *names = NULL; + struct CGroupInfo *cg; + size_t n_allocated = 0, n = 0, k; + int width, r; + + /* Prints the extra processes, i.e. those that are in cgroups we haven't displayed yet. We show them as + * combined, sorted, linear list. */ + + HASHMAP_FOREACH(cg, cgroups) { + const char *name; + void *pidp; + + if (cg->done) + continue; + + if (hashmap_isempty(cg->pids)) + continue; + + r = hashmap_ensure_allocated(&names, &trivial_hash_ops); + if (r < 0) + return r; + + if (!GREEDY_REALLOC(pids, n_allocated, n + hashmap_size(cg->pids))) + return -ENOMEM; + + HASHMAP_FOREACH_KEY(name, pidp, cg->pids) { + pids[n++] = PTR_TO_PID(pidp); + + r = hashmap_put(names, pidp, (void*) name); + if (r < 0) + return r; + } + } + + if (n == 0) + return 0; + + typesafe_qsort(pids, n, pid_compare_func); + width = DECIMAL_STR_WIDTH(pids[n-1]); + + for (k = 0; k < n; k++) { + _cleanup_free_ char *e = NULL; + const char *name; + + name = hashmap_get(names, PID_TO_PTR(pids[k])); + assert(name); + + if (n_columns != 0) { + unsigned z; + + z = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U); + + e = ellipsize(name, z, 100); + if (e) + name = e; + } + + fprintf(stdout, "%s%s %*" PID_PRI " %s\n", + prefix, + special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), + width, pids[k], + name); + } + + return 0; +} + +int unit_show_processes( + sd_bus *bus, + const char *unit, + const char *cgroup_path, + const char *prefix, + unsigned n_columns, + OutputFlags flags, + sd_bus_error *error) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + Hashmap *cgroups = NULL; + struct CGroupInfo *cg; + int r; + + assert(bus); + assert(unit); + + if (flags & OUTPUT_FULL_WIDTH) + n_columns = 0; + else if (n_columns <= 0) + n_columns = columns(); + + prefix = strempty(prefix); + + r = sd_bus_call_method( + bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "GetUnitProcesses", + error, + &reply, + "s", + unit); + if (r < 0) + return r; + + cgroups = hashmap_new(&path_hash_ops); + if (!cgroups) + return -ENOMEM; + + r = sd_bus_message_enter_container(reply, 'a', "(sus)"); + if (r < 0) + goto finish; + + for (;;) { + const char *path = NULL, *name = NULL; + uint32_t pid; + + r = sd_bus_message_read(reply, "(sus)", &path, &pid, &name); + if (r < 0) + goto finish; + if (r == 0) + break; + + r = add_process(cgroups, path, pid, name); + if (r == -ENOMEM) + goto finish; + if (r < 0) + log_warning_errno(r, "Invalid process description in GetUnitProcesses reply: cgroup=\"%s\" pid="PID_FMT" command=\"%s\", ignoring: %m", + path, pid, name); + } + + r = sd_bus_message_exit_container(reply); + if (r < 0) + goto finish; + + r = dump_processes(cgroups, cgroup_path, prefix, n_columns, flags); + if (r < 0) + goto finish; + + r = dump_extra_processes(cgroups, prefix, n_columns, flags); + +finish: + while ((cg = hashmap_first(cgroups))) + remove_cgroup(cgroups, cg); + + hashmap_free(cgroups); + + return r; +} diff --git a/src/shared/bus-unit-procs.h b/src/shared/bus-unit-procs.h new file mode 100644 index 0000000..78c5569 --- /dev/null +++ b/src/shared/bus-unit-procs.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "output-mode.h" + +int unit_show_processes(sd_bus *bus, const char *unit, const char *cgroup_path, const char *prefix, unsigned n_columns, OutputFlags flags, sd_bus_error *error); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c new file mode 100644 index 0000000..2bab229 --- /dev/null +++ b/src/shared/bus-unit-util.c @@ -0,0 +1,2432 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-error.h" +#include "bus-unit-util.h" +#include "bus-util.h" +#include "cap-list.h" +#include "cgroup-setup.h" +#include "cgroup-util.h" +#include "condition.h" +#include "coredump-util.h" +#include "cpu-set-util.h" +#include "dissect-image.h" +#include "escape.h" +#include "exec-util.h" +#include "exit-status.h" +#include "fileio.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "in-addr-util.h" +#include "ip-protocol-list.h" +#include "libmount-util.h" +#include "locale-util.h" +#include "log.h" +#include "missing_fs.h" +#include "mountpoint-util.h" +#include "nsflags.h" +#include "numa-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "rlimit-util.h" +#if HAVE_SECCOMP +#include "seccomp-util.h" +#endif +#include "securebits-util.h" +#include "signal-util.h" +#include "socket-util.h" +#include "sort-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "syslog-util.h" +#include "terminal-util.h" +#include "unit-def.h" +#include "user-util.h" +#include "utf8.h" + +int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) { + assert(message); + assert(u); + + u->machine = NULL; + + return sd_bus_message_read( + message, + "(ssssssouso)", + &u->id, + &u->description, + &u->load_state, + &u->active_state, + &u->sub_state, + &u->following, + &u->unit_path, + &u->job_id, + &u->job_type, + &u->job_path); +} + +#define DEFINE_BUS_APPEND_PARSE_PTR(bus_type, cast_type, type, parse_func) \ + static int bus_append_##parse_func( \ + sd_bus_message *m, \ + const char *field, \ + const char *eq) { \ + type val; \ + int r; \ + \ + r = parse_func(eq, &val); \ + if (r < 0) \ + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); \ + \ + r = sd_bus_message_append(m, "(sv)", field, \ + bus_type, (cast_type) val); \ + if (r < 0) \ + return bus_log_create_error(r); \ + \ + return 1; \ + } + +#define DEFINE_BUS_APPEND_PARSE(bus_type, parse_func) \ + static int bus_append_##parse_func( \ + sd_bus_message *m, \ + const char *field, \ + const char *eq) { \ + int r; \ + \ + r = parse_func(eq); \ + if (r < 0) \ + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s: %s", field, eq); \ + \ + r = sd_bus_message_append(m, "(sv)", field, \ + bus_type, (int32_t) r); \ + if (r < 0) \ + return bus_log_create_error(r); \ + \ + return 1; \ + } + +DEFINE_BUS_APPEND_PARSE("b", parse_boolean); +DEFINE_BUS_APPEND_PARSE("i", ioprio_class_from_string); +DEFINE_BUS_APPEND_PARSE("i", ip_tos_from_string); +DEFINE_BUS_APPEND_PARSE("i", log_facility_unshifted_from_string); +DEFINE_BUS_APPEND_PARSE("i", log_level_from_string); +#if !HAVE_SECCOMP +static inline int seccomp_parse_errno_or_action(const char *eq) { return -EINVAL; } +#endif +DEFINE_BUS_APPEND_PARSE("i", seccomp_parse_errno_or_action); +DEFINE_BUS_APPEND_PARSE("i", sched_policy_from_string); +DEFINE_BUS_APPEND_PARSE("i", secure_bits_from_string); +DEFINE_BUS_APPEND_PARSE("i", signal_from_string); +DEFINE_BUS_APPEND_PARSE("i", parse_ip_protocol); +DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, ioprio_parse_priority); +DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, parse_nice); +DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, safe_atoi); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, nsec_t, parse_nsec); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_blkio_weight_parse); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_cpu_shares_parse); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_weight_parse); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, unsigned long, mount_propagation_flags_from_string); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, safe_atou64); +DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, mode_t, parse_mode); +DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, unsigned, safe_atou); +DEFINE_BUS_APPEND_PARSE_PTR("x", int64_t, int64_t, safe_atoi64); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, coredump_filter_mask_from_string); + +static int bus_append_string(sd_bus_message *m, const char *field, const char *eq) { + int r; + + r = sd_bus_message_append(m, "(sv)", field, "s", eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_strv(sd_bus_message *m, const char *field, const char *eq, ExtractFlags flags) { + const char *p; + int r; + + r = sd_bus_message_open_container(m, 'r', "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 's', field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "as"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "s"); + if (r < 0) + return bus_log_create_error(r); + + for (p = eq;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, flags); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Invalid syntax: %s", eq); + + r = sd_bus_message_append_basic(m, 's', word); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_byte_array(sd_bus_message *m, const char *field, const void *buf, size_t n) { + int r; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "ay"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'y', buf, n); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_parse_sec_rename(sd_bus_message *m, const char *field, const char *eq) { + char *n; + usec_t t; + size_t l; + int r; + + r = parse_sec(eq, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); + + l = strlen(field); + n = newa(char, l + 2); + /* Change suffix Sec → USec */ + strcpy(mempcpy(n, field, l - 3), "USec"); + + r = sd_bus_message_append(m, "(sv)", n, "t", t); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_parse_size(sd_bus_message *m, const char *field, const char *eq, uint64_t base) { + uint64_t v; + int r; + + r = parse_size(eq, base, &v); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); + + r = sd_bus_message_append(m, "(sv)", field, "t", v); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_exec_command(sd_bus_message *m, const char *field, const char *eq) { + bool explicit_path = false, done = false; + _cleanup_strv_free_ char **l = NULL, **ex_opts = NULL; + _cleanup_free_ char *path = NULL, *upgraded_name = NULL; + ExecCommandFlags flags = 0; + bool is_ex_prop = endswith(field, "Ex"); + int r; + + do { + switch (*eq) { + + case '-': + if (FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE)) + done = true; + else { + flags |= EXEC_COMMAND_IGNORE_FAILURE; + eq++; + } + break; + + case '@': + if (explicit_path) + done = true; + else { + explicit_path = true; + eq++; + } + break; + + case ':': + if (FLAGS_SET(flags, EXEC_COMMAND_NO_ENV_EXPAND)) + done = true; + else { + flags |= EXEC_COMMAND_NO_ENV_EXPAND; + eq++; + } + break; + + case '+': + if (flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC)) + done = true; + else { + flags |= EXEC_COMMAND_FULLY_PRIVILEGED; + eq++; + } + break; + + case '!': + if (flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_AMBIENT_MAGIC)) + done = true; + else if (FLAGS_SET(flags, EXEC_COMMAND_NO_SETUID)) { + flags &= ~EXEC_COMMAND_NO_SETUID; + flags |= EXEC_COMMAND_AMBIENT_MAGIC; + eq++; + } else { + flags |= EXEC_COMMAND_NO_SETUID; + eq++; + } + break; + + default: + done = true; + break; + } + } while (!done); + + if (!is_ex_prop && (flags & (EXEC_COMMAND_NO_ENV_EXPAND|EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC))) { + /* Upgrade the ExecXYZ= property to ExecXYZEx= for convenience */ + is_ex_prop = true; + upgraded_name = strjoin(field, "Ex"); + if (!upgraded_name) + return log_oom(); + } + + if (is_ex_prop) { + r = exec_command_flags_to_strv(flags, &ex_opts); + if (r < 0) + return log_error_errno(r, "Failed to convert ExecCommandFlags to strv: %m"); + } + + if (explicit_path) { + r = extract_first_word(&eq, &path, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE); + if (r < 0) + return log_error_errno(r, "Failed to parse path: %m"); + } + + r = strv_split_full(&l, eq, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE); + if (r < 0) + return log_error_errno(r, "Failed to parse command line: %m"); + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, upgraded_name ?: field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', is_ex_prop ? "a(sasas)" : "a(sasb)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', is_ex_prop ? "(sasas)" : "(sasb)"); + if (r < 0) + return bus_log_create_error(r); + + if (!strv_isempty(l)) { + + r = sd_bus_message_open_container(m, 'r', is_ex_prop ? "sasas" : "sasb"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "s", path ?: l[0]); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_strv(m, l); + if (r < 0) + return bus_log_create_error(r); + + r = is_ex_prop ? sd_bus_message_append_strv(m, ex_opts) : sd_bus_message_append(m, "b", FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE)); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) { + int r; + + assert(m); + assert(prefix); + + r = sd_bus_message_open_container(m, 'r', "iayu"); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "i", family); + if (r < 0) + return r; + + r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family)); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "u", prefixlen); + if (r < 0) + return r; + + return sd_bus_message_close_container(m); +} + +static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, "DevicePolicy", + "Slice", + "ManagedOOMSwap", + "ManagedOOMMemoryPressure", + "ManagedOOMMemoryPressureLimitPercent")) + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, "CPUAccounting", + "MemoryAccounting", + "IOAccounting", + "BlockIOAccounting", + "TasksAccounting", + "IPAccounting")) + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "CPUWeight", + "StartupCPUWeight", + "IOWeight", + "StartupIOWeight")) + return bus_append_cg_weight_parse(m, field, eq); + + if (STR_IN_SET(field, "CPUShares", + "StartupCPUShares")) + return bus_append_cg_cpu_shares_parse(m, field, eq); + + if (STR_IN_SET(field, "AllowedCPUs", + "AllowedMemoryNodes")) { + _cleanup_(cpu_set_reset) CPUSet cpuset = {}; + _cleanup_free_ uint8_t *array = NULL; + size_t allocated; + + r = parse_cpu_set(eq, &cpuset); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value: %s", field, eq); + + r = cpu_set_to_dbus(&cpuset, &array, &allocated); + if (r < 0) + return log_error_errno(r, "Failed to serialize CPUSet: %m"); + + return bus_append_byte_array(m, field, array, allocated); + } + + if (STR_IN_SET(field, "BlockIOWeight", + "StartupBlockIOWeight")) + return bus_append_cg_blkio_weight_parse(m, field, eq); + + if (streq(field, "DisableControllers")) + return bus_append_strv(m, "DisableControllers", eq, EXTRACT_UNQUOTE); + + if (streq(field, "Delegate")) { + r = parse_boolean(eq); + if (r < 0) + return bus_append_strv(m, "DelegateControllers", eq, EXTRACT_UNQUOTE); + + r = sd_bus_message_append(m, "(sv)", "Delegate", "b", r); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "MemoryMin", + "DefaultMemoryLow", + "DefaultMemoryMin", + "MemoryLow", + "MemoryHigh", + "MemoryMax", + "MemorySwapMax", + "MemoryLimit", + "TasksMax")) { + + if (streq(eq, "infinity")) { + r = sd_bus_message_append(m, "(sv)", field, "t", CGROUP_LIMIT_MAX); + if (r < 0) + return bus_log_create_error(r); + return 1; + } else if (isempty(eq)) { + uint64_t empty_value = STR_IN_SET(field, + "DefaultMemoryLow", + "DefaultMemoryMin", + "MemoryLow", + "MemoryMin") ? + CGROUP_LIMIT_MIN : + CGROUP_LIMIT_MAX; + + r = sd_bus_message_append(m, "(sv)", field, "t", empty_value); + if (r < 0) + return bus_log_create_error(r); + return 1; + } + + r = parse_permille(eq); + if (r >= 0) { + char *n; + + /* When this is a percentage we'll convert this into a relative value in the range 0…UINT32_MAX + * and pass it in the MemoryLowScale property (and related ones). This way the physical memory + * size can be determined server-side. */ + + n = strjoina(field, "Scale"); + r = sd_bus_message_append(m, "(sv)", n, "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U)); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "TasksMax")) + return bus_append_safe_atou64(m, field, eq); + + return bus_append_parse_size(m, field, eq, 1024); + } + + if (streq(field, "CPUQuota")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", USEC_INFINITY); + else { + r = parse_permille_unbounded(eq); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ERANGE), + "CPU quota too small."); + if (r < 0) + return log_error_errno(r, "CPU quota '%s' invalid.", eq); + + r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", (((uint64_t) r * USEC_PER_SEC) / 1000U)); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "CPUQuotaPeriodSec")) { + usec_t u = USEC_INFINITY; + + r = parse_sec_def_infinity(eq, &u); + if (r < 0) + return log_error_errno(r, "CPU quota period '%s' invalid.", eq); + + r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "DeviceAllow")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0); + else { + const char *path = eq, *rwm = NULL, *e; + + e = strchr(eq, ' '); + if (e) { + path = strndupa(eq, e - eq); + rwm = e+1; + } + + r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, path, strempty(rwm)); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (cgroup_io_limit_type_from_string(field) >= 0 || STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0); + else { + const char *path, *bandwidth, *e; + uint64_t bytes; + + e = strchr(eq, ' '); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse %s value %s.", + field, eq); + + path = strndupa(eq, e - eq); + bandwidth = e+1; + + if (streq(bandwidth, "infinity")) + bytes = CGROUP_LIMIT_MAX; + else { + r = parse_size(bandwidth, 1000, &bytes); + if (r < 0) + return log_error_errno(r, "Failed to parse byte value %s: %m", bandwidth); + } + + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, bytes); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "IODeviceWeight", + "BlockIODeviceWeight")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0); + else { + const char *path, *weight, *e; + uint64_t u; + + e = strchr(eq, ' '); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse %s value %s.", + field, eq); + + path = strndupa(eq, e - eq); + weight = e+1; + + r = safe_atou64(weight, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s: %m", field, weight); + + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, u); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "IODeviceLatencyTargetSec")) { + const char *field_usec = "IODeviceLatencyTargetUSec"; + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", USEC_INFINITY); + else { + const char *path, *target, *e; + usec_t usec; + + e = strchr(eq, ' '); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse %s value %s.", + field, eq); + + path = strndupa(eq, e - eq); + target = e+1; + + r = parse_sec(target, &usec); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s: %m", field, target); + + r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "IPAddressAllow", + "IPAddressDeny")) { + unsigned char prefixlen; + union in_addr_union prefix = {}; + int family; + + if (isempty(eq)) { + r = sd_bus_message_append(m, "(sv)", field, "a(iayu)", 0); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(iayu)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(iayu)"); + if (r < 0) + return bus_log_create_error(r); + + if (streq(eq, "any")) { + /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */ + + r = bus_append_ip_address_access(m, AF_INET, &prefix, 0); + if (r < 0) + return bus_log_create_error(r); + + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0); + if (r < 0) + return bus_log_create_error(r); + + } else if (is_localhost(eq)) { + /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */ + + prefix.in.s_addr = htobe32(0x7f000000); + r = bus_append_ip_address_access(m, AF_INET, &prefix, 8); + if (r < 0) + return bus_log_create_error(r); + + prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT; + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128); + if (r < 0) + return r; + + } else if (streq(eq, "link-local")) { + /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */ + + prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16)); + r = bus_append_ip_address_access(m, AF_INET, &prefix, 16); + if (r < 0) + return bus_log_create_error(r); + + prefix.in6 = (struct in6_addr) { + .s6_addr32[0] = htobe32(0xfe800000) + }; + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64); + if (r < 0) + return bus_log_create_error(r); + + } else if (streq(eq, "multicast")) { + /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */ + + prefix.in.s_addr = htobe32((UINT32_C(224) << 24)); + r = bus_append_ip_address_access(m, AF_INET, &prefix, 4); + if (r < 0) + return bus_log_create_error(r); + + prefix.in6 = (struct in6_addr) { + .s6_addr32[0] = htobe32(0xff000000) + }; + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8); + if (r < 0) + return bus_log_create_error(r); + + } else { + for (;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&eq, &word, NULL, 0); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Failed to parse %s: %s", field, eq); + + r = in_addr_prefix_from_string_auto(word, &family, &prefix, &prefixlen); + if (r < 0) + return log_error_errno(r, "Failed to parse IP address prefix: %s", word); + + r = bus_append_ip_address_access(m, family, &prefix, prefixlen); + if (r < 0) + return bus_log_create_error(r); + } + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "IPIngressFilterPath", + "IPEgressFilterPath")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "as", 0); + else + r = sd_bus_message_append(m, "(sv)", field, "as", 1, eq); + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_automount_property(sd_bus_message *m, const char *field, const char *eq) { + if (streq(field, "Where")) + return bus_append_string(m, field, eq); + + if (streq(field, "DirectoryMode")) + return bus_append_parse_mode(m, field, eq); + + if (streq(field, "TimeoutIdleSec")) + return bus_append_parse_sec_rename(m, field, eq); + + return 0; +} + +static int bus_append_execute_property(sd_bus_message *m, const char *field, const char *eq) { + const char *suffix; + int r; + + if (STR_IN_SET(field, "User", + "Group", + "UtmpIdentifier", + "UtmpMode", + "PAMName", + "TTYPath", + "WorkingDirectory", + "RootDirectory", + "SyslogIdentifier", + "ProtectSystem", + "ProtectHome", + "SELinuxContext", + "RootImage", + "RootVerity", + "RuntimeDirectoryPreserve", + "Personality", + "KeyringMode", + "ProtectProc", + "ProcSubset", + "NetworkNamespacePath", + "LogNamespace")) + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, "IgnoreSIGPIPE", + "TTYVHangup", + "TTYReset", + "TTYVTDisallocate", + "PrivateTmp", + "PrivateDevices", + "PrivateNetwork", + "PrivateUsers", + "PrivateMounts", + "NoNewPrivileges", + "SyslogLevelPrefix", + "MemoryDenyWriteExecute", + "RestrictRealtime", + "DynamicUser", + "RemoveIPC", + "ProtectKernelTunables", + "ProtectKernelModules", + "ProtectKernelLogs", + "ProtectClock", + "ProtectControlGroups", + "MountAPIVFS", + "CPUSchedulingResetOnFork", + "LockPersonality", + "ProtectHostname", + "RestrictSUIDSGID")) + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "ReadWriteDirectories", + "ReadOnlyDirectories", + "InaccessibleDirectories", + "ReadWritePaths", + "ReadOnlyPaths", + "InaccessiblePaths", + "RuntimeDirectory", + "StateDirectory", + "CacheDirectory", + "LogsDirectory", + "ConfigurationDirectory", + "SupplementaryGroups", + "SystemCallArchitectures")) + return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE); + + if (STR_IN_SET(field, "SyslogLevel", + "LogLevelMax")) + return bus_append_log_level_from_string(m, field, eq); + + if (streq(field, "SyslogFacility")) + return bus_append_log_facility_unshifted_from_string(m, field, eq); + + if (streq(field, "SecureBits")) + return bus_append_secure_bits_from_string(m, field, eq); + + if (streq(field, "CPUSchedulingPolicy")) + return bus_append_sched_policy_from_string(m, field, eq); + + if (STR_IN_SET(field, "CPUSchedulingPriority", + "OOMScoreAdjust")) + return bus_append_safe_atoi(m, field, eq); + + if (streq(field, "CoredumpFilter")) + return bus_append_coredump_filter_mask_from_string(m, field, eq); + + if (streq(field, "Nice")) + return bus_append_parse_nice(m, field, eq); + + if (streq(field, "SystemCallErrorNumber")) + return bus_append_seccomp_parse_errno_or_action(m, field, eq); + + if (streq(field, "IOSchedulingClass")) + return bus_append_ioprio_class_from_string(m, field, eq); + + if (streq(field, "IOSchedulingPriority")) + return bus_append_ioprio_parse_priority(m, field, eq); + + if (STR_IN_SET(field, "RuntimeDirectoryMode", + "StateDirectoryMode", + "CacheDirectoryMode", + "LogsDirectoryMode", + "ConfigurationDirectoryMode", + "UMask")) + return bus_append_parse_mode(m, field, eq); + + if (streq(field, "TimerSlackNSec")) + return bus_append_parse_nsec(m, field, eq); + + if (streq(field, "LogRateLimitIntervalSec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "LogRateLimitBurst")) + return bus_append_safe_atou(m, field, eq); + + if (streq(field, "MountFlags")) + return bus_append_mount_propagation_flags_from_string(m, field, eq); + + if (STR_IN_SET(field, "Environment", + "UnsetEnvironment", + "PassEnvironment")) + return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE); + + if (streq(field, "EnvironmentFile")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 0); + else + r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 1, + eq[0] == '-' ? eq + 1 : eq, + eq[0] == '-'); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "SetCredential")) { + r = sd_bus_message_open_container(m, 'r', "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 's', "SetCredential"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(say)"); + if (r < 0) + return bus_log_create_error(r); + + if (isempty(eq)) + r = sd_bus_message_append(m, "a(say)", 0); + else { + _cleanup_free_ char *word = NULL, *unescaped = NULL; + const char *p = eq; + int l; + + r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Failed to parse SetCredential= parameter: %s", eq); + if (r == 0 || !p) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing argument to SetCredential=."); + + l = cunescape(p, UNESCAPE_ACCEPT_NUL, &unescaped); + if (l < 0) + return log_error_errno(l, "Failed to unescape SetCredential= value: %s", p); + + r = sd_bus_message_open_container(m, 'a', "(say)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'r', "say"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "s", word); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'y', unescaped, l); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + } + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "LoadCredential")) { + r = sd_bus_message_open_container(m, 'r', "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 's', "LoadCredential"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ss)"); + if (r < 0) + return bus_log_create_error(r); + + if (isempty(eq)) + r = sd_bus_message_append(m, "a(ss)", 0); + else { + _cleanup_free_ char *word = NULL; + const char *p = eq; + + r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Failed to parse LoadCredential= parameter: %s", eq); + if (r == 0 || !p) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing argument to LoadCredential=."); + + r = sd_bus_message_append(m, "a(ss)", 1, word, p); + } + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "LogExtraFields")) { + r = sd_bus_message_open_container(m, 'r', "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 's', "LogExtraFields"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "aay"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "ay"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'y', eq, strlen(eq)); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "StandardInput", + "StandardOutput", + "StandardError")) { + const char *n, *appended; + + if ((n = startswith(eq, "fd:"))) { + appended = strjoina(field, "FileDescriptorName"); + r = sd_bus_message_append(m, "(sv)", appended, "s", n); + } else if ((n = startswith(eq, "file:"))) { + appended = strjoina(field, "File"); + r = sd_bus_message_append(m, "(sv)", appended, "s", n); + } else if ((n = startswith(eq, "append:"))) { + appended = strjoina(field, "FileToAppend"); + r = sd_bus_message_append(m, "(sv)", appended, "s", n); + } else + r = sd_bus_message_append(m, "(sv)", field, "s", eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "StandardInputText")) { + _cleanup_free_ char *unescaped = NULL; + + r = cunescape(eq, 0, &unescaped); + if (r < 0) + return log_error_errno(r, "Failed to unescape text '%s': %m", eq); + + if (!strextend(&unescaped, "\n", NULL)) + return log_oom(); + + /* Note that we don't expand specifiers here, but that should be OK, as this is a programmatic + * interface anyway */ + + return bus_append_byte_array(m, field, unescaped, strlen(unescaped)); + } + + if (streq(field, "StandardInputData")) { + _cleanup_free_ void *decoded = NULL; + size_t sz; + + r = unbase64mem(eq, (size_t) -1, &decoded, &sz); + if (r < 0) + return log_error_errno(r, "Failed to decode base64 data '%s': %m", eq); + + return bus_append_byte_array(m, field, decoded, sz); + } + + if ((suffix = startswith(field, "Limit"))) { + int rl; + + rl = rlimit_from_string(suffix); + if (rl >= 0) { + const char *sn; + struct rlimit l; + + r = rlimit_parse(rl, eq, &l); + if (r < 0) + return log_error_errno(r, "Failed to parse resource limit: %s", eq); + + r = sd_bus_message_append(m, "(sv)", field, "t", l.rlim_max); + if (r < 0) + return bus_log_create_error(r); + + sn = strjoina(field, "Soft"); + r = sd_bus_message_append(m, "(sv)", sn, "t", l.rlim_cur); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + } + + if (STR_IN_SET(field, "AppArmorProfile", + "SmackProcessLabel")) { + int ignore = 0; + const char *s = eq; + + if (eq[0] == '-') { + ignore = 1; + s = eq + 1; + } + + r = sd_bus_message_append(m, "(sv)", field, "(bs)", ignore, s); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "CapabilityBoundingSet", + "AmbientCapabilities")) { + uint64_t sum = 0; + bool invert = false; + const char *p = eq; + + if (*p == '~') { + invert = true; + p++; + } + + r = capability_set_from_string(p, &sum); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s: %m", field, eq); + + sum = invert ? ~sum : sum; + + r = sd_bus_message_append(m, "(sv)", field, "t", sum); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "CPUAffinity")) { + _cleanup_(cpu_set_reset) CPUSet cpuset = {}; + _cleanup_free_ uint8_t *array = NULL; + size_t allocated; + + if (eq && streq(eq, "numa")) { + r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true); + if (r < 0) + return bus_log_create_error(r); + return r; + } + + r = parse_cpu_set(eq, &cpuset); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value: %s", field, eq); + + r = cpu_set_to_dbus(&cpuset, &array, &allocated); + if (r < 0) + return log_error_errno(r, "Failed to serialize CPUAffinity: %m"); + + return bus_append_byte_array(m, field, array, allocated); + } + + if (streq(field, "NUMAPolicy")) { + r = mpol_from_string(eq); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value: %s", field, eq); + + r = sd_bus_message_append(m, "(sv)", field, "i", (int32_t) r); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "NUMAMask")) { + _cleanup_(cpu_set_reset) CPUSet nodes = {}; + _cleanup_free_ uint8_t *array = NULL; + size_t allocated; + + if (eq && streq(eq, "all")) { + r = numa_mask_add_all(&nodes); + if (r < 0) + return log_error_errno(r, "Failed to create NUMA mask representing \"all\" NUMA nodes: %m"); + } else { + r = parse_cpu_set(eq, &nodes); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value: %s", field, eq); + } + + r = cpu_set_to_dbus(&nodes, &array, &allocated); + if (r < 0) + return log_error_errno(r, "Failed to serialize NUMAMask: %m"); + + return bus_append_byte_array(m, field, array, allocated); + } + + if (STR_IN_SET(field, "RestrictAddressFamilies", + "SystemCallFilter", + "SystemCallLog")) { + int allow_list = 1; + const char *p = eq; + + if (*p == '~') { + allow_list = 0; + p++; + } + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "(bas)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'r', "bas"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 'b', &allow_list); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "s"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Invalid syntax: %s", eq); + + r = sd_bus_message_append_basic(m, 's', word); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "RestrictNamespaces")) { + bool invert = false; + unsigned long flags; + + r = parse_boolean(eq); + if (r > 0) + flags = 0; + else if (r == 0) + flags = NAMESPACE_FLAGS_ALL; + else { + if (eq[0] == '~') { + invert = true; + eq++; + } + + r = namespace_flags_from_string(eq, &flags); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s.", field, eq); + } + + if (invert) + flags = (~flags) & NAMESPACE_FLAGS_ALL; + + r = sd_bus_message_append(m, "(sv)", field, "t", (uint64_t) flags); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "BindPaths", + "BindReadOnlyPaths")) { + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ssbt)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ssbt)"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *source = NULL, *destination = NULL; + char *s = NULL, *d = NULL; + bool ignore_enoent = false; + uint64_t flags = MS_REC; + + r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + break; + + s = source; + if (s[0] == '-') { + ignore_enoent = true; + s++; + } + + if (p && p[-1] == ':') { + r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Missing argument after ':': %s", + eq); + + d = destination; + + if (p && p[-1] == ':') { + _cleanup_free_ char *options = NULL; + + r = extract_first_word(&p, &options, NULL, EXTRACT_UNQUOTE); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + + if (isempty(options) || streq(options, "rbind")) + flags = MS_REC; + else if (streq(options, "norbind")) + flags = 0; + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown options: %s", + eq); + } + } else + d = s; + + r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "TemporaryFileSystem")) { + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ss)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ss)"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *word = NULL, *path = NULL; + const char *w; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + break; + + w = word; + r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse argument: %s", + p); + + r = sd_bus_message_append(m, "(ss)", path, w); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "RootHash")) { + _cleanup_free_ void *roothash_decoded = NULL; + size_t roothash_decoded_size = 0; + + /* We have the path to a roothash to load and decode, eg: RootHash=/foo/bar.roothash */ + if (path_is_absolute(eq)) + return bus_append_string(m, "RootHashPath", eq); + + /* We have a roothash to decode, eg: RootHash=012345789abcdef */ + r = unhexmem(eq, strlen(eq), &roothash_decoded, &roothash_decoded_size); + if (r < 0) + return log_error_errno(r, "Failed to decode RootHash= '%s': %m", eq); + if (roothash_decoded_size < sizeof(sd_id128_t)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "RootHash= '%s' is too short: %m", eq); + + return bus_append_byte_array(m, field, roothash_decoded, roothash_decoded_size); + } + + if (streq(field, "RootHashSignature")) { + _cleanup_free_ void *roothash_sig_decoded = NULL; + char *value; + size_t roothash_sig_decoded_size = 0; + + /* We have the path to a roothash signature to load and decode, eg: RootHash=/foo/bar.roothash.p7s */ + if (path_is_absolute(eq)) + return bus_append_string(m, "RootHashSignaturePath", eq); + + if (!(value = startswith(eq, "base64:"))) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decode RootHashSignature= '%s', not a path but doesn't start with 'base64:': %m", eq); + + /* We have a roothash signature to decode, eg: RootHashSignature=base64:012345789abcdef */ + r = unbase64mem(value, strlen(value), &roothash_sig_decoded, &roothash_sig_decoded_size); + if (r < 0) + return log_error_errno(r, "Failed to decode RootHashSignature= '%s': %m", eq); + + return bus_append_byte_array(m, field, roothash_sig_decoded, roothash_sig_decoded_size); + } + + if (streq(field, "RootImageOptions")) { + _cleanup_strv_free_ char **l = NULL; + char **first = NULL, **second = NULL; + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ss)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ss)"); + if (r < 0) + return bus_log_create_error(r); + + r = strv_split_colon_pairs(&l, p); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + + STRV_FOREACH_PAIR(first, second, l) { + /* Format is either 'root:foo' or 'foo' (root is implied) */ + if (!isempty(*second) && partition_designator_from_string(*first) < 0) + return bus_log_create_error(-EINVAL); + + r = sd_bus_message_append(m, "(ss)", + !isempty(*second) ? *first : "root", + !isempty(*second) ? *second : *first); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "MountImages")) { + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ssba(ss))"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ssba(ss))"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *first = NULL, *second = NULL, *tuple = NULL; + const char *q = NULL, *source = NULL; + bool permissive = false; + + r = extract_first_word(&p, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE); + if (r < 0) + return r; + if (r == 0) + break; + + q = tuple; + r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &first, &second, NULL); + if (r < 0) + return r; + if (r == 0) + continue; + + source = first; + if (source[0] == '-') { + permissive = true; + source++; + } + + if (isempty(second)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Missing argument after ':': %s", + eq); + + r = sd_bus_message_open_container(m, 'r', "ssba(ss)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "ssb", source, second, permissive); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ss)"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *partition = NULL, *mount_options = NULL; + + r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL); + if (r < 0) + return r; + if (r == 0) + break; + /* Single set of options, applying to the root partition/single filesystem */ + if (r == 1) { + r = sd_bus_message_append(m, "(ss)", "root", partition); + if (r < 0) + return bus_log_create_error(r); + + break; + } + + if (partition_designator_from_string(partition) < 0) + return bus_log_create_error(-EINVAL); + + r = sd_bus_message_append(m, "(ss)", partition, mount_options); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_kill_property(sd_bus_message *m, const char *field, const char *eq) { + if (streq(field, "KillMode")) + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, "SendSIGHUP", + "SendSIGKILL")) + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "KillSignal", + "RestartKillSignal", + "FinalKillSignal", + "WatchdogSignal")) + return bus_append_signal_from_string(m, field, eq); + + return 0; +} + +static int bus_append_mount_property(sd_bus_message *m, const char *field, const char *eq) { + + if (STR_IN_SET(field, "What", + "Where", + "Options", + "Type")) + return bus_append_string(m, field, eq); + + if (streq(field, "TimeoutSec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "DirectoryMode")) + return bus_append_parse_mode(m, field, eq); + + if (STR_IN_SET(field, "SloppyOptions", + "LazyUnmount", + "ForceUnmount", + "ReadwriteOnly")) + return bus_append_parse_boolean(m, field, eq); + + return 0; +} + +static int bus_append_path_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (streq(field, "MakeDirectory")) + return bus_append_parse_boolean(m, field, eq); + + if (streq(field, "DirectoryMode")) + return bus_append_parse_mode(m, field, eq); + + if (STR_IN_SET(field, "PathExists", + "PathExistsGlob", + "PathChanged", + "PathModified", + "DirectoryNotEmpty")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 0); + else + r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 1, field, eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_scope_property(sd_bus_message *m, const char *field, const char *eq) { + if (streq(field, "RuntimeMaxSec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "TimeoutStopSec")) + return bus_append_parse_sec_rename(m, field, eq); + + return 0; +} + +static int bus_append_service_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, "PIDFile", + "Type", + "Restart", + "BusName", + "NotifyAccess", + "USBFunctionDescriptors", + "USBFunctionStrings", + "OOMPolicy", + "TimeoutStartFailureMode", + "TimeoutStopFailureMode")) + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, "PermissionsStartOnly", + "RootDirectoryStartOnly", + "RemainAfterExit", + "GuessMainPID")) + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "RestartSec", + "TimeoutStartSec", + "TimeoutStopSec", + "TimeoutAbortSec", + "RuntimeMaxSec", + "WatchdogSec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "TimeoutSec")) { + r = bus_append_parse_sec_rename(m, "TimeoutStartSec", eq); + if (r < 0) + return r; + + return bus_append_parse_sec_rename(m, "TimeoutStopSec", eq); + } + + if (streq(field, "FileDescriptorStoreMax")) + return bus_append_safe_atou(m, field, eq); + + if (STR_IN_SET(field, "ExecCondition", + "ExecStartPre", + "ExecStart", + "ExecStartPost", + "ExecConditionEx", + "ExecStartPreEx", + "ExecStartEx", + "ExecStartPostEx", + "ExecReload", + "ExecStop", + "ExecStopPost", + "ExecReloadEx", + "ExecStopEx", + "ExecStopPostEx")) + return bus_append_exec_command(m, field, eq); + + if (STR_IN_SET(field, "RestartPreventExitStatus", + "RestartForceExitStatus", + "SuccessExitStatus")) { + _cleanup_free_ int *status = NULL, *signal = NULL; + size_t n_status = 0, n_signal = 0; + const char *p; + + for (p = eq;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Invalid syntax in %s: %s", field, eq); + + /* We need to call exit_status_from_string() first, because we want + * to parse numbers as exit statuses, not signals. */ + + r = exit_status_from_string(word); + if (r >= 0) { + assert(r >= 0 && r < 256); + + status = reallocarray(status, n_status + 1, sizeof(int)); + if (!status) + return log_oom(); + + status[n_status++] = r; + + } else if ((r = signal_from_string(word)) >= 0) { + signal = reallocarray(signal, n_signal + 1, sizeof(int)); + if (!signal) + return log_oom(); + + signal[n_signal++] = r; + + } else + /* original r from exit_status_to_string() */ + return log_error_errno(r, "Invalid status or signal %s in %s: %m", + word, field); + } + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "(aiai)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'r', "aiai"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'i', status, n_status * sizeof(int)); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'i', signal, n_signal * sizeof(int)); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_socket_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, "Accept", + "FlushPending", + "Writable", + "KeepAlive", + "NoDelay", + "FreeBind", + "Transparent", + "Broadcast", + "PassCredentials", + "PassSecurity", + "PassPacketInfo", + "ReusePort", + "RemoveOnStop", + "SELinuxContextFromNet")) + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "Priority", + "IPTTL", + "Mark")) + return bus_append_safe_atoi(m, field, eq); + + if (streq(field, "IPTOS")) + return bus_append_ip_tos_from_string(m, field, eq); + + if (STR_IN_SET(field, "Backlog", + "MaxConnections", + "MaxConnectionsPerSource", + "KeepAliveProbes", + "TriggerLimitBurst")) + return bus_append_safe_atou(m, field, eq); + + if (STR_IN_SET(field, "SocketMode", + "DirectoryMode")) + return bus_append_parse_mode(m, field, eq); + + if (STR_IN_SET(field, "MessageQueueMaxMessages", + "MessageQueueMessageSize")) + return bus_append_safe_atoi64(m, field, eq); + + if (STR_IN_SET(field, "TimeoutSec", + "KeepAliveTimeSec", + "KeepAliveIntervalSec", + "DeferAcceptSec", + "TriggerLimitIntervalSec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (STR_IN_SET(field, "ReceiveBuffer", + "SendBuffer", + "PipeSize")) + return bus_append_parse_size(m, field, eq, 1024); + + if (STR_IN_SET(field, "ExecStartPre", + "ExecStartPost", + "ExecReload", + "ExecStopPost")) + return bus_append_exec_command(m, field, eq); + + if (STR_IN_SET(field, "SmackLabel", + "SmackLabelIPIn", + "SmackLabelIPOut", + "TCPCongestion", + "BindToDevice", + "BindIPv6Only", + "FileDescriptorName", + "SocketUser", + "SocketGroup", + "Timestamping")) + return bus_append_string(m, field, eq); + + if (streq(field, "Symlinks")) + return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE); + + if (streq(field, "SocketProtocol")) + return bus_append_parse_ip_protocol(m, field, eq); + + if (STR_IN_SET(field, "ListenStream", + "ListenDatagram", + "ListenSequentialPacket", + "ListenNetlink", + "ListenSpecial", + "ListenMessageQueue", + "ListenFIFO", + "ListenUSBFunction")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 0); + else + r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 1, field + STRLEN("Listen"), eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} +static int bus_append_timer_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, "WakeSystem", + "RemainAfterElapse", + "Persistent", + "OnTimezoneChange", + "OnClockChange", + "FixedRandomDelay")) + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "AccuracySec", + "RandomizedDelaySec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (STR_IN_SET(field, "OnActiveSec", + "OnBootSec", + "OnStartupSec", + "OnUnitActiveSec", + "OnUnitInactiveSec")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 0); + else { + usec_t t; + r = parse_sec(eq, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); + + r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 1, field, t); + } + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "OnCalendar")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 0); + else + r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 1, field, eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_unit_property(sd_bus_message *m, const char *field, const char *eq) { + ConditionType t = _CONDITION_TYPE_INVALID; + bool is_condition = false; + int r; + + if (STR_IN_SET(field, "Description", + "SourcePath", + "OnFailureJobMode", + "JobTimeoutAction", + "JobTimeoutRebootArgument", + "StartLimitAction", + "FailureAction", + "SuccessAction", + "RebootArgument", + "CollectMode")) + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, "StopWhenUnneeded", + "RefuseManualStart", + "RefuseManualStop", + "AllowIsolate", + "IgnoreOnIsolate", + "DefaultDependencies")) + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "JobTimeoutSec", + "JobRunningTimeoutSec", + "StartLimitIntervalSec")) + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "StartLimitBurst")) + return bus_append_safe_atou(m, field, eq); + + if (STR_IN_SET(field, "SuccessActionExitStatus", + "FailureActionExitStatus")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "i", -1); + else { + uint8_t u; + + r = safe_atou8(eq, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s", field, eq); + + r = sd_bus_message_append(m, "(sv)", field, "i", (int) u); + } + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (unit_dependency_from_string(field) >= 0 || + STR_IN_SET(field, "Documentation", + "RequiresMountsFor")) + return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE); + + t = condition_type_from_string(field); + if (t >= 0) + is_condition = true; + else + t = assert_type_from_string(field); + if (t >= 0) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 0); + else { + const char *p = eq; + int trigger, negate; + + trigger = *p == '|'; + if (trigger) + p++; + + negate = *p == '!'; + if (negate) + p++; + + r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 1, + field, trigger, negate, p); + } + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment) { + const char *eq, *field; + int r; + + assert(m); + assert(assignment); + + eq = strchr(assignment, '='); + if (!eq) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Not an assignment: %s", assignment); + + field = strndupa(assignment, eq - assignment); + eq++; + + switch (t) { + case UNIT_SERVICE: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_execute_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_service_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_SOCKET: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_execute_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_socket_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_TIMER: + r = bus_append_timer_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_PATH: + r = bus_append_path_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_SLICE: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_SCOPE: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_scope_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_MOUNT: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_execute_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_mount_property(m, field, eq); + if (r != 0) + return r; + + break; + + case UNIT_AUTOMOUNT: + r = bus_append_automount_property(m, field, eq); + if (r != 0) + return r; + + break; + + case UNIT_TARGET: + case UNIT_DEVICE: + case UNIT_SWAP: + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Not supported unit type"); + + default: + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid unit type"); + } + + r = bus_append_unit_property(m, field, eq); + if (r != 0) + return r; + + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown assignment: %s", assignment); +} + +int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l) { + char **i; + int r; + + assert(m); + + STRV_FOREACH(i, l) { + r = bus_append_unit_property_assignment(m, t, *i); + if (r < 0) + return r; + } + + return 0; +} + +int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes) { + const char *type, *path, *source; + int r; + + /* changes is dereferenced when calling unit_file_dump_changes() later, + * so we have to make sure this is not NULL. */ + assert(changes); + assert(n_changes); + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sss)"); + if (r < 0) + return bus_log_parse_error(r); + + while ((r = sd_bus_message_read(m, "(sss)", &type, &path, &source)) > 0) { + /* We expect only "success" changes to be sent over the bus. + Hence, reject anything negative. */ + UnitFileChangeType ch = unit_file_change_type_from_string(type); + + if (ch < 0) { + log_notice("Manager reported unknown change type \"%s\" for path \"%s\", ignoring.", type, path); + continue; + } + + r = unit_file_changes_add(changes, n_changes, ch, path, source); + if (r < 0) + return r; + } + if (r < 0) + return bus_log_parse_error(r); + + r = sd_bus_message_exit_container(m); + if (r < 0) + return bus_log_parse_error(r); + + unit_file_dump_changes(0, NULL, *changes, *n_changes, quiet); + return 0; +} + +int unit_load_state(sd_bus *bus, const char *name, char **load_state) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *path = NULL; + int r; + + path = unit_dbus_path_from_name(name); + if (!path) + return log_oom(); + + /* This function warns on it's own, because otherwise it'd be awkward to pass + * the dbus error message around. */ + + r = sd_bus_get_property_string( + bus, + "org.freedesktop.systemd1", + path, + "org.freedesktop.systemd1.Unit", + "LoadState", + &error, + load_state); + if (r < 0) + return log_error_errno(r, "Failed to get load state of %s: %s", name, bus_error_message(&error, r)); + + return 0; +} + +int unit_info_compare(const UnitInfo *a, const UnitInfo *b) { + int r; + + /* First, order by machine */ + r = strcasecmp_ptr(a->machine, b->machine); + if (r != 0) + return r; + + /* Second, order by unit type */ + r = strcasecmp_ptr(strrchr(a->id, '.'), strrchr(b->id, '.')); + if (r != 0) + return r; + + /* Third, order by name */ + return strcasecmp(a->id, b->id); +} diff --git a/src/shared/bus-unit-util.h b/src/shared/bus-unit-util.h new file mode 100644 index 0000000..999caf6 --- /dev/null +++ b/src/shared/bus-unit-util.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "install.h" +#include "unit-def.h" + +typedef struct UnitInfo { + const char *machine; + const char *id; + const char *description; + const char *load_state; + const char *active_state; + const char *sub_state; + const char *following; + const char *unit_path; + uint32_t job_id; + const char *job_type; + const char *job_path; +} UnitInfo; + +int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u); + +int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment); +int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l); + +int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes); + +int unit_load_state(sd_bus *bus, const char *name, char **load_state); + +int unit_info_compare(const UnitInfo *a, const UnitInfo *b); diff --git a/src/shared/bus-util.c b/src/shared/bus-util.c new file mode 100644 index 0000000..fbda218 --- /dev/null +++ b/src/shared/bus-util.c @@ -0,0 +1,577 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <unistd.h> + +#include "sd-bus.h" +#include "sd-daemon.h" +#include "sd-event.h" +#include "sd-id128.h" + +#include "bus-common-errors.h" +#include "bus-internal.h" +#include "bus-label.h" +#include "bus-util.h" +#include "path-util.h" +#include "socket-util.h" +#include "stdio-util.h" + +static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + sd_event *e = userdata; + + assert(m); + assert(e); + + sd_bus_close(sd_bus_message_get_bus(m)); + sd_event_exit(e, 0); + + return 1; +} + +int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name) { + const char *match; + const char *unique; + int r; + + assert(e); + assert(bus); + assert(name); + + /* We unregister the name here and then wait for the + * NameOwnerChanged signal for this event to arrive before we + * quit. We do this in order to make sure that any queued + * requests are still processed before we really exit. */ + + r = sd_bus_get_unique_name(bus, &unique); + if (r < 0) + return r; + + match = strjoina( + "sender='org.freedesktop.DBus'," + "type='signal'," + "interface='org.freedesktop.DBus'," + "member='NameOwnerChanged'," + "path='/org/freedesktop/DBus'," + "arg0='", name, "',", + "arg1='", unique, "',", + "arg2=''"); + + r = sd_bus_add_match_async(bus, NULL, match, name_owner_change_callback, NULL, e); + if (r < 0) + return r; + + r = sd_bus_release_name_async(bus, NULL, name, NULL, NULL); + if (r < 0) + return r; + + return 0; +} + +int bus_event_loop_with_idle( + sd_event *e, + sd_bus *bus, + const char *name, + usec_t timeout, + check_idle_t check_idle, + void *userdata) { + bool exiting = false; + int r, code; + + assert(e); + assert(bus); + assert(name); + + for (;;) { + bool idle; + + r = sd_event_get_state(e); + if (r < 0) + return r; + if (r == SD_EVENT_FINISHED) + break; + + if (check_idle) + idle = check_idle(userdata); + else + idle = true; + + r = sd_event_run(e, exiting || !idle ? (uint64_t) -1 : timeout); + if (r < 0) + return r; + + if (r == 0 && !exiting && idle) { + /* Inform the service manager that we are going down, so that it will queue all + * further start requests, instead of assuming we are already running. */ + sd_notify(false, "STOPPING=1"); + + r = bus_async_unregister_and_exit(e, bus, name); + if (r < 0) + return r; + + exiting = true; + continue; + } + } + + r = sd_event_get_exit_code(e, &code); + if (r < 0) + return r; + + return code; +} + +int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *rep = NULL; + int r, has_owner = 0; + + assert(c); + assert(name); + + r = sd_bus_call_method(c, + "org.freedesktop.DBus", + "/org/freedesktop/dbus", + "org.freedesktop.DBus", + "NameHasOwner", + error, + &rep, + "s", + name); + if (r < 0) + return r; + + r = sd_bus_message_read_basic(rep, 'b', &has_owner); + if (r < 0) + return sd_bus_error_set_errno(error, r); + + return has_owner; +} + +bool bus_error_is_unknown_service(const sd_bus_error *error) { + return sd_bus_error_has_names(error, + SD_BUS_ERROR_SERVICE_UNKNOWN, + SD_BUS_ERROR_NAME_HAS_NO_OWNER, + BUS_ERROR_NO_SUCH_UNIT); +} + +int bus_check_peercred(sd_bus *c) { + struct ucred ucred; + int fd, r; + + assert(c); + + fd = sd_bus_get_fd(c); + if (fd < 0) + return fd; + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + if (ucred.uid != 0 && ucred.uid != geteuid()) + return -EPERM; + + return 1; +} + +int bus_connect_system_systemd(sd_bus **_bus) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + int r; + + assert(_bus); + + if (geteuid() != 0) + return sd_bus_default_system(_bus); + + /* If we are root then let's talk directly to the system + * instance, instead of going via the bus */ + + r = sd_bus_new(&bus); + if (r < 0) + return r; + + r = sd_bus_set_address(bus, "unix:path=/run/systemd/private"); + if (r < 0) + return r; + + r = sd_bus_start(bus); + if (r < 0) + return sd_bus_default_system(_bus); + + r = bus_check_peercred(bus); + if (r < 0) + return r; + + *_bus = TAKE_PTR(bus); + + return 0; +} + +int bus_connect_user_systemd(sd_bus **_bus) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + _cleanup_free_ char *ee = NULL; + const char *e; + int r; + + assert(_bus); + + e = secure_getenv("XDG_RUNTIME_DIR"); + if (!e) + return sd_bus_default_user(_bus); + + ee = bus_address_escape(e); + if (!ee) + return -ENOMEM; + + r = sd_bus_new(&bus); + if (r < 0) + return r; + + bus->address = strjoin("unix:path=", ee, "/systemd/private"); + if (!bus->address) + return -ENOMEM; + + r = sd_bus_start(bus); + if (r < 0) + return sd_bus_default_user(_bus); + + r = bus_check_peercred(bus); + if (r < 0) + return r; + + *_bus = TAKE_PTR(bus); + + return 0; +} + +int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **ret) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + int r; + + assert(transport >= 0); + assert(transport < _BUS_TRANSPORT_MAX); + assert(ret); + + assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL); + assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP); + + switch (transport) { + + case BUS_TRANSPORT_LOCAL: + if (user) + r = sd_bus_default_user(&bus); + else { + if (sd_booted() <= 0) + /* Print a friendly message when the local system is actually not running systemd as PID 1. */ + return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "System has not been booted with systemd as init system (PID 1). Can't operate."); + r = sd_bus_default_system(&bus); + } + break; + + case BUS_TRANSPORT_REMOTE: + r = sd_bus_open_system_remote(&bus, host); + break; + + case BUS_TRANSPORT_MACHINE: + r = sd_bus_open_system_machine(&bus, host); + break; + + default: + assert_not_reached("Hmm, unknown transport type."); + } + if (r < 0) + return r; + + r = sd_bus_set_exit_on_disconnect(bus, true); + if (r < 0) + return r; + + *ret = TAKE_PTR(bus); + + return 0; +} + +int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus) { + int r; + + assert(transport >= 0); + assert(transport < _BUS_TRANSPORT_MAX); + assert(bus); + + assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL); + assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP); + + switch (transport) { + + case BUS_TRANSPORT_LOCAL: + if (user) + r = bus_connect_user_systemd(bus); + else { + if (sd_booted() <= 0) + /* Print a friendly message when the local system is actually not running systemd as PID 1. */ + return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "System has not been booted with systemd as init system (PID 1). Can't operate."); + r = bus_connect_system_systemd(bus); + } + break; + + case BUS_TRANSPORT_REMOTE: + r = sd_bus_open_system_remote(bus, host); + break; + + case BUS_TRANSPORT_MACHINE: + r = sd_bus_open_system_machine(bus, host); + break; + + default: + assert_not_reached("Hmm, unknown transport type."); + } + + return r; +} + +/** + * bus_path_encode_unique() - encode unique object path + * @b: bus connection or NULL + * @prefix: object path prefix + * @sender_id: unique-name of client, or NULL + * @external_id: external ID to be chosen by client, or NULL + * @ret_path: storage for encoded object path pointer + * + * Whenever we provide a bus API that allows clients to create and manage + * server-side objects, we need to provide a unique name for these objects. If + * we let the server choose the name, we suffer from a race condition: If a + * client creates an object asynchronously, it cannot destroy that object until + * it received the method reply. It cannot know the name of the new object, + * thus, it cannot destroy it. Furthermore, it enforces a round-trip. + * + * Therefore, many APIs allow the client to choose the unique name for newly + * created objects. There're two problems to solve, though: + * 1) Object names are usually defined via dbus object paths, which are + * usually globally namespaced. Therefore, multiple clients must be able + * to choose unique object names without interference. + * 2) If multiple libraries share the same bus connection, they must be + * able to choose unique object names without interference. + * The first problem is solved easily by prefixing a name with the + * unique-bus-name of a connection. The server side must enforce this and + * reject any other name. The second problem is solved by providing unique + * suffixes from within sd-bus. + * + * This helper allows clients to create unique object-paths. It uses the + * template '/prefix/sender_id/external_id' and returns the new path in + * @ret_path (must be freed by the caller). + * If @sender_id is NULL, the unique-name of @b is used. If @external_id is + * NULL, this function allocates a unique suffix via @b (by requesting a new + * cookie). If both @sender_id and @external_id are given, @b can be passed as + * NULL. + * + * Returns: 0 on success, negative error code on failure. + */ +int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path) { + _cleanup_free_ char *sender_label = NULL, *external_label = NULL; + char external_buf[DECIMAL_STR_MAX(uint64_t)], *p; + int r; + + assert_return(b || (sender_id && external_id), -EINVAL); + assert_return(sd_bus_object_path_is_valid(prefix), -EINVAL); + assert_return(ret_path, -EINVAL); + + if (!sender_id) { + r = sd_bus_get_unique_name(b, &sender_id); + if (r < 0) + return r; + } + + if (!external_id) { + xsprintf(external_buf, "%"PRIu64, ++b->cookie); + external_id = external_buf; + } + + sender_label = bus_label_escape(sender_id); + if (!sender_label) + return -ENOMEM; + + external_label = bus_label_escape(external_id); + if (!external_label) + return -ENOMEM; + + p = path_join(prefix, sender_label, external_label); + if (!p) + return -ENOMEM; + + *ret_path = p; + return 0; +} + +/** + * bus_path_decode_unique() - decode unique object path + * @path: object path to decode + * @prefix: object path prefix + * @ret_sender: output parameter for sender-id label + * @ret_external: output parameter for external-id label + * + * This does the reverse of bus_path_encode_unique() (see its description for + * details). Both trailing labels, sender-id and external-id, are unescaped and + * returned in the given output parameters (the caller must free them). + * + * Note that this function returns 0 if the path does not match the template + * (see bus_path_encode_unique()), 1 if it matched. + * + * Returns: Negative error code on failure, 0 if the given object path does not + * match the template (return parameters are set to NULL), 1 if it was + * parsed successfully (return parameters contain allocated labels). + */ +int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external) { + const char *p, *q; + char *sender, *external; + + assert(sd_bus_object_path_is_valid(path)); + assert(sd_bus_object_path_is_valid(prefix)); + assert(ret_sender); + assert(ret_external); + + p = object_path_startswith(path, prefix); + if (!p) { + *ret_sender = NULL; + *ret_external = NULL; + return 0; + } + + q = strchr(p, '/'); + if (!q) { + *ret_sender = NULL; + *ret_external = NULL; + return 0; + } + + sender = bus_label_unescape_n(p, q - p); + external = bus_label_unescape(q + 1); + if (!sender || !external) { + free(sender); + free(external); + return -ENOMEM; + } + + *ret_sender = sender; + *ret_external = external; + return 1; +} + +int bus_track_add_name_many(sd_bus_track *t, char **l) { + int r = 0; + char **i; + + assert(t); + + /* Continues adding after failure, and returns the first failure. */ + + STRV_FOREACH(i, l) { + int k; + + k = sd_bus_track_add_name(t, *i); + if (k < 0 && r >= 0) + r = k; + } + + return r; +} + +int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + const char *e; + int r; + + assert(ret); + + /* Match like sd_bus_open_system(), but with the "watch_bind" feature and the Connected() signal + * turned on. */ + + r = sd_bus_new(&bus); + if (r < 0) + return r; + + if (description) { + r = sd_bus_set_description(bus, description); + if (r < 0) + return r; + } + + e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS"); + if (!e) + e = DEFAULT_SYSTEM_BUS_ADDRESS; + + r = sd_bus_set_address(bus, e); + if (r < 0) + return r; + + r = sd_bus_set_bus_client(bus, true); + if (r < 0) + return r; + + r = sd_bus_negotiate_creds(bus, true, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS); + if (r < 0) + return r; + + r = sd_bus_set_watch_bind(bus, true); + if (r < 0) + return r; + + r = sd_bus_set_connected_signal(bus, true); + if (r < 0) + return r; + + r = sd_bus_start(bus); + if (r < 0) + return r; + + *ret = TAKE_PTR(bus); + + return 0; +} + +int bus_reply_pair_array(sd_bus_message *m, char **l) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + char **k, **v; + int r; + + assert(m); + + /* Reply to the specified message with a message containing a dictionary put together from the + * specified strv */ + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'a', "{ss}"); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(k, v, l) { + r = sd_bus_message_append(reply, "{ss}", *k, *v); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + return sd_bus_send(NULL, reply, NULL); +} + +static void bus_message_unref_wrapper(void *m) { + sd_bus_message_unref(m); +} + +const struct hash_ops bus_message_hash_ops = { + .hash = trivial_hash_func, + .compare = trivial_compare_func, + .free_value = bus_message_unref_wrapper, +}; diff --git a/src/shared/bus-util.h b/src/shared/bus-util.h new file mode 100644 index 0000000..27dd6c1 --- /dev/null +++ b/src/shared/bus-util.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include "sd-bus.h" +#include "sd-event.h" + +#include "errno-util.h" +#include "macro.h" +#include "string-util.h" +#include "time-util.h" + +typedef enum BusTransport { + BUS_TRANSPORT_LOCAL, + BUS_TRANSPORT_REMOTE, + BUS_TRANSPORT_MACHINE, + _BUS_TRANSPORT_MAX, + _BUS_TRANSPORT_INVALID = -1 +} BusTransport; + +int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name); + +typedef bool (*check_idle_t)(void *userdata); + +int bus_event_loop_with_idle(sd_event *e, sd_bus *bus, const char *name, usec_t timeout, check_idle_t check_idle, void *userdata); + +int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error); +bool bus_error_is_unknown_service(const sd_bus_error *error); + +int bus_check_peercred(sd_bus *c); + +int bus_connect_system_systemd(sd_bus **_bus); +int bus_connect_user_systemd(sd_bus **_bus); + +int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **bus); +int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus); + +#define bus_log_address_error(r) \ + ({ \ + int _k = (r); \ + log_error_errno(_k, \ + _k == -ENOMEDIUM ? "Failed to set bus address: $DBUS_SESSION_BUS_ADDRESS and $XDG_RUNTIME_DIR not defined (consider using --machine=<user>@.host --user to connect to bus of other user)" : \ + "Failed to set bus address: %m"); \ + }) + +#define bus_log_connect_error(r) \ + ({ \ + int _k = (r); \ + log_error_errno(_k, \ + _k == -ENOMEDIUM ? "Failed to connect to bus: $DBUS_SESSION_BUS_ADDRESS and $XDG_RUNTIME_DIR not defined (consider using --machine=<user>@.host --user to connect to bus of other user)" : \ + ERRNO_IS_PRIVILEGE(_k) ? "Failed to connect to bus: Operation not permitted (consider using --machine=<user>@.host --user to connect to bus of other user)" : \ + "Failed to connect to bus: %m"); \ + }) + +#define bus_log_parse_error(r) \ + log_error_errno(r, "Failed to parse bus message: %m") + +#define bus_log_create_error(r) \ + log_error_errno(r, "Failed to create bus message: %m") + +int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path); +int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external); + +int bus_track_add_name_many(sd_bus_track *t, char **l); + +int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description); +static inline int bus_open_system_watch_bind(sd_bus **ret) { + return bus_open_system_watch_bind_with_description(ret, NULL); +} + +int bus_reply_pair_array(sd_bus_message *m, char **l); + +extern const struct hash_ops bus_message_hash_ops; diff --git a/src/shared/bus-wait-for-jobs.c b/src/shared/bus-wait-for-jobs.c new file mode 100644 index 0000000..b2a9e03 --- /dev/null +++ b/src/shared/bus-wait-for-jobs.c @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "bus-wait-for-jobs.h" +#include "set.h" +#include "bus-util.h" +#include "bus-internal.h" +#include "unit-def.h" +#include "escape.h" +#include "strv.h" + +typedef struct BusWaitForJobs { + sd_bus *bus; + + /* The set of jobs to wait for, as bus object paths */ + Set *jobs; + + /* The unit name and job result of the last Job message */ + char *name; + char *result; + + sd_bus_slot *slot_job_removed; + sd_bus_slot *slot_disconnected; +} BusWaitForJobs; + +static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) { + assert(m); + + log_error("Warning! D-Bus connection terminated."); + sd_bus_close(sd_bus_message_get_bus(m)); + + return 0; +} + +static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) { + const char *path, *unit, *result; + BusWaitForJobs *d = userdata; + uint32_t id; + char *found; + int r; + + assert(m); + assert(d); + + r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result); + if (r < 0) { + bus_log_parse_error(r); + return 0; + } + + found = set_remove(d->jobs, (char*) path); + if (!found) + return 0; + + free(found); + + (void) free_and_strdup(&d->result, empty_to_null(result)); + + (void) free_and_strdup(&d->name, empty_to_null(unit)); + + return 0; +} + +void bus_wait_for_jobs_free(BusWaitForJobs *d) { + if (!d) + return; + + set_free(d->jobs); + + sd_bus_slot_unref(d->slot_disconnected); + sd_bus_slot_unref(d->slot_job_removed); + + sd_bus_unref(d->bus); + + free(d->name); + free(d->result); + + free(d); +} + +int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret) { + _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *d = NULL; + int r; + + assert(bus); + assert(ret); + + d = new(BusWaitForJobs, 1); + if (!d) + return -ENOMEM; + + *d = (BusWaitForJobs) { + .bus = sd_bus_ref(bus), + }; + + /* When we are a bus client we match by sender. Direct + * connections OTOH have no initialized sender field, and + * hence we ignore the sender then */ + r = sd_bus_match_signal_async( + bus, + &d->slot_job_removed, + bus->bus_client ? "org.freedesktop.systemd1" : NULL, + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "JobRemoved", + match_job_removed, NULL, d); + if (r < 0) + return r; + + r = sd_bus_match_signal_async( + bus, + &d->slot_disconnected, + "org.freedesktop.DBus.Local", + NULL, + "org.freedesktop.DBus.Local", + "Disconnected", + match_disconnected, NULL, d); + if (r < 0) + return r; + + *ret = TAKE_PTR(d); + + return 0; +} + +static int bus_process_wait(sd_bus *bus) { + int r; + + for (;;) { + r = sd_bus_process(bus, NULL); + if (r < 0) + return r; + if (r > 0) + return 0; + + r = sd_bus_wait(bus, (uint64_t) -1); + if (r < 0) + return r; + } +} + +static int bus_job_get_service_result(BusWaitForJobs *d, char **result) { + _cleanup_free_ char *dbus_path = NULL; + + assert(d); + assert(d->name); + assert(result); + + if (!endswith(d->name, ".service")) + return -EINVAL; + + dbus_path = unit_dbus_path_from_name(d->name); + if (!dbus_path) + return -ENOMEM; + + return sd_bus_get_property_string(d->bus, + "org.freedesktop.systemd1", + dbus_path, + "org.freedesktop.systemd1.Service", + "Result", + NULL, + result); +} + +static void log_job_error_with_service_result(const char* service, const char *result, const char* const* extra_args) { + _cleanup_free_ char *service_shell_quoted = NULL; + const char *systemctl = "systemctl", *journalctl = "journalctl"; + + static const struct { + const char *result, *explanation; + } explanations[] = { + { "resources", "of unavailable resources or another system error" }, + { "protocol", "the service did not take the steps required by its unit configuration" }, + { "timeout", "a timeout was exceeded" }, + { "exit-code", "the control process exited with error code" }, + { "signal", "a fatal signal was delivered to the control process" }, + { "core-dump", "a fatal signal was delivered causing the control process to dump core" }, + { "watchdog", "the service failed to send watchdog ping" }, + { "start-limit", "start of the service was attempted too often" } + }; + + assert(service); + + service_shell_quoted = shell_maybe_quote(service, ESCAPE_BACKSLASH); + + if (!strv_isempty((char**) extra_args)) { + _cleanup_free_ char *t; + + t = strv_join((char**) extra_args, " "); + systemctl = strjoina("systemctl ", t ? : "<args>"); + journalctl = strjoina("journalctl ", t ? : "<args>"); + } + + if (!isempty(result)) { + size_t i; + + for (i = 0; i < ELEMENTSOF(explanations); ++i) + if (streq(result, explanations[i].result)) + break; + + if (i < ELEMENTSOF(explanations)) { + log_error("Job for %s failed because %s.\n" + "See \"%s status %s\" and \"%s -xe\" for details.\n", + service, + explanations[i].explanation, + systemctl, + service_shell_quoted ?: "<service>", + journalctl); + goto finish; + } + } + + log_error("Job for %s failed.\n" + "See \"%s status %s\" and \"%s -xe\" for details.\n", + service, + systemctl, + service_shell_quoted ?: "<service>", + journalctl); + +finish: + /* For some results maybe additional explanation is required */ + if (streq_ptr(result, "start-limit")) + log_info("To force a start use \"%1$s reset-failed %2$s\"\n" + "followed by \"%1$s start %2$s\" again.", + systemctl, + service_shell_quoted ?: "<service>"); +} + +static int check_wait_response(BusWaitForJobs *d, bool quiet, const char* const* extra_args) { + assert(d); + assert(d->name); + assert(d->result); + + if (!quiet) { + if (streq(d->result, "canceled")) + log_error("Job for %s canceled.", strna(d->name)); + else if (streq(d->result, "timeout")) + log_error("Job for %s timed out.", strna(d->name)); + else if (streq(d->result, "dependency")) + log_error("A dependency job for %s failed. See 'journalctl -xe' for details.", strna(d->name)); + else if (streq(d->result, "invalid")) + log_error("%s is not active, cannot reload.", strna(d->name)); + else if (streq(d->result, "assert")) + log_error("Assertion failed on job for %s.", strna(d->name)); + else if (streq(d->result, "unsupported")) + log_error("Operation on or unit type of %s not supported on this system.", strna(d->name)); + else if (streq(d->result, "collected")) + log_error("Queued job for %s was garbage collected.", strna(d->name)); + else if (streq(d->result, "once")) + log_error("Unit %s was started already once and can't be started again.", strna(d->name)); + else if (!STR_IN_SET(d->result, "done", "skipped")) { + + if (d->name && endswith(d->name, ".service")) { + _cleanup_free_ char *result = NULL; + int q; + + q = bus_job_get_service_result(d, &result); + if (q < 0) + log_debug_errno(q, "Failed to get Result property of unit %s: %m", d->name); + + log_job_error_with_service_result(d->name, result, extra_args); + } else + log_error("Job failed. See \"journalctl -xe\" for details."); + } + } + + if (STR_IN_SET(d->result, "canceled", "collected")) + return -ECANCELED; + else if (streq(d->result, "timeout")) + return -ETIME; + else if (streq(d->result, "dependency")) + return -EIO; + else if (streq(d->result, "invalid")) + return -ENOEXEC; + else if (streq(d->result, "assert")) + return -EPROTO; + else if (streq(d->result, "unsupported")) + return -EOPNOTSUPP; + else if (streq(d->result, "once")) + return -ESTALE; + else if (STR_IN_SET(d->result, "done", "skipped")) + return 0; + + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Unexpected job result, assuming server side newer than us: %s", d->result); +} + +int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args) { + int r = 0; + + assert(d); + + while (!set_isempty(d->jobs)) { + int q; + + q = bus_process_wait(d->bus); + if (q < 0) + return log_error_errno(q, "Failed to wait for response: %m"); + + if (d->name && d->result) { + q = check_wait_response(d, quiet, extra_args); + /* Return the first error as it is most likely to be + * meaningful. */ + if (q < 0 && r == 0) + r = q; + + log_debug_errno(q, "Got result %s/%m for job %s", d->result, d->name); + } + + d->name = mfree(d->name); + d->result = mfree(d->result); + } + + return r; +} + +int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path) { + assert(d); + + return set_put_strdup(&d->jobs, path); +} + +int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet) { + int r; + + r = bus_wait_for_jobs_add(d, path); + if (r < 0) + return log_oom(); + + return bus_wait_for_jobs(d, quiet, NULL); +} diff --git a/src/shared/bus-wait-for-jobs.h b/src/shared/bus-wait-for-jobs.h new file mode 100644 index 0000000..0155887 --- /dev/null +++ b/src/shared/bus-wait-for-jobs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-bus.h" + +#include "macro.h" + +typedef struct BusWaitForJobs BusWaitForJobs; + +int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret); +void bus_wait_for_jobs_free(BusWaitForJobs *d); +int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path); +int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args); +int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet); + +DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForJobs*, bus_wait_for_jobs_free); diff --git a/src/shared/bus-wait-for-units.c b/src/shared/bus-wait-for-units.c new file mode 100644 index 0000000..4f1c505 --- /dev/null +++ b/src/shared/bus-wait-for-units.c @@ -0,0 +1,429 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "bus-map-properties.h" +#include "bus-wait-for-units.h" +#include "hashmap.h" +#include "string-util.h" +#include "strv.h" +#include "unit-def.h" + +typedef struct WaitForItem { + BusWaitForUnits *parent; + + BusWaitForUnitsFlags flags; + + char *bus_path; + + sd_bus_slot *slot_get_all; + sd_bus_slot *slot_properties_changed; + + bus_wait_for_units_unit_callback unit_callback; + void *userdata; + + char *active_state; + uint32_t job_id; + char *clean_result; +} WaitForItem; + +typedef struct BusWaitForUnits { + sd_bus *bus; + sd_bus_slot *slot_disconnected; + + Hashmap *items; + + bus_wait_for_units_ready_callback ready_callback; + void *userdata; + + WaitForItem *current; + + BusWaitForUnitsState state; + bool has_failed:1; +} BusWaitForUnits; + +static WaitForItem *wait_for_item_free(WaitForItem *item) { + int r; + + if (!item) + return NULL; + + if (item->parent) { + if (FLAGS_SET(item->flags, BUS_WAIT_REFFED) && item->bus_path && item->parent->bus) { + r = sd_bus_call_method_async( + item->parent->bus, + NULL, + "org.freedesktop.systemd1", + item->bus_path, + "org.freedesktop.systemd1.Unit", + "Unref", + NULL, + NULL, + NULL); + if (r < 0) + log_debug_errno(r, "Failed to drop reference to unit %s, ignoring: %m", item->bus_path); + } + + assert_se(hashmap_remove(item->parent->items, item->bus_path) == item); + + if (item->parent->current == item) + item->parent->current = NULL; + } + + sd_bus_slot_unref(item->slot_properties_changed); + sd_bus_slot_unref(item->slot_get_all); + + free(item->bus_path); + free(item->active_state); + free(item->clean_result); + + return mfree(item); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(WaitForItem*, wait_for_item_free); + +static void call_unit_callback_and_wait(BusWaitForUnits *d, WaitForItem *item, bool good) { + d->current = item; + + if (item->unit_callback) + item->unit_callback(d, item->bus_path, good, item->userdata); + + wait_for_item_free(item); +} + +static void bus_wait_for_units_clear(BusWaitForUnits *d) { + WaitForItem *item; + + assert(d); + + d->slot_disconnected = sd_bus_slot_unref(d->slot_disconnected); + d->bus = sd_bus_unref(d->bus); + + while ((item = hashmap_first(d->items))) + call_unit_callback_and_wait(d, item, false); + + d->items = hashmap_free(d->items); +} + +static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) { + BusWaitForUnits *d = userdata; + + assert(m); + assert(d); + + log_error("Warning! D-Bus connection terminated."); + + bus_wait_for_units_clear(d); + + if (d->ready_callback) + d->ready_callback(d, false, d->userdata); + else /* If no ready callback is specified close the connection so that the event loop exits */ + sd_bus_close(sd_bus_message_get_bus(m)); + + return 0; +} + +int bus_wait_for_units_new(sd_bus *bus, BusWaitForUnits **ret) { + _cleanup_(bus_wait_for_units_freep) BusWaitForUnits *d = NULL; + int r; + + assert(bus); + assert(ret); + + d = new(BusWaitForUnits, 1); + if (!d) + return -ENOMEM; + + *d = (BusWaitForUnits) { + .state = BUS_WAIT_SUCCESS, + .bus = sd_bus_ref(bus), + }; + + r = sd_bus_match_signal_async( + bus, + &d->slot_disconnected, + "org.freedesktop.DBus.Local", + NULL, + "org.freedesktop.DBus.Local", + "Disconnected", + match_disconnected, NULL, d); + if (r < 0) + return r; + + *ret = TAKE_PTR(d); + return 0; +} + +BusWaitForUnits* bus_wait_for_units_free(BusWaitForUnits *d) { + if (!d) + return NULL; + + bus_wait_for_units_clear(d); + sd_bus_slot_unref(d->slot_disconnected); + sd_bus_unref(d->bus); + + return mfree(d); +} + +static bool bus_wait_for_units_is_ready(BusWaitForUnits *d) { + assert(d); + + if (!d->bus) /* Disconnected? */ + return true; + + return hashmap_isempty(d->items); +} + +void bus_wait_for_units_set_ready_callback(BusWaitForUnits *d, bus_wait_for_units_ready_callback callback, void *userdata) { + assert(d); + + d->ready_callback = callback; + d->userdata = userdata; +} + +static void bus_wait_for_units_check_ready(BusWaitForUnits *d) { + assert(d); + + if (!bus_wait_for_units_is_ready(d)) + return; + + d->state = d->has_failed ? BUS_WAIT_FAILURE : BUS_WAIT_SUCCESS; + + if (d->ready_callback) + d->ready_callback(d, d->state, d->userdata); +} + +static void wait_for_item_check_ready(WaitForItem *item) { + BusWaitForUnits *d; + + assert(item); + assert_se(d = item->parent); + + if (FLAGS_SET(item->flags, BUS_WAIT_FOR_MAINTENANCE_END)) { + + if (item->clean_result && !streq(item->clean_result, "success")) + d->has_failed = true; + + if (!item->active_state || streq(item->active_state, "maintenance")) + return; + } + + if (FLAGS_SET(item->flags, BUS_WAIT_NO_JOB) && item->job_id != 0) + return; + + if (FLAGS_SET(item->flags, BUS_WAIT_FOR_INACTIVE)) { + + if (streq_ptr(item->active_state, "failed")) + d->has_failed = true; + else if (!streq_ptr(item->active_state, "inactive")) + return; + } + + call_unit_callback_and_wait(d, item, true); + bus_wait_for_units_check_ready(d); +} + +static int property_map_job( + sd_bus *bus, + const char *member, + sd_bus_message *m, + sd_bus_error *error, + void *userdata) { + + WaitForItem *item = userdata; + const char *path; + uint32_t id; + int r; + + assert(item); + + r = sd_bus_message_read(m, "(uo)", &id, &path); + if (r < 0) + return r; + + item->job_id = id; + return 0; +} + +static int wait_for_item_parse_properties(WaitForItem *item, sd_bus_message *m) { + + static const struct bus_properties_map map[] = { + { "ActiveState", "s", NULL, offsetof(WaitForItem, active_state) }, + { "Job", "(uo)", property_map_job, 0 }, + { "CleanResult", "s", NULL, offsetof(WaitForItem, clean_result) }, + {} + }; + + int r; + + assert(item); + assert(m); + + r = bus_message_map_all_properties(m, map, BUS_MAP_STRDUP, NULL, item); + if (r < 0) + return r; + + wait_for_item_check_ready(item); + return 0; +} + +static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) { + WaitForItem *item = userdata; + const char *interface; + int r; + + assert(item); + + r = sd_bus_message_read(m, "s", &interface); + if (r < 0) { + log_debug_errno(r, "Failed to parse PropertiesChanged signal: %m"); + return 0; + } + + if (!streq(interface, "org.freedesktop.systemd1.Unit")) + return 0; + + r = wait_for_item_parse_properties(item, m); + if (r < 0) + log_debug_errno(r, "Failed to process PropertiesChanged signal: %m"); + + return 0; +} + +static int on_get_all_properties(sd_bus_message *m, void *userdata, sd_bus_error *error) { + WaitForItem *item = userdata; + int r; + + assert(item); + + if (sd_bus_error_is_set(error)) { + BusWaitForUnits *d = item->parent; + + d->has_failed = true; + + log_debug_errno(sd_bus_error_get_errno(error), "GetAll() failed for %s: %s", + item->bus_path, error->message); + + call_unit_callback_and_wait(d, item, false); + bus_wait_for_units_check_ready(d); + return 0; + } + + r = wait_for_item_parse_properties(item, m); + if (r < 0) + log_debug_errno(r, "Failed to process GetAll method reply: %m"); + + return 0; +} + +int bus_wait_for_units_add_unit( + BusWaitForUnits *d, + const char *unit, + BusWaitForUnitsFlags flags, + bus_wait_for_units_unit_callback callback, + void *userdata) { + + _cleanup_(wait_for_item_freep) WaitForItem *item = NULL; + int r; + + assert(d); + assert(unit); + + assert(flags != 0); + + r = hashmap_ensure_allocated(&d->items, &string_hash_ops); + if (r < 0) + return r; + + item = new(WaitForItem, 1); + if (!item) + return -ENOMEM; + + *item = (WaitForItem) { + .flags = flags, + .bus_path = unit_dbus_path_from_name(unit), + .unit_callback = callback, + .userdata = userdata, + .job_id = UINT32_MAX, + }; + + if (!item->bus_path) + return -ENOMEM; + + if (!FLAGS_SET(item->flags, BUS_WAIT_REFFED)) { + r = sd_bus_call_method_async( + d->bus, + NULL, + "org.freedesktop.systemd1", + item->bus_path, + "org.freedesktop.systemd1.Unit", + "Ref", + NULL, + NULL, + NULL); + if (r < 0) + return log_debug_errno(r, "Failed to add reference to unit %s: %m", unit); + + item->flags |= BUS_WAIT_REFFED; + } + + r = sd_bus_match_signal_async( + d->bus, + &item->slot_properties_changed, + "org.freedesktop.systemd1", + item->bus_path, + "org.freedesktop.DBus.Properties", + "PropertiesChanged", + on_properties_changed, + NULL, + item); + if (r < 0) + return log_debug_errno(r, "Failed to request match for PropertiesChanged signal: %m"); + + r = sd_bus_call_method_async( + d->bus, + &item->slot_get_all, + "org.freedesktop.systemd1", + item->bus_path, + "org.freedesktop.DBus.Properties", + "GetAll", + on_get_all_properties, + item, + "s", FLAGS_SET(item->flags, BUS_WAIT_FOR_MAINTENANCE_END) ? NULL : "org.freedesktop.systemd1.Unit"); + if (r < 0) + return log_debug_errno(r, "Failed to request properties of unit %s: %m", unit); + + r = hashmap_put(d->items, item->bus_path, item); + if (r < 0) + return r; + + d->state = BUS_WAIT_RUNNING; + item->parent = d; + TAKE_PTR(item); + return 0; +} + +int bus_wait_for_units_run(BusWaitForUnits *d) { + int r; + + assert(d); + + while (d->state == BUS_WAIT_RUNNING) { + + r = sd_bus_process(d->bus, NULL); + if (r < 0) + return r; + if (r > 0) + continue; + + r = sd_bus_wait(d->bus, (uint64_t) -1); + if (r < 0) + return r; + } + + return d->state; +} + +BusWaitForUnitsState bus_wait_for_units_state(BusWaitForUnits *d) { + assert(d); + + return d->state; +} diff --git a/src/shared/bus-wait-for-units.h b/src/shared/bus-wait-for-units.h new file mode 100644 index 0000000..f7ab666 --- /dev/null +++ b/src/shared/bus-wait-for-units.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" +#include "sd-bus.h" + +typedef struct BusWaitForUnits BusWaitForUnits; + +typedef enum BusWaitForUnitsState { + BUS_WAIT_SUCCESS, /* Nothing to wait for anymore and nothing failed */ + BUS_WAIT_FAILURE, /* dito, but something failed */ + BUS_WAIT_RUNNING, /* Still something to wait for */ + _BUS_WAIT_FOR_UNITS_STATE_MAX, + _BUS_WAIT_FOR_UNITS_STATE_INVALID = -1, +} BusWaitForUnitsState; + +typedef enum BusWaitForUnitsFlags { + BUS_WAIT_FOR_MAINTENANCE_END = 1 << 0, /* Wait until the unit is no longer in maintenance state */ + BUS_WAIT_FOR_INACTIVE = 1 << 1, /* Wait until the unit is back in inactive or dead state */ + BUS_WAIT_NO_JOB = 1 << 2, /* Wait until there's no more job pending */ + BUS_WAIT_REFFED = 1 << 3, /* The unit is already reffed with RefUnit() */ +} BusWaitForUnitsFlags; + +typedef void (*bus_wait_for_units_ready_callback)(BusWaitForUnits *d, BusWaitForUnitsState state, void *userdata); +typedef void (*bus_wait_for_units_unit_callback)(BusWaitForUnits *d, const char *unit_path, bool good, void *userdata); + +int bus_wait_for_units_new(sd_bus *bus, BusWaitForUnits **ret); +BusWaitForUnits* bus_wait_for_units_free(BusWaitForUnits *d); + +BusWaitForUnitsState bus_wait_for_units_state(BusWaitForUnits *d); +void bus_wait_for_units_set_ready_callback(BusWaitForUnits *d, bus_wait_for_units_ready_callback callback, void *userdata); +int bus_wait_for_units_add_unit(BusWaitForUnits *d, const char *unit, BusWaitForUnitsFlags flags, bus_wait_for_units_unit_callback callback, void *userdata); +int bus_wait_for_units_run(BusWaitForUnits *d); + +DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForUnits*, bus_wait_for_units_free); diff --git a/src/shared/calendarspec.c b/src/shared/calendarspec.c new file mode 100644 index 0000000..7162592 --- /dev/null +++ b/src/shared/calendarspec.c @@ -0,0 +1,1405 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include "alloc-util.h" +#include "calendarspec.h" +#include "errno-util.h" +#include "fileio.h" +#include "macro.h" +#include "parse-util.h" +#include "process-util.h" +#include "sort-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" + +#define BITS_WEEKDAYS 127 +#define MIN_YEAR 1970 +#define MAX_YEAR 2199 + +/* An arbitrary limit on the length of the chains of components. We don't want to + * build a very long linked list, which would be slow to iterate over and might cause + * our stack to overflow. It's unlikely that legitimate uses require more than a few + * linked compenents anyway. */ +#define CALENDARSPEC_COMPONENTS_MAX 240 + +/* Let's make sure that the microsecond component is safe to be stored in an 'int' */ +assert_cc(INT_MAX >= USEC_PER_SEC); + +static void chain_free(CalendarComponent *c) { + CalendarComponent *n; + + while (c) { + n = c->next; + free(c); + c = n; + } +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarComponent*, chain_free); + +CalendarSpec* calendar_spec_free(CalendarSpec *c) { + + if (!c) + return NULL; + + chain_free(c->year); + chain_free(c->month); + chain_free(c->day); + chain_free(c->hour); + chain_free(c->minute); + chain_free(c->microsecond); + free(c->timezone); + + return mfree(c); +} + +static int component_compare(CalendarComponent * const *a, CalendarComponent * const *b) { + int r; + + r = CMP((*a)->start, (*b)->start); + if (r != 0) + return r; + + r = CMP((*a)->stop, (*b)->stop); + if (r != 0) + return r; + + return CMP((*a)->repeat, (*b)->repeat); +} + +static void normalize_chain(CalendarComponent **c) { + CalendarComponent **b, *i, **j, *next; + size_t n = 0, k; + + assert(c); + + for (i = *c; i; i = i->next) { + n++; + + /* + * While we're counting the chain, also normalize `stop` + * so the length of the range is a multiple of `repeat` + */ + if (i->stop > i->start && i->repeat > 0) + i->stop -= (i->stop - i->start) % i->repeat; + + /* If a repeat value is specified, but it cannot even be triggered once, let's suppress + * it. + * + * Similar, if the stop value is the same as the start value, then let's just make this a + * non-repeating chain element */ + if ((i->stop > i->start && i->repeat > 0 && i->start + i->repeat > i->stop) || + i->start == i->stop) { + i->repeat = 0; + i->stop = -1; + } + } + + if (n <= 1) + return; + + j = b = newa(CalendarComponent*, n); + for (i = *c; i; i = i->next) + *(j++) = i; + + typesafe_qsort(b, n, component_compare); + + b[n-1]->next = NULL; + next = b[n-1]; + + /* Drop non-unique entries */ + for (k = n-1; k > 0; k--) { + if (component_compare(&b[k-1], &next) == 0) { + free(b[k-1]); + continue; + } + + b[k-1]->next = next; + next = b[k-1]; + } + + *c = next; +} + +static void fix_year(CalendarComponent *c) { + /* Turns 12 → 2012, 89 → 1989 */ + + while (c) { + if (c->start >= 0 && c->start < 70) + c->start += 2000; + + if (c->stop >= 0 && c->stop < 70) + c->stop += 2000; + + if (c->start >= 70 && c->start < 100) + c->start += 1900; + + if (c->stop >= 70 && c->stop < 100) + c->stop += 1900; + + c = c->next; + } +} + +int calendar_spec_normalize(CalendarSpec *c) { + assert(c); + + if (streq_ptr(c->timezone, "UTC")) { + c->utc = true; + c->timezone = mfree(c->timezone); + } + + if (c->weekdays_bits <= 0 || c->weekdays_bits >= BITS_WEEKDAYS) + c->weekdays_bits = -1; + + if (c->end_of_month && !c->day) + c->end_of_month = false; + + fix_year(c->year); + + normalize_chain(&c->year); + normalize_chain(&c->month); + normalize_chain(&c->day); + normalize_chain(&c->hour); + normalize_chain(&c->minute); + normalize_chain(&c->microsecond); + + return 0; +} + +static bool chain_valid(CalendarComponent *c, int from, int to, bool end_of_month) { + assert(to >= from); + + if (!c) + return true; + + /* Forbid dates more than 28 days from the end of the month */ + if (end_of_month) + to -= 3; + + if (c->start < from || c->start > to) + return false; + + /* Avoid overly large values that could cause overflow */ + if (c->repeat > to - from) + return false; + + /* + * c->repeat must be short enough so at least one repetition may + * occur before the end of the interval. For dates scheduled + * relative to the end of the month, c->start and c->stop + * correspond to the Nth last day of the month. + */ + if (c->stop >= 0) { + if (c->stop < from || c ->stop > to) + return false; + + if (c->start + c->repeat > c->stop) + return false; + } else { + if (end_of_month && c->start - c->repeat < from) + return false; + + if (!end_of_month && c->start + c->repeat > to) + return false; + } + + if (c->next) + return chain_valid(c->next, from, to, end_of_month); + + return true; +} + +_pure_ bool calendar_spec_valid(CalendarSpec *c) { + assert(c); + + if (c->weekdays_bits > BITS_WEEKDAYS) + return false; + + if (!chain_valid(c->year, MIN_YEAR, MAX_YEAR, false)) + return false; + + if (!chain_valid(c->month, 1, 12, false)) + return false; + + if (!chain_valid(c->day, 1, 31, c->end_of_month)) + return false; + + if (!chain_valid(c->hour, 0, 23, false)) + return false; + + if (!chain_valid(c->minute, 0, 59, false)) + return false; + + if (!chain_valid(c->microsecond, 0, 60*USEC_PER_SEC-1, false)) + return false; + + return true; +} + +static void format_weekdays(FILE *f, const CalendarSpec *c) { + static const char *const days[] = { + "Mon", + "Tue", + "Wed", + "Thu", + "Fri", + "Sat", + "Sun" + }; + + int l, x; + bool need_comma = false; + + assert(f); + assert(c); + assert(c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS); + + for (x = 0, l = -1; x < (int) ELEMENTSOF(days); x++) { + + if (c->weekdays_bits & (1 << x)) { + + if (l < 0) { + if (need_comma) + fputc(',', f); + else + need_comma = true; + + fputs(days[x], f); + l = x; + } + + } else if (l >= 0) { + + if (x > l + 1) { + fputs(x > l + 2 ? ".." : ",", f); + fputs(days[x-1], f); + } + + l = -1; + } + } + + if (l >= 0 && x > l + 1) { + fputs(x > l + 2 ? ".." : ",", f); + fputs(days[x-1], f); + } +} + +static void format_chain(FILE *f, int space, const CalendarComponent *c, bool usec) { + int d = usec ? (int) USEC_PER_SEC : 1; + + assert(f); + + if (!c) { + fputc('*', f); + return; + } + + if (usec && c->start == 0 && c->repeat == USEC_PER_SEC && !c->next) { + fputc('*', f); + return; + } + + assert(c->start >= 0); + + fprintf(f, "%0*i", space, c->start / d); + if (c->start % d > 0) + fprintf(f, ".%06i", c->start % d); + + if (c->stop > 0) + fprintf(f, "..%0*i", space, c->stop / d); + if (c->stop % d > 0) + fprintf(f, ".%06i", c->stop % d); + + if (c->repeat > 0 && !(c->stop > 0 && c->repeat == d)) + fprintf(f, "/%i", c->repeat / d); + if (c->repeat % d > 0) + fprintf(f, ".%06i", c->repeat % d); + + if (c->next) { + fputc(',', f); + format_chain(f, space, c->next, usec); + } +} + +int calendar_spec_to_string(const CalendarSpec *c, char **p) { + char *buf = NULL; + size_t sz = 0; + FILE *f; + int r; + + assert(c); + assert(p); + + f = open_memstream_unlocked(&buf, &sz); + if (!f) + return -ENOMEM; + + if (c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS) { + format_weekdays(f, c); + fputc(' ', f); + } + + format_chain(f, 4, c->year, false); + fputc('-', f); + format_chain(f, 2, c->month, false); + fputc(c->end_of_month ? '~' : '-', f); + format_chain(f, 2, c->day, false); + fputc(' ', f); + format_chain(f, 2, c->hour, false); + fputc(':', f); + format_chain(f, 2, c->minute, false); + fputc(':', f); + format_chain(f, 2, c->microsecond, true); + + if (c->utc) + fputs(" UTC", f); + else if (c->timezone) { + fputc(' ', f); + fputs(c->timezone, f); + } else if (IN_SET(c->dst, 0, 1)) { + + /* If daylight saving is explicitly on or off, let's show the used timezone. */ + + tzset(); + + if (!isempty(tzname[c->dst])) { + fputc(' ', f); + fputs(tzname[c->dst], f); + } + } + + r = fflush_and_check(f); + fclose(f); + + if (r < 0) { + free(buf); + return r; + } + + *p = buf; + return 0; +} + +static int parse_weekdays(const char **p, CalendarSpec *c) { + static const struct { + const char *name; + const int nr; + } day_nr[] = { + { "Monday", 0 }, + { "Mon", 0 }, + { "Tuesday", 1 }, + { "Tue", 1 }, + { "Wednesday", 2 }, + { "Wed", 2 }, + { "Thursday", 3 }, + { "Thu", 3 }, + { "Friday", 4 }, + { "Fri", 4 }, + { "Saturday", 5 }, + { "Sat", 5 }, + { "Sunday", 6 }, + { "Sun", 6 } + }; + + int l = -1; + bool first = true; + + assert(p); + assert(*p); + assert(c); + + for (;;) { + size_t i; + + for (i = 0; i < ELEMENTSOF(day_nr); i++) { + size_t skip; + + if (!startswith_no_case(*p, day_nr[i].name)) + continue; + + skip = strlen(day_nr[i].name); + + if (!IN_SET((*p)[skip], 0, '-', '.', ',', ' ')) + return -EINVAL; + + c->weekdays_bits |= 1 << day_nr[i].nr; + + if (l >= 0) { + int j; + + if (l > day_nr[i].nr) + return -EINVAL; + + for (j = l + 1; j < day_nr[i].nr; j++) + c->weekdays_bits |= 1 << j; + } + + *p += skip; + break; + } + + /* Couldn't find this prefix, so let's assume the + weekday was not specified and let's continue with + the date */ + if (i >= ELEMENTSOF(day_nr)) + return first ? 0 : -EINVAL; + + /* We reached the end of the string */ + if (**p == 0) + return 0; + + /* We reached the end of the weekday spec part */ + if (**p == ' ') { + *p += strspn(*p, " "); + return 0; + } + + if (**p == '.') { + if (l >= 0) + return -EINVAL; + + if ((*p)[1] != '.') + return -EINVAL; + + l = day_nr[i].nr; + *p += 2; + + /* Support ranges with "-" for backwards compatibility */ + } else if (**p == '-') { + if (l >= 0) + return -EINVAL; + + l = day_nr[i].nr; + *p += 1; + + } else if (**p == ',') { + l = -1; + *p += 1; + } + + /* Allow a trailing comma but not an open range */ + if (IN_SET(**p, 0, ' ')) { + *p += strspn(*p, " "); + return l < 0 ? 0 : -EINVAL; + } + + first = false; + } +} + +static int parse_one_number(const char *p, const char **e, unsigned long *ret) { + char *ee = NULL; + unsigned long value; + + errno = 0; + value = strtoul(p, &ee, 10); + if (errno > 0) + return -errno; + if (ee == p) + return -EINVAL; + + *ret = value; + *e = ee; + return 0; +} + +static int parse_component_decimal(const char **p, bool usec, int *res) { + unsigned long value; + const char *e = NULL; + int r; + + if (!isdigit(**p)) + return -EINVAL; + + r = parse_one_number(*p, &e, &value); + if (r < 0) + return r; + + if (usec) { + if (value * USEC_PER_SEC / USEC_PER_SEC != value) + return -ERANGE; + + value *= USEC_PER_SEC; + + /* One "." is a decimal point, but ".." is a range separator */ + if (e[0] == '.' && e[1] != '.') { + unsigned add; + + e++; + r = parse_fractional_part_u(&e, 6, &add); + if (r < 0) + return r; + + if (add + value < value) + return -ERANGE; + value += add; + } + } + + if (value > INT_MAX) + return -ERANGE; + + *p = e; + *res = value; + + return 0; +} + +static int const_chain(int value, CalendarComponent **c) { + CalendarComponent *cc = NULL; + + assert(c); + + cc = new(CalendarComponent, 1); + if (!cc) + return -ENOMEM; + + *cc = (CalendarComponent) { + .start = value, + .stop = -1, + .repeat = 0, + .next = *c, + }; + + *c = cc; + + return 0; +} + +static int calendarspec_from_time_t(CalendarSpec *c, time_t time) { + _cleanup_(chain_freep) CalendarComponent + *year = NULL, *month = NULL, *day = NULL, + *hour = NULL, *minute = NULL, *us = NULL; + struct tm tm; + int r; + + if (!gmtime_r(&time, &tm)) + return -ERANGE; + + if (tm.tm_year > INT_MAX - 1900) + return -ERANGE; + + r = const_chain(tm.tm_year + 1900, &year); + if (r < 0) + return r; + + r = const_chain(tm.tm_mon + 1, &month); + if (r < 0) + return r; + + r = const_chain(tm.tm_mday, &day); + if (r < 0) + return r; + + r = const_chain(tm.tm_hour, &hour); + if (r < 0) + return r; + + r = const_chain(tm.tm_min, &minute); + if (r < 0) + return r; + + r = const_chain(tm.tm_sec * USEC_PER_SEC, &us); + if (r < 0) + return r; + + c->utc = true; + c->year = TAKE_PTR(year); + c->month = TAKE_PTR(month); + c->day = TAKE_PTR(day); + c->hour = TAKE_PTR(hour); + c->minute = TAKE_PTR(minute); + c->microsecond = TAKE_PTR(us); + return 0; +} + +static int prepend_component(const char **p, bool usec, unsigned nesting, CalendarComponent **c) { + int r, start, stop = -1, repeat = 0; + CalendarComponent *cc; + const char *e = *p; + + assert(p); + assert(c); + + if (nesting > CALENDARSPEC_COMPONENTS_MAX) + return -ENOBUFS; + + r = parse_component_decimal(&e, usec, &start); + if (r < 0) + return r; + + if (e[0] == '.' && e[1] == '.') { + e += 2; + r = parse_component_decimal(&e, usec, &stop); + if (r < 0) + return r; + + repeat = usec ? USEC_PER_SEC : 1; + } + + if (*e == '/') { + e++; + r = parse_component_decimal(&e, usec, &repeat); + if (r < 0) + return r; + + if (repeat == 0) + return -ERANGE; + } else { + /* If no repeat value is specified for the µs component, then let's explicitly refuse ranges + * below 1s because our default repeat granularity is beyond that. */ + + /* Overflow check */ + if (start > INT_MAX - repeat) + return -ERANGE; + + if (usec && stop >= 0 && start + repeat > stop) + return -EINVAL; + } + + if (!IN_SET(*e, 0, ' ', ',', '-', '~', ':')) + return -EINVAL; + + cc = new(CalendarComponent, 1); + if (!cc) + return -ENOMEM; + + *cc = (CalendarComponent) { + .start = start, + .stop = stop, + .repeat = repeat, + .next = *c, + }; + + *p = e; + *c = cc; + + if (*e ==',') { + *p += 1; + return prepend_component(p, usec, nesting + 1, c); + } + + return 0; +} + +static int parse_chain(const char **p, bool usec, CalendarComponent **c) { + _cleanup_(chain_freep) CalendarComponent *cc = NULL; + const char *t; + int r; + + assert(p); + assert(c); + + t = *p; + + if (t[0] == '*') { + if (usec) { + r = const_chain(0, c); + if (r < 0) + return r; + (*c)->repeat = USEC_PER_SEC; + } else + *c = NULL; + + *p = t + 1; + return 0; + } + + r = prepend_component(&t, usec, 0, &cc); + if (r < 0) + return r; + + *p = t; + *c = TAKE_PTR(cc); + return 0; +} + +static int parse_date(const char **p, CalendarSpec *c) { + _cleanup_(chain_freep) CalendarComponent *first = NULL, *second = NULL, *third = NULL; + const char *t; + int r; + + assert(p); + assert(*p); + assert(c); + + t = *p; + + if (*t == 0) + return 0; + + /* @TIMESTAMP — UNIX time in seconds since the epoch */ + if (*t == '@') { + unsigned long value; + time_t time; + + r = parse_one_number(t + 1, &t, &value); + if (r < 0) + return r; + + time = value; + if ((unsigned long) time != value) + return -ERANGE; + + r = calendarspec_from_time_t(c, time); + if (r < 0) + return r; + + *p = t; + return 1; /* finito, don't parse H:M:S after that */ + } + + r = parse_chain(&t, false, &first); + if (r < 0) + return r; + + /* Already the end? A ':' as separator? In that case this was a time, not a date */ + if (IN_SET(*t, 0, ':')) + return 0; + + if (*t == '~') + c->end_of_month = true; + else if (*t != '-') + return -EINVAL; + + t++; + r = parse_chain(&t, false, &second); + if (r < 0) + return r; + + /* Got two parts, hence it's month and day */ + if (IN_SET(*t, 0, ' ')) { + *p = t + strspn(t, " "); + c->month = TAKE_PTR(first); + c->day = TAKE_PTR(second); + return 0; + } else if (c->end_of_month) + return -EINVAL; + + if (*t == '~') + c->end_of_month = true; + else if (*t != '-') + return -EINVAL; + + t++; + r = parse_chain(&t, false, &third); + if (r < 0) + return r; + + if (!IN_SET(*t, 0, ' ')) + return -EINVAL; + + /* Got three parts, hence it is year, month and day */ + *p = t + strspn(t, " "); + c->year = TAKE_PTR(first); + c->month = TAKE_PTR(second); + c->day = TAKE_PTR(third); + return 0; +} + +static int parse_calendar_time(const char **p, CalendarSpec *c) { + _cleanup_(chain_freep) CalendarComponent *h = NULL, *m = NULL, *s = NULL; + const char *t; + int r; + + assert(p); + assert(*p); + assert(c); + + t = *p; + + /* If no time is specified at all, then this means 00:00:00 */ + if (*t == 0) + goto null_hour; + + r = parse_chain(&t, false, &h); + if (r < 0) + return r; + + if (*t != ':') + return -EINVAL; + + t++; + r = parse_chain(&t, false, &m); + if (r < 0) + return r; + + /* Already at the end? Then it's hours and minutes, and seconds are 0 */ + if (*t == 0) + goto null_second; + + if (*t != ':') + return -EINVAL; + + t++; + r = parse_chain(&t, true, &s); + if (r < 0) + return r; + + /* At the end? Then it's hours, minutes and seconds */ + if (*t == 0) + goto finish; + + return -EINVAL; + +null_hour: + r = const_chain(0, &h); + if (r < 0) + return r; + + r = const_chain(0, &m); + if (r < 0) + return r; + +null_second: + r = const_chain(0, &s); + if (r < 0) + return r; + +finish: + *p = t; + c->hour = TAKE_PTR(h); + c->minute = TAKE_PTR(m); + c->microsecond = TAKE_PTR(s); + + return 0; +} + +int calendar_spec_from_string(const char *p, CalendarSpec **spec) { + const char *utc; + _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL; + _cleanup_free_ char *p_tmp = NULL; + int r; + + assert(p); + + c = new(CalendarSpec, 1); + if (!c) + return -ENOMEM; + + *c = (CalendarSpec) { + .dst = -1, + .timezone = NULL, + }; + + utc = endswith_no_case(p, " UTC"); + if (utc) { + c->utc = true; + p = p_tmp = strndup(p, utc - p); + if (!p) + return -ENOMEM; + } else { + const char *e = NULL; + int j; + + tzset(); + + /* Check if the local timezone was specified? */ + for (j = 0; j <= 1; j++) { + if (isempty(tzname[j])) + continue; + + e = endswith_no_case(p, tzname[j]); + if (!e) + continue; + if (e == p) + continue; + if (e[-1] != ' ') + continue; + + break; + } + + /* Found one of the two timezones specified? */ + if (IN_SET(j, 0, 1)) { + p = p_tmp = strndup(p, e - p - 1); + if (!p) + return -ENOMEM; + + c->dst = j; + } else { + const char *last_space; + + last_space = strrchr(p, ' '); + if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) { + c->timezone = strdup(last_space + 1); + if (!c->timezone) + return -ENOMEM; + + p = p_tmp = strndup(p, last_space - p); + if (!p) + return -ENOMEM; + } + } + } + + if (isempty(p)) + return -EINVAL; + + if (strcaseeq(p, "minutely")) { + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "hourly")) { + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "daily")) { + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "monthly")) { + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (STRCASE_IN_SET(p, + "annually", + "yearly", + "anually") /* backwards compatibility */ ) { + + r = const_chain(1, &c->month); + if (r < 0) + return r; + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "weekly")) { + + c->weekdays_bits = 1; + + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "quarterly")) { + + r = const_chain(1, &c->month); + if (r < 0) + return r; + r = const_chain(4, &c->month); + if (r < 0) + return r; + r = const_chain(7, &c->month); + if (r < 0) + return r; + r = const_chain(10, &c->month); + if (r < 0) + return r; + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (STRCASE_IN_SET(p, + "biannually", + "bi-annually", + "semiannually", + "semi-annually")) { + + r = const_chain(1, &c->month); + if (r < 0) + return r; + r = const_chain(7, &c->month); + if (r < 0) + return r; + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else { + r = parse_weekdays(&p, c); + if (r < 0) + return r; + + r = parse_date(&p, c); + if (r < 0) + return r; + + if (r == 0) { + r = parse_calendar_time(&p, c); + if (r < 0) + return r; + } + + if (*p != 0) + return -EINVAL; + } + + r = calendar_spec_normalize(c); + if (r < 0) + return r; + + if (!calendar_spec_valid(c)) + return -EINVAL; + + if (spec) + *spec = TAKE_PTR(c); + return 0; +} + +static int find_end_of_month(struct tm *tm, bool utc, int day) { + struct tm t = *tm; + + t.tm_mon++; + t.tm_mday = 1 - day; + + if (mktime_or_timegm(&t, utc) < 0 || + t.tm_mon != tm->tm_mon) + return -1; + + return t.tm_mday; +} + +static int find_matching_component(const CalendarSpec *spec, const CalendarComponent *c, + struct tm *tm, int *val) { + const CalendarComponent *p = c; + int start, stop, d = -1; + bool d_set = false; + int r; + + assert(val); + + if (!c) + return 0; + + while (c) { + start = c->start; + stop = c->stop; + + if (spec->end_of_month && p == spec->day) { + start = find_end_of_month(tm, spec->utc, start); + stop = find_end_of_month(tm, spec->utc, stop); + + if (stop > 0) + SWAP_TWO(start, stop); + } + + if (start >= *val) { + + if (!d_set || start < d) { + d = start; + d_set = true; + } + + } else if (c->repeat > 0) { + int k; + + k = start + c->repeat * DIV_ROUND_UP(*val - start, c->repeat); + + if ((!d_set || k < d) && (stop < 0 || k <= stop)) { + d = k; + d_set = true; + } + } + + c = c->next; + } + + if (!d_set) + return -ENOENT; + + r = *val != d; + *val = d; + return r; +} + +static int tm_within_bounds(struct tm *tm, bool utc) { + struct tm t; + assert(tm); + + /* + * Set an upper bound on the year so impossible dates like "*-02-31" + * don't cause find_next() to loop forever. tm_year contains years + * since 1900, so adjust it accordingly. + */ + if (tm->tm_year + 1900 > MAX_YEAR) + return -ERANGE; + + t = *tm; + if (mktime_or_timegm(&t, utc) < 0) + return negative_errno(); + + /* Did any normalization take place? If so, it was out of bounds before */ + bool good = t.tm_year == tm->tm_year && + t.tm_mon == tm->tm_mon && + t.tm_mday == tm->tm_mday && + t.tm_hour == tm->tm_hour && + t.tm_min == tm->tm_min && + t.tm_sec == tm->tm_sec; + if (!good) + *tm = t; + return good; +} + +static bool matches_weekday(int weekdays_bits, const struct tm *tm, bool utc) { + struct tm t; + int k; + + if (weekdays_bits < 0 || weekdays_bits >= BITS_WEEKDAYS) + return true; + + t = *tm; + if (mktime_or_timegm(&t, utc) < 0) + return false; + + k = t.tm_wday == 0 ? 6 : t.tm_wday - 1; + return (weekdays_bits & (1 << k)); +} + +static int find_next(const CalendarSpec *spec, struct tm *tm, usec_t *usec) { + struct tm c; + int tm_usec; + int r; + + /* Returns -ENOENT if the expression is not going to elapse anymore */ + + assert(spec); + assert(tm); + + c = *tm; + tm_usec = *usec; + + for (;;) { + /* Normalize the current date */ + (void) mktime_or_timegm(&c, spec->utc); + c.tm_isdst = spec->dst; + + c.tm_year += 1900; + r = find_matching_component(spec, spec->year, &c, &c.tm_year); + c.tm_year -= 1900; + + if (r > 0) { + c.tm_mon = 0; + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + } + if (r < 0) + return r; + if (tm_within_bounds(&c, spec->utc) <= 0) + return -ENOENT; + + c.tm_mon += 1; + r = find_matching_component(spec, spec->month, &c, &c.tm_mon); + c.tm_mon -= 1; + + if (r > 0) { + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + } + if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) { + c.tm_year++; + c.tm_mon = 0; + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + if (r == 0) + continue; + + r = find_matching_component(spec, spec->day, &c, &c.tm_mday); + if (r > 0) + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) { + c.tm_mon++; + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + if (r == 0) + continue; + + if (!matches_weekday(spec->weekdays_bits, &c, spec->utc)) { + c.tm_mday++; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + + r = find_matching_component(spec, spec->hour, &c, &c.tm_hour); + if (r > 0) + c.tm_min = c.tm_sec = tm_usec = 0; + if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) { + c.tm_mday++; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + if (r == 0) + /* The next hour we set might be missing if there + * are time zone changes. Let's try again starting at + * normalized time. */ + continue; + + r = find_matching_component(spec, spec->minute, &c, &c.tm_min); + if (r > 0) + c.tm_sec = tm_usec = 0; + if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) { + c.tm_hour++; + c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + if (r == 0) + continue; + + c.tm_sec = c.tm_sec * USEC_PER_SEC + tm_usec; + r = find_matching_component(spec, spec->microsecond, &c, &c.tm_sec); + tm_usec = c.tm_sec % USEC_PER_SEC; + c.tm_sec /= USEC_PER_SEC; + + if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) { + c.tm_min++; + c.tm_sec = tm_usec = 0; + continue; + } + if (r == 0) + continue; + + *tm = c; + *usec = tm_usec; + return 0; + } +} + +static int calendar_spec_next_usec_impl(const CalendarSpec *spec, usec_t usec, usec_t *ret_next) { + struct tm tm; + time_t t; + int r; + usec_t tm_usec; + + assert(spec); + + if (usec > USEC_TIMESTAMP_FORMATTABLE_MAX) + return -EINVAL; + + usec++; + t = (time_t) (usec / USEC_PER_SEC); + assert_se(localtime_or_gmtime_r(&t, &tm, spec->utc)); + tm_usec = usec % USEC_PER_SEC; + + r = find_next(spec, &tm, &tm_usec); + if (r < 0) + return r; + + t = mktime_or_timegm(&tm, spec->utc); + if (t < 0) + return -EINVAL; + + if (ret_next) + *ret_next = (usec_t) t * USEC_PER_SEC + tm_usec; + + return 0; +} + +typedef struct SpecNextResult { + usec_t next; + int return_value; +} SpecNextResult; + +int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *ret_next) { + SpecNextResult *shared, tmp; + int r; + + assert(spec); + + if (isempty(spec->timezone)) + return calendar_spec_next_usec_impl(spec, usec, ret_next); + + shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); + if (shared == MAP_FAILED) + return negative_errno(); + + r = safe_fork("(sd-calendar)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL); + if (r < 0) { + (void) munmap(shared, sizeof *shared); + return r; + } + if (r == 0) { + char *colon_tz; + + /* tzset(3) says $TZ should be prefixed with ":" if we reference timezone files */ + colon_tz = strjoina(":", spec->timezone); + + if (setenv("TZ", colon_tz, 1) != 0) { + shared->return_value = negative_errno(); + _exit(EXIT_FAILURE); + } + + tzset(); + + shared->return_value = calendar_spec_next_usec_impl(spec, usec, &shared->next); + + _exit(EXIT_SUCCESS); + } + + tmp = *shared; + if (munmap(shared, sizeof *shared) < 0) + return negative_errno(); + + if (tmp.return_value == 0 && ret_next) + *ret_next = tmp.next; + + return tmp.return_value; +} diff --git a/src/shared/calendarspec.h b/src/shared/calendarspec.h new file mode 100644 index 0000000..3bfe82d --- /dev/null +++ b/src/shared/calendarspec.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* A structure for specifying (possibly repetitive) points in calendar + * time, a la cron */ + +#include <stdbool.h> + +#include "time-util.h" +#include "util.h" + +typedef struct CalendarComponent { + int start; + int stop; + int repeat; + + struct CalendarComponent *next; +} CalendarComponent; + +typedef struct CalendarSpec { + int weekdays_bits; + bool end_of_month:1; + bool utc:1; + signed int dst:2; + char *timezone; + + CalendarComponent *year; + CalendarComponent *month; + CalendarComponent *day; + + CalendarComponent *hour; + CalendarComponent *minute; + CalendarComponent *microsecond; +} CalendarSpec; + +CalendarSpec* calendar_spec_free(CalendarSpec *c); + +int calendar_spec_normalize(CalendarSpec *spec); +bool calendar_spec_valid(CalendarSpec *spec); + +int calendar_spec_to_string(const CalendarSpec *spec, char **p); +int calendar_spec_from_string(const char *p, CalendarSpec **spec); + +int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next); + +DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarSpec*, calendar_spec_free); diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c new file mode 100644 index 0000000..f197f71 --- /dev/null +++ b/src/shared/cgroup-setup.c @@ -0,0 +1,841 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <ftw.h> +#include <unistd.h> + +#include "cgroup-setup.h" +#include "cgroup-util.h" +#include "errno-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "stdio-util.h" +#include "string-util.h" +#include "fs-util.h" +#include "mkdir.h" +#include "process-util.h" +#include "fileio.h" +#include "user-util.h" +#include "fd-util.h" + +bool cg_is_unified_wanted(void) { + static thread_local int wanted = -1; + bool b; + const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; + _cleanup_free_ char *c = NULL; + int r; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever was chosen for it. */ + r = cg_unified_cached(true); + if (r >= 0) + return (wanted = r >= CGROUP_UNIFIED_ALL); + + /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */ + r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); + if (r > 0) + return (wanted = b); + + /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to + * use hybrid or legacy hierarchy. */ + r = proc_cmdline_get_key("cgroup_no_v1", 0, &c); + if (r > 0 && streq_ptr(c, "all")) + return (wanted = true); + + return (wanted = is_default); +} + +bool cg_is_legacy_wanted(void) { + static thread_local int wanted = -1; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* Check if we have cgroup v2 already mounted. */ + if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, assume that at least partial legacy is wanted, + * since cgroup v2 should already be mounted at this point. */ + return (wanted = true); +} + +bool cg_is_hybrid_wanted(void) { + static thread_local int wanted = -1; + int r; + bool b; + const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; + /* We default to true if the default is "hybrid", obviously, but also when the default is "unified", + * because if we get called, it means that unified hierarchy was not mounted. */ + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever was chosen for it. */ + if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache + * a non-error result. */ + r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); + + /* The meaning of the kernel option is reversed wrt. to the return value of this function, hence the + * negation. */ + return (wanted = r > 0 ? !b : is_default); +} + +int cg_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +int cg_cpu_shares_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_CPU_SHARES_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +int cg_blkio_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_BLKIO_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { + assert(path); + assert(sb); + assert(ftwbuf); + + if (typeflag != FTW_DP) + return 0; + + if (ftwbuf->level < 1) + return 0; + + (void) rmdir(path); + return 0; +} + +int cg_trim(const char *controller, const char *path, bool delete_root) { + _cleanup_free_ char *fs = NULL; + int r, q; + + assert(path); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + errno = 0; + if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) { + if (errno == ENOENT) + r = 0; + else + r = errno_or_else(EIO); + } + + if (delete_root) { + if (rmdir(fs) < 0 && errno != ENOENT) + return -errno; + } + + q = cg_hybrid_unified(); + if (q < 0) + return q; + if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); + if (q < 0) + log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); + } + + return r; +} + +/* Create a cgroup in the hierarchy of controller. + * Returns 0 if the group already existed, 1 on success, negative otherwise. + */ +int cg_create(const char *controller, const char *path) { + _cleanup_free_ char *fs = NULL; + int r; + + r = cg_get_path_and_check(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = mkdir_parents(fs, 0755); + if (r < 0) + return r; + + r = mkdir_errno_wrapper(fs, 0755); + if (r == -EEXIST) + return 0; + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); + } + + return 1; +} + +int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { + int r, q; + + assert(pid >= 0); + + r = cg_create(controller, path); + if (r < 0) + return r; + + q = cg_attach(controller, path, pid); + if (q < 0) + return q; + + /* This does not remove the cgroup on failure */ + return r; +} + +int cg_attach(const char *controller, const char *path, pid_t pid) { + _cleanup_free_ char *fs = NULL; + char c[DECIMAL_STR_MAX(pid_t) + 2]; + int r; + + assert(path); + assert(pid >= 0); + + r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs); + if (r < 0) + return r; + + if (pid == 0) + pid = getpid_cached(); + + xsprintf(c, PID_FMT "\n", pid); + + r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); + if (r < 0) + log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path); + } + + return 0; +} + +int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { + int r; + + assert(controller); + assert(path); + assert(pid >= 0); + + r = cg_attach(controller, path, pid); + if (r < 0) { + char prefix[strlen(path) + 1]; + + /* This didn't work? Then let's try all prefixes of + * the destination */ + + PATH_FOREACH_PREFIX(prefix, path) { + int q; + + q = cg_attach(controller, prefix, pid); + if (q >= 0) + return q; + } + } + + return r; +} + +int cg_set_access( + const char *controller, + const char *path, + uid_t uid, + gid_t gid) { + + struct Attribute { + const char *name; + bool fatal; + }; + + /* cgroup v1, aka legacy/non-unified */ + static const struct Attribute legacy_attributes[] = { + { "cgroup.procs", true }, + { "tasks", false }, + { "cgroup.clone_children", false }, + {}, + }; + + /* cgroup v2, aka unified */ + static const struct Attribute unified_attributes[] = { + { "cgroup.procs", true }, + { "cgroup.subtree_control", true }, + { "cgroup.threads", false }, + {}, + }; + + static const struct Attribute* const attributes[] = { + [false] = legacy_attributes, + [true] = unified_attributes, + }; + + _cleanup_free_ char *fs = NULL; + const struct Attribute *i; + int r, unified; + + assert(path); + + if (uid == UID_INVALID && gid == GID_INVALID) + return 0; + + unified = cg_unified_controller(controller); + if (unified < 0) + return unified; + + /* Configure access to the cgroup itself */ + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0755, uid, gid); + if (r < 0) + return r; + + /* Configure access to the cgroup's attributes */ + for (i = attributes[unified]; i->name; i++) { + fs = mfree(fs); + + r = cg_get_path(controller, path, i->name, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0644, uid, gid); + if (r < 0) { + if (i->fatal) + return r; + + log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs); + } + } + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_hybrid_unified(); + if (r < 0) + return r; + if (r > 0) { + /* Always propagate access mode from unified to legacy controller */ + r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid); + if (r < 0) + log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path); + } + } + + return 0; +} + +int cg_migrate( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + bool done = false; + _cleanup_set_free_ Set *s = NULL; + int r, ret = 0; + pid_t my_pid; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + s = set_new(NULL); + if (!s) + return -ENOMEM; + + my_pid = getpid_cached(); + + do { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid = 0; + done = true; + + r = cg_enumerate_processes(cfrom, pfrom, &f); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + /* This might do weird stuff if we aren't a + * single-threaded program. However, we + * luckily know we are not */ + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) + continue; + + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) + continue; + + /* Ignore kernel threads. Since they can only + * exist in the root cgroup, we only check for + * them there. */ + if (cfrom && + empty_or_root(pfrom) && + is_kernel_thread(pid) > 0) + continue; + + r = cg_attach(cto, pto, pid); + if (r < 0) { + if (ret >= 0 && r != -ESRCH) + ret = r; + } else if (ret == 0) + ret = 1; + + done = false; + + r = set_put(s, PID_TO_PTR(pid)); + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } + + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } while (!done); + + return ret; +} + +int cg_migrate_recursive( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + _cleanup_closedir_ DIR *d = NULL; + int r, ret = 0; + char *fn; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + ret = cg_migrate(cfrom, pfrom, cto, pto, flags); + + r = cg_enumerate_subgroups(cfrom, pfrom, &d); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = path_join(empty_to_root(pfrom), fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_migrate_recursive(cfrom, p, cto, pto, flags); + if (r != 0 && ret >= 0) + ret = r; + } + + if (r < 0 && ret >= 0) + ret = r; + + if (flags & CGROUP_REMOVE) { + r = cg_rmdir(cfrom, pfrom); + if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) + return r; + } + + return ret; +} + +int cg_migrate_recursive_fallback( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + int r; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); + if (r < 0) { + char prefix[strlen(pto) + 1]; + + /* This didn't work? Then let's try all prefixes of the destination */ + + PATH_FOREACH_PREFIX(prefix, pto) { + int q; + + q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); + if (q >= 0) + return q; + } + } + + return r; +} + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { + CGroupController c; + CGroupMask done; + bool created; + int r; + + /* This one will create a cgroup in our private tree, but also + * duplicate it in the trees specified in mask, and remove it + * in all others. + * + * Returns 0 if the group already existed in the systemd hierarchy, + * 1 on success, negative otherwise. + */ + + /* First create the cgroup in our own hierarchy. */ + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path); + if (r < 0) + return r; + created = r; + + /* If we are in the unified hierarchy, we are done now */ + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return created; + + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); + done = 0; + + /* Otherwise, do the same in the other hierarchies */ + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + n = cgroup_controller_to_string(c); + if (FLAGS_SET(mask, bit)) + (void) cg_create(n, path); + + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return created; +} + +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { + int r; + + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); + if (r < 0) + return r; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return 0; + + supported &= CGROUP_MASK_V1; + CGroupMask done = 0; + + for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *p = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (path_callback) + p = path_callback(bit, userdata); + if (!p) + p = path; + + (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return 0; +} + +int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) { + CGroupController c; + CGroupMask done; + int r = 0, q; + + assert(to_callback); + + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *to = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (!FLAGS_SET(mask, bit)) + continue; + + to = to_callback(bit, userdata); + + /* Remember first error and try continuing */ + q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0); + r = (r < 0) ? r : q; + } + + return r; +} + +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { + int r, q; + + r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); + if (r < 0) + return r; + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + return r; + + return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root); +} + +int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) { + CGroupController c; + CGroupMask done; + int r = 0, q; + + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (FLAGS_SET(mask, bit)) { + /* Remember first error and try continuing */ + q = cg_trim(cgroup_controller_to_string(c), path, delete_root); + r = (r < 0) ? r : q; + } + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return r; +} + +int cg_enable_everywhere( + CGroupMask supported, + CGroupMask mask, + const char *p, + CGroupMask *ret_result_mask) { + + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *fs = NULL; + CGroupController c; + CGroupMask ret = 0; + int r; + + assert(p); + + if (supported == 0) { + if (ret_result_mask) + *ret_result_mask = 0; + return 0; + } + + r = cg_all_unified(); + if (r < 0) + return r; + if (r == 0) { + /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim + * complete success right away. (If you wonder why we return the full mask here, rather than zero: the + * caller tends to use the returned mask later on to compare if all controllers where properly joined, + * and if not requeues realization. This use is the primary purpose of the return value, hence let's + * minimize surprises here and reduce triggers for re-realization by always saying we fully + * succeeded.) */ + if (ret_result_mask) + *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with + * CGROUP_MASK_V2: The 'supported' mask + * might contain pure-V1 or BPF + * controllers, and we never want to + * claim that we could enable those with + * cgroup.subtree_control */ + return 0; + } + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); + if (r < 0) + return r; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(CGROUP_MASK_V2, bit)) + continue; + + if (!FLAGS_SET(supported, bit)) + continue; + + n = cgroup_controller_to_string(c); + { + char s[1 + strlen(n) + 1]; + + s[0] = FLAGS_SET(mask, bit) ? '+' : '-'; + strcpy(s + 1, n); + + if (!f) { + f = fopen(fs, "we"); + if (!f) + return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p); + } + + r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) { + log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m", + FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs); + clearerr(f); + + /* If we can't turn off a controller, leave it on in the reported resulting mask. This + * happens for example when we attempt to turn off a controller up in the tree that is + * used down in the tree. */ + if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY + * only here, and not follow the same logic + * for other errors such as EINVAL or + * EOPNOTSUPP or anything else. That's + * because EBUSY indicates that the + * controllers is currently enabled and + * cannot be disabled because something down + * the hierarchy is still using it. Any other + * error most likely means something like "I + * never heard of this controller" or + * similar. In the former case it's hence + * safe to assume the controller is still on + * after the failed operation, while in the + * latter case it's safer to assume the + * controller is unknown and hence certainly + * not enabled. */ + ret |= bit; + } else { + /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */ + if (FLAGS_SET(mask, bit)) + ret |= bit; + } + } + } + + /* Let's return the precise set of controllers now enabled for the cgroup. */ + if (ret_result_mask) + *ret_result_mask = ret; + + return 0; +} diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h new file mode 100644 index 0000000..7eabce2 --- /dev/null +++ b/src/shared/cgroup-setup.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +#include "cgroup-util.h" + +bool cg_is_unified_wanted(void); +bool cg_is_legacy_wanted(void); +bool cg_is_hybrid_wanted(void); + +int cg_weight_parse(const char *s, uint64_t *ret); +int cg_cpu_shares_parse(const char *s, uint64_t *ret); +int cg_blkio_weight_parse(const char *s, uint64_t *ret); + +int cg_trim(const char *controller, const char *path, bool delete_root); + +int cg_create(const char *controller, const char *path); +int cg_attach(const char *controller, const char *path, pid_t pid); +int cg_attach_fallback(const char *controller, const char *path, pid_t pid); +int cg_create_and_attach(const char *controller, const char *path, pid_t pid); + +int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); +int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata); +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); +int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root); +int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c new file mode 100644 index 0000000..f7c24f8 --- /dev/null +++ b/src/shared/cgroup-show.c @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <dirent.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "bus-error.h" +#include "bus-util.h" +#include "cgroup-show.h" +#include "cgroup-util.h" +#include "env-file.h" +#include "fd-util.h" +#include "format-util.h" +#include "locale-util.h" +#include "macro.h" +#include "output-mode.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "sort-util.h" +#include "string-util.h" +#include "terminal-util.h" +#include "unit-name.h" +#include "xattr-util.h" + +static void show_pid_array( + pid_t pids[], + unsigned n_pids, + const char *prefix, + size_t n_columns, + bool extra, + bool more, + OutputFlags flags) { + + unsigned i, j, pid_width; + + if (n_pids == 0) + return; + + typesafe_qsort(pids, n_pids, pid_compare_func); + + /* Filter duplicates */ + for (j = 0, i = 1; i < n_pids; i++) { + if (pids[i] == pids[j]) + continue; + pids[++j] = pids[i]; + } + n_pids = j + 1; + pid_width = DECIMAL_STR_WIDTH(pids[j]); + + if (flags & OUTPUT_FULL_WIDTH) + n_columns = SIZE_MAX; + else { + if (n_columns > pid_width + 3) /* something like "├─1114784 " */ + n_columns -= pid_width + 3; + else + n_columns = 20; + } + for (i = 0; i < n_pids; i++) { + _cleanup_free_ char *t = NULL; + + (void) get_process_cmdline(pids[i], n_columns, + PROCESS_CMDLINE_COMM_FALLBACK | PROCESS_CMDLINE_USE_LOCALE, + &t); + + if (extra) + printf("%s%s ", prefix, special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET)); + else + printf("%s%s", prefix, special_glyph(((more || i < n_pids-1) ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT))); + + printf("%s%*"PID_PRI" %s%s\n", ansi_grey(), pid_width, pids[i], strna(t), ansi_normal()); + } +} + +static int show_cgroup_one_by_path( + const char *path, + const char *prefix, + size_t n_columns, + bool more, + OutputFlags flags) { + + char *fn; + _cleanup_fclose_ FILE *f = NULL; + size_t n = 0, n_allocated = 0; + _cleanup_free_ pid_t *pids = NULL; + _cleanup_free_ char *p = NULL; + pid_t pid; + int r; + + r = cg_mangle_path(path, &p); + if (r < 0) + return r; + + fn = strjoina(p, "/cgroup.procs"); + f = fopen(fn, "re"); + if (!f) + return -errno; + + while ((r = cg_read_pid(f, &pid)) > 0) { + + if (!(flags & OUTPUT_KERNEL_THREADS) && is_kernel_thread(pid) > 0) + continue; + + if (!GREEDY_REALLOC(pids, n_allocated, n + 1)) + return -ENOMEM; + + assert(n < n_allocated); + pids[n++] = pid; + } + + if (r < 0) + return r; + + show_pid_array(pids, n, prefix, n_columns, false, more, flags); + + return 0; +} + +static int show_cgroup_name( + const char *path, + const char *prefix, + const char *glyph) { + + _cleanup_free_ char *b = NULL; + bool delegate = false; + int r; + + r = getxattr_malloc(path, "trusted.delegate", &b, false); + if (r < 0) { + if (r != -ENODATA) + log_debug_errno(r, "Failed to read trusted.delegate extended attribute: %m"); + } else { + r = parse_boolean(b); + if (r < 0) + log_debug_errno(r, "Failed to parse trusted.delegate extended attribute boolean value: %m"); + else + delegate = r > 0; + + b = mfree(b); + } + + b = strdup(basename(path)); + if (!b) + return -ENOMEM; + + printf("%s%s%s%s%s %s%s%s\n", + prefix, glyph, + delegate ? ansi_underline() : "", + cg_unescape(b), + delegate ? ansi_normal() : "", + delegate ? ansi_highlight() : "", + delegate ? special_glyph(SPECIAL_GLYPH_ELLIPSIS) : "", + delegate ? ansi_normal() : ""); + return 0; +} + +int show_cgroup_by_path( + const char *path, + const char *prefix, + size_t n_columns, + OutputFlags flags) { + + _cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL; + _cleanup_closedir_ DIR *d = NULL; + bool shown_pids = false; + char *gn = NULL; + int r; + + assert(path); + + if (n_columns <= 0) + n_columns = columns(); + + prefix = strempty(prefix); + + r = cg_mangle_path(path, &fn); + if (r < 0) + return r; + + d = opendir(fn); + if (!d) + return -errno; + + while ((r = cg_read_subgroup(d, &gn)) > 0) { + _cleanup_free_ char *k = NULL; + + k = path_join(fn, gn); + free(gn); + if (!k) + return -ENOMEM; + + if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k) > 0) + continue; + + if (!shown_pids) { + show_cgroup_one_by_path(path, prefix, n_columns, true, flags); + shown_pids = true; + } + + if (last) { + r = show_cgroup_name(last, prefix, special_glyph(SPECIAL_GLYPH_TREE_BRANCH)); + if (r < 0) + return r; + + if (!p1) { + p1 = strjoin(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL)); + if (!p1) + return -ENOMEM; + } + + show_cgroup_by_path(last, p1, n_columns-2, flags); + free(last); + } + + last = TAKE_PTR(k); + } + + if (r < 0) + return r; + + if (!shown_pids) + show_cgroup_one_by_path(path, prefix, n_columns, !!last, flags); + + if (last) { + r = show_cgroup_name(last, prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT)); + if (r < 0) + return r; + + if (!p2) { + p2 = strjoin(prefix, " "); + if (!p2) + return -ENOMEM; + } + + show_cgroup_by_path(last, p2, n_columns-2, flags); + } + + return 0; +} + +int show_cgroup(const char *controller, + const char *path, + const char *prefix, + size_t n_columns, + OutputFlags flags) { + _cleanup_free_ char *p = NULL; + int r; + + assert(path); + + r = cg_get_path(controller, path, NULL, &p); + if (r < 0) + return r; + + return show_cgroup_by_path(p, prefix, n_columns, flags); +} + +static int show_extra_pids( + const char *controller, + const char *path, + const char *prefix, + size_t n_columns, + const pid_t pids[], + unsigned n_pids, + OutputFlags flags) { + + _cleanup_free_ pid_t *copy = NULL; + unsigned i, j; + int r; + + assert(path); + + if (n_pids <= 0) + return 0; + + if (n_columns <= 0) + n_columns = columns(); + + prefix = strempty(prefix); + + copy = new(pid_t, n_pids); + if (!copy) + return -ENOMEM; + + for (i = 0, j = 0; i < n_pids; i++) { + _cleanup_free_ char *k = NULL; + + r = cg_pid_get_path(controller, pids[i], &k); + if (r < 0) + return r; + + if (path_startswith(k, path)) + continue; + + copy[j++] = pids[i]; + } + + show_pid_array(copy, j, prefix, n_columns, true, false, flags); + + return 0; +} + +int show_cgroup_and_extra( + const char *controller, + const char *path, + const char *prefix, + size_t n_columns, + const pid_t extra_pids[], + unsigned n_extra_pids, + OutputFlags flags) { + + int r; + + assert(path); + + r = show_cgroup(controller, path, prefix, n_columns, flags); + if (r < 0) + return r; + + return show_extra_pids(controller, path, prefix, n_columns, extra_pids, n_extra_pids, flags); +} + +int show_cgroup_get_unit_path_and_warn( + sd_bus *bus, + const char *unit, + char **ret) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *path = NULL; + int r; + + path = unit_dbus_path_from_name(unit); + if (!path) + return log_oom(); + + r = sd_bus_get_property_string( + bus, + "org.freedesktop.systemd1", + path, + unit_dbus_interface_from_name(unit), + "ControlGroup", + &error, + ret); + if (r < 0) + return log_error_errno(r, "Failed to query unit control group path: %s", + bus_error_message(&error, r)); + + return 0; +} + +int show_cgroup_get_path_and_warn( + const char *machine, + const char *prefix, + char **ret) { + + int r; + _cleanup_free_ char *root = NULL; + + if (machine) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_free_ char *unit = NULL; + const char *m; + + m = strjoina("/run/systemd/machines/", machine); + r = parse_env_file(NULL, m, "SCOPE", &unit); + if (r < 0) + return log_error_errno(r, "Failed to load machine data: %m"); + + r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL, false, &bus); + if (r < 0) + return bus_log_connect_error(r); + + r = show_cgroup_get_unit_path_and_warn(bus, unit, &root); + if (r < 0) + return r; + } else { + r = cg_get_root_path(&root); + if (r == -ENOMEDIUM) + return log_error_errno(r, "Failed to get root control group path.\n" + "No cgroup filesystem mounted on /sys/fs/cgroup"); + else if (r < 0) + return log_error_errno(r, "Failed to get root control group path: %m"); + } + + if (prefix) { + char *t; + + t = strjoin(root, prefix); + if (!t) + return log_oom(); + + *ret = t; + } else + *ret = TAKE_PTR(root); + + return 0; +} diff --git a/src/shared/cgroup-show.h b/src/shared/cgroup-show.h new file mode 100644 index 0000000..fbbf766 --- /dev/null +++ b/src/shared/cgroup-show.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-bus.h" + +#include "logs-show.h" +#include "output-mode.h" + +int show_cgroup_by_path(const char *path, const char *prefix, size_t n_columns, OutputFlags flags); +int show_cgroup(const char *controller, const char *path, const char *prefix, size_t n_columns, OutputFlags flags); + +int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, size_t n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags); + +int show_cgroup_get_unit_path_and_warn( + sd_bus *bus, + const char *unit, + char **ret); +int show_cgroup_get_path_and_warn( + const char *machine, + const char *prefix, + char **ret); diff --git a/src/shared/chown-recursive.c b/src/shared/chown-recursive.c new file mode 100644 index 0000000..4563729 --- /dev/null +++ b/src/shared/chown-recursive.c @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/xattr.h> + +#include "chown-recursive.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "macro.h" +#include "stdio-util.h" +#include "strv.h" +#include "user-util.h" + +static int chown_one( + int fd, + const struct stat *st, + uid_t uid, + gid_t gid, + mode_t mask) { + + char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + const char *n; + int r; + + assert(fd >= 0); + assert(st); + + /* We change ACLs through the /proc/self/fd/%i path, so that we have a stable reference that works + * with O_PATH. */ + xsprintf(procfs_path, "/proc/self/fd/%i", fd); + + /* Drop any ACL if there is one */ + FOREACH_STRING(n, "system.posix_acl_access", "system.posix_acl_default") + if (removexattr(procfs_path, n) < 0) + if (!IN_SET(errno, ENODATA, EOPNOTSUPP, ENOSYS, ENOTTY)) + return -errno; + + r = fchmod_and_chown(fd, st->st_mode & mask, uid, gid); + if (r < 0) + return r; + + return 1; +} + +static int chown_recursive_internal( + int fd, + const struct stat *st, + uid_t uid, + gid_t gid, + mode_t mask) { + + _cleanup_closedir_ DIR *d = NULL; + bool changed = false; + struct dirent *de; + int r; + + assert(fd >= 0); + assert(st); + + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return -errno; + } + + FOREACH_DIRENT_ALL(de, d, return -errno) { + _cleanup_close_ int path_fd = -1; + struct stat fst; + + if (dot_or_dot_dot(de->d_name)) + continue; + + /* Let's pin the child inode we want to fix now with an O_PATH fd, so that it cannot be swapped out + * while we manipulate it. */ + path_fd = openat(dirfd(d), de->d_name, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (path_fd < 0) + return -errno; + + if (fstat(path_fd, &fst) < 0) + return -errno; + + if (S_ISDIR(fst.st_mode)) { + int subdir_fd; + + /* Convert it to a "real" (i.e. non-O_PATH) fd now */ + subdir_fd = fd_reopen(path_fd, O_RDONLY|O_CLOEXEC|O_NOATIME); + if (subdir_fd < 0) + return subdir_fd; + + r = chown_recursive_internal(subdir_fd, &fst, uid, gid, mask); /* takes possession of subdir_fd even on failure */ + if (r < 0) + return r; + if (r > 0) + changed = true; + } else { + r = chown_one(path_fd, &fst, uid, gid, mask); + if (r < 0) + return r; + if (r > 0) + changed = true; + } + } + + r = chown_one(dirfd(d), st, uid, gid, mask); + if (r < 0) + return r; + + return r > 0 || changed; +} + +int path_chown_recursive( + const char *path, + uid_t uid, + gid_t gid, + mode_t mask) { + + _cleanup_close_ int fd = -1; + struct stat st; + + fd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME); + if (fd < 0) + return -errno; + + if (!uid_is_valid(uid) && !gid_is_valid(gid) && FLAGS_SET(mask, 07777)) + return 0; /* nothing to do */ + + if (fstat(fd, &st) < 0) + return -errno; + + /* Let's take a shortcut: if the top-level directory is properly owned, we don't descend into the + * whole tree, under the assumption that all is OK anyway. */ + if ((!uid_is_valid(uid) || st.st_uid == uid) && + (!gid_is_valid(gid) || st.st_gid == gid) && + ((st.st_mode & ~mask & 07777) == 0)) + return 0; + + return chown_recursive_internal(TAKE_FD(fd), &st, uid, gid, mask); /* we donate the fd to the call, regardless if it succeeded or failed */ +} + +int fd_chown_recursive( + int fd, + uid_t uid, + gid_t gid, + mode_t mask) { + + int duplicated_fd = -1; + struct stat st; + + /* Note that the slightly different order of fstat() and the checks here and in + * path_chown_recursive(). That's because when we open the directory ourselves we can specify + * O_DIRECTORY and we always want to ensure we are operating on a directory before deciding whether + * the operation is otherwise redundant. */ + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISDIR(st.st_mode)) + return -ENOTDIR; + + if (!uid_is_valid(uid) && !gid_is_valid(gid) && FLAGS_SET(mask, 07777)) + return 0; /* nothing to do */ + + /* Shortcut, as above */ + if ((!uid_is_valid(uid) || st.st_uid == uid) && + (!gid_is_valid(gid) || st.st_gid == gid) && + ((st.st_mode & ~mask & 07777) == 0)) + return 0; + + /* Let's duplicate the fd here, as opendir() wants to take possession of it and close it afterwards */ + duplicated_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (duplicated_fd < 0) + return -errno; + + return chown_recursive_internal(duplicated_fd, &st, uid, gid, mask); /* fd donated even on failure */ +} diff --git a/src/shared/chown-recursive.h b/src/shared/chown-recursive.h new file mode 100644 index 0000000..00038c3 --- /dev/null +++ b/src/shared/chown-recursive.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/types.h> + +int path_chown_recursive(const char *path, uid_t uid, gid_t gid, mode_t mask); + +int fd_chown_recursive(int fd, uid_t uid, gid_t gid, mode_t mask); diff --git a/src/shared/clean-ipc.c b/src/shared/clean-ipc.c new file mode 100644 index 0000000..77fe227 --- /dev/null +++ b/src/shared/clean-ipc.c @@ -0,0 +1,454 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <mqueue.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/msg.h> +#include <sys/sem.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "clean-ipc.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "log.h" +#include "macro.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +static bool match_uid_gid(uid_t subject_uid, gid_t subject_gid, uid_t delete_uid, gid_t delete_gid) { + + if (uid_is_valid(delete_uid) && subject_uid == delete_uid) + return true; + + if (gid_is_valid(delete_gid) && subject_gid == delete_gid) + return true; + + return false; +} + +static int clean_sysvipc_shm(uid_t delete_uid, gid_t delete_gid, bool rm) { + _cleanup_fclose_ FILE *f = NULL; + bool first = true; + int ret = 0, r; + + f = fopen("/proc/sysvipc/shm", "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /proc/sysvipc/shm: %m"); + } + + for (;;) { + _cleanup_free_ char *line = NULL; + unsigned n_attached; + pid_t cpid, lpid; + uid_t uid, cuid; + gid_t gid, cgid; + int shmid; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(errno, "Failed to read /proc/sysvipc/shm: %m"); + if (r == 0) + break; + + if (first) { + first = false; + continue; + } + + if (sscanf(line, "%*i %i %*o %*u " PID_FMT " " PID_FMT " %u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT, + &shmid, &cpid, &lpid, &n_attached, &uid, &gid, &cuid, &cgid) != 8) + continue; + + if (n_attached > 0) + continue; + + if (!match_uid_gid(uid, gid, delete_uid, delete_gid)) + continue; + + if (!rm) + return 1; + + if (shmctl(shmid, IPC_RMID, NULL) < 0) { + + /* Ignore entries that are already deleted */ + if (IN_SET(errno, EIDRM, EINVAL)) + continue; + + ret = log_warning_errno(errno, + "Failed to remove SysV shared memory segment %i: %m", + shmid); + } else { + log_debug("Removed SysV shared memory segment %i.", shmid); + if (ret == 0) + ret = 1; + } + } + + return ret; +} + +static int clean_sysvipc_sem(uid_t delete_uid, gid_t delete_gid, bool rm) { + _cleanup_fclose_ FILE *f = NULL; + bool first = true; + int ret = 0, r; + + f = fopen("/proc/sysvipc/sem", "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /proc/sysvipc/sem: %m"); + } + + for (;;) { + _cleanup_free_ char *line = NULL; + uid_t uid, cuid; + gid_t gid, cgid; + int semid; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(r, "Failed to read /proc/sysvipc/sem: %m"); + if (r == 0) + break; + + if (first) { + first = false; + continue; + } + + if (sscanf(line, "%*i %i %*o %*u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT, + &semid, &uid, &gid, &cuid, &cgid) != 5) + continue; + + if (!match_uid_gid(uid, gid, delete_uid, delete_gid)) + continue; + + if (!rm) + return 1; + + if (semctl(semid, 0, IPC_RMID) < 0) { + + /* Ignore entries that are already deleted */ + if (IN_SET(errno, EIDRM, EINVAL)) + continue; + + ret = log_warning_errno(errno, + "Failed to remove SysV semaphores object %i: %m", + semid); + } else { + log_debug("Removed SysV semaphore %i.", semid); + if (ret == 0) + ret = 1; + } + } + + return ret; +} + +static int clean_sysvipc_msg(uid_t delete_uid, gid_t delete_gid, bool rm) { + _cleanup_fclose_ FILE *f = NULL; + bool first = true; + int ret = 0, r; + + f = fopen("/proc/sysvipc/msg", "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /proc/sysvipc/msg: %m"); + } + + for (;;) { + _cleanup_free_ char *line = NULL; + uid_t uid, cuid; + gid_t gid, cgid; + pid_t cpid, lpid; + int msgid; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(r, "Failed to read /proc/sysvipc/msg: %m"); + if (r == 0) + break; + + if (first) { + first = false; + continue; + } + + if (sscanf(line, "%*i %i %*o %*u %*u " PID_FMT " " PID_FMT " " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT, + &msgid, &cpid, &lpid, &uid, &gid, &cuid, &cgid) != 7) + continue; + + if (!match_uid_gid(uid, gid, delete_uid, delete_gid)) + continue; + + if (!rm) + return 1; + + if (msgctl(msgid, IPC_RMID, NULL) < 0) { + + /* Ignore entries that are already deleted */ + if (IN_SET(errno, EIDRM, EINVAL)) + continue; + + ret = log_warning_errno(errno, + "Failed to remove SysV message queue %i: %m", + msgid); + } else { + log_debug("Removed SysV message queue %i.", msgid); + if (ret == 0) + ret = 1; + } + } + + return ret; +} + +static int clean_posix_shm_internal(const char *dirname, DIR *dir, uid_t uid, gid_t gid, bool rm) { + struct dirent *de; + int ret = 0, r; + + assert(dir); + + FOREACH_DIRENT_ALL(de, dir, goto fail) { + struct stat st; + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, "Failed to stat() POSIX shared memory segment %s/%s: %m", + dirname, de->d_name); + continue; + } + + if (S_ISDIR(st.st_mode)) { + _cleanup_closedir_ DIR *kid; + + kid = xopendirat(dirfd(dir), de->d_name, O_NOFOLLOW|O_NOATIME); + if (!kid) { + if (errno != ENOENT) + ret = log_warning_errno(errno, "Failed to enter shared memory directory %s/%s: %m", + dirname, de->d_name); + } else { + r = clean_posix_shm_internal(de->d_name, kid, uid, gid, rm); + if (r < 0) + ret = r; + } + + if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid)) + continue; + + if (!rm) + return 1; + + if (unlinkat(dirfd(dir), de->d_name, AT_REMOVEDIR) < 0) { + + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, "Failed to remove POSIX shared memory directory %s/%s: %m", + dirname, de->d_name); + } else { + log_debug("Removed POSIX shared memory directory %s", de->d_name); + if (ret == 0) + ret = 1; + } + } else { + + if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid)) + continue; + + if (!rm) + return 1; + + if (unlinkat(dirfd(dir), de->d_name, 0) < 0) { + + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, "Failed to remove POSIX shared memory segment %s: %m", de->d_name); + } else { + log_debug("Removed POSIX shared memory segment %s", de->d_name); + if (ret == 0) + ret = 1; + } + } + } + + return ret; + +fail: + return log_warning_errno(errno, "Failed to read /dev/shm: %m"); +} + +static int clean_posix_shm(uid_t uid, gid_t gid, bool rm) { + _cleanup_closedir_ DIR *dir = NULL; + + dir = opendir("/dev/shm"); + if (!dir) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /dev/shm: %m"); + } + + return clean_posix_shm_internal("/dev/shm", dir, uid, gid, rm); +} + +static int clean_posix_mq(uid_t uid, gid_t gid, bool rm) { + _cleanup_closedir_ DIR *dir = NULL; + struct dirent *de; + int ret = 0; + + dir = opendir("/dev/mqueue"); + if (!dir) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /dev/mqueue: %m"); + } + + FOREACH_DIRENT_ALL(de, dir, goto fail) { + struct stat st; + char fn[1+strlen(de->d_name)+1]; + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, + "Failed to stat() MQ segment %s: %m", + de->d_name); + continue; + } + + if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid)) + continue; + + if (!rm) + return 1; + + fn[0] = '/'; + strcpy(fn+1, de->d_name); + + if (mq_unlink(fn) < 0) { + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, + "Failed to unlink POSIX message queue %s: %m", + fn); + } else { + log_debug("Removed POSIX message queue %s", fn); + if (ret == 0) + ret = 1; + } + } + + return ret; + +fail: + return log_warning_errno(errno, "Failed to read /dev/mqueue: %m"); +} + +int clean_ipc_internal(uid_t uid, gid_t gid, bool rm) { + int ret = 0, r; + + /* If 'rm' is true, clean all IPC objects owned by either the specified UID or the specified GID. Return the + * last error encountered or == 0 if no matching IPC objects have been found or > 0 if matching IPC objects + * have been found and have been removed. + * + * If 'rm' is false, just search for IPC objects owned by either the specified UID or the specified GID. In + * this case we return < 0 on error, > 0 if we found a matching object, == 0 if we didn't. + * + * As special rule: if UID/GID is specified as root we'll silently not clean up things, and always claim that + * there are IPC objects for it. */ + + if (uid == 0) { + if (!rm) + return 1; + + uid = UID_INVALID; + } + if (gid == 0) { + if (!rm) + return 1; + + gid = GID_INVALID; + } + + /* Anything to do? */ + if (!uid_is_valid(uid) && !gid_is_valid(gid)) + return 0; + + r = clean_sysvipc_shm(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_sysvipc_sem(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_sysvipc_msg(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_posix_shm(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_posix_mq(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + return ret; +} + +int clean_ipc_by_uid(uid_t uid) { + return clean_ipc_internal(uid, GID_INVALID, true); +} + +int clean_ipc_by_gid(gid_t gid) { + return clean_ipc_internal(UID_INVALID, gid, true); +} diff --git a/src/shared/clean-ipc.h b/src/shared/clean-ipc.h new file mode 100644 index 0000000..ed348fb --- /dev/null +++ b/src/shared/clean-ipc.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/types.h> + +#include "user-util.h" + +int clean_ipc_internal(uid_t uid, gid_t gid, bool rm); + +/* Remove all IPC objects owned by the specified UID or GID */ +int clean_ipc_by_uid(uid_t uid); +int clean_ipc_by_gid(gid_t gid); + +/* Check if any IPC object owned by the specified UID or GID exists, returns > 0 if so, == 0 if not */ +static inline int search_ipc(uid_t uid, gid_t gid) { + return clean_ipc_internal(uid, gid, false); +} diff --git a/src/shared/clock-util.c b/src/shared/clock-util.c new file mode 100644 index 0000000..2caa70f --- /dev/null +++ b/src/shared/clock-util.c @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <time.h> +#include <linux/rtc.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/time.h> + +#include "alloc-util.h" +#include "clock-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "macro.h" +#include "string-util.h" + +int clock_get_hwclock(struct tm *tm) { + _cleanup_close_ int fd = -1; + + assert(tm); + + fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + /* This leaves the timezone fields of struct tm + * uninitialized! */ + if (ioctl(fd, RTC_RD_TIME, tm) < 0) + return -errno; + + /* We don't know daylight saving, so we reset this in order not + * to confuse mktime(). */ + tm->tm_isdst = -1; + + return 0; +} + +int clock_set_hwclock(const struct tm *tm) { + _cleanup_close_ int fd = -1; + + assert(tm); + + fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (ioctl(fd, RTC_SET_TIME, tm) < 0) + return -errno; + + return 0; +} + +int clock_is_localtime(const char* adjtime_path) { + _cleanup_fclose_ FILE *f; + int r; + + if (!adjtime_path) + adjtime_path = "/etc/adjtime"; + + /* + * The third line of adjtime is "UTC" or "LOCAL" or nothing. + * # /etc/adjtime + * 0.0 0 0 + * 0 + * UTC + */ + f = fopen(adjtime_path, "re"); + if (f) { + _cleanup_free_ char *line = NULL; + unsigned i; + + for (i = 0; i < 2; i++) { /* skip the first two lines */ + r = read_line(f, LONG_LINE_MAX, NULL); + if (r < 0) + return r; + if (r == 0) + return false; /* less than three lines → default to UTC */ + } + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return false; /* less than three lines → default to UTC */ + + return streq(line, "LOCAL"); + + } else if (errno != ENOENT) + return -errno; + + /* adjtime not present → default to UTC */ + return false; +} + +int clock_set_timezone(int *min) { + const struct timeval *tv_null = NULL; + struct timespec ts; + struct tm tm; + int minutesdelta; + struct timezone tz; + + assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0); + assert_se(localtime_r(&ts.tv_sec, &tm)); + minutesdelta = tm.tm_gmtoff / 60; + + tz.tz_minuteswest = -minutesdelta; + tz.tz_dsttime = 0; /* DST_NONE */ + + /* + * If the RTC does not run in UTC but in local time, the very first + * call to settimeofday() will set the kernel's timezone and will warp the + * system clock, so that it runs in UTC instead of the local time we + * have read from the RTC. + */ + if (settimeofday(tv_null, &tz) < 0) + return negative_errno(); + + if (min) + *min = minutesdelta; + return 0; +} + +int clock_reset_timewarp(void) { + const struct timeval *tv_null = NULL; + struct timezone tz; + + tz.tz_minuteswest = 0; + tz.tz_dsttime = 0; /* DST_NONE */ + + /* + * The very first call to settimeofday() does time warp magic. Do a + * dummy call here, so the time warping is sealed and all later calls + * behave as expected. + */ + if (settimeofday(tv_null, &tz) < 0) + return -errno; + + return 0; +} + +#define EPOCH_FILE "/usr/lib/clock-epoch" + +int clock_apply_epoch(void) { + struct stat st; + struct timespec ts; + usec_t epoch_usec; + + if (stat(EPOCH_FILE, &st) < 0) { + if (errno != ENOENT) + log_warning_errno(errno, "Cannot stat " EPOCH_FILE ": %m"); + + epoch_usec = (usec_t) TIME_EPOCH * USEC_PER_SEC; + } else + epoch_usec = timespec_load(&st.st_mtim); + + if (now(CLOCK_REALTIME) >= epoch_usec) + return 0; + + if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, epoch_usec)) < 0) + return -errno; + + return 1; +} diff --git a/src/shared/clock-util.h b/src/shared/clock-util.h new file mode 100644 index 0000000..3f1ae7a --- /dev/null +++ b/src/shared/clock-util.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <time.h> + +int clock_is_localtime(const char* adjtime_path); +int clock_set_timezone(int *min); +int clock_reset_timewarp(void); +int clock_get_hwclock(struct tm *tm); +int clock_set_hwclock(const struct tm *tm); +int clock_apply_epoch(void); diff --git a/src/shared/condition.c b/src/shared/condition.c new file mode 100644 index 0000000..b2ec690 --- /dev/null +++ b/src/shared/condition.c @@ -0,0 +1,973 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <limits.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <time.h> +#include <unistd.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "apparmor-util.h" +#include "architecture.h" +#include "audit-util.h" +#include "cap-list.h" +#include "cgroup-util.h" +#include "condition.h" +#include "cpu-set-util.h" +#include "efi-loader.h" +#include "env-file.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "glob-util.h" +#include "hostname-util.h" +#include "ima-util.h" +#include "limits-util.h" +#include "list.h" +#include "macro.h" +#include "mountpoint-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "selinux-util.h" +#include "smack-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "tomoyo-util.h" +#include "user-record.h" +#include "user-util.h" +#include "util.h" +#include "virt.h" + +Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate) { + Condition *c; + + assert(type >= 0); + assert(type < _CONDITION_TYPE_MAX); + assert(parameter); + + c = new(Condition, 1); + if (!c) + return NULL; + + *c = (Condition) { + .type = type, + .trigger = trigger, + .negate = negate, + }; + + if (parameter) { + c->parameter = strdup(parameter); + if (!c->parameter) + return mfree(c); + } + + return c; +} + +Condition* condition_free(Condition *c) { + assert(c); + + free(c->parameter); + return mfree(c); +} + +Condition* condition_free_list_type(Condition *head, ConditionType type) { + Condition *c, *n; + + LIST_FOREACH_SAFE(conditions, c, n, head) + if (type < 0 || c->type == type) { + LIST_REMOVE(conditions, head, c); + condition_free(c); + } + + assert(type >= 0 || !head); + return head; +} + +static int condition_test_kernel_command_line(Condition *c, char **env) { + _cleanup_free_ char *line = NULL; + const char *p; + bool equal; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_KERNEL_COMMAND_LINE); + + r = proc_cmdline(&line); + if (r < 0) + return r; + + equal = strchr(c->parameter, '='); + + for (p = line;;) { + _cleanup_free_ char *word = NULL; + bool found; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX); + if (r < 0) + return r; + if (r == 0) + break; + + if (equal) + found = streq(word, c->parameter); + else { + const char *f; + + f = startswith(word, c->parameter); + found = f && IN_SET(*f, 0, '='); + } + + if (found) + return true; + } + + return false; +} + +typedef enum { + /* Listed in order of checking. Note that some comparators are prefixes of others, hence the longest + * should be listed first. */ + ORDER_LOWER_OR_EQUAL, + ORDER_GREATER_OR_EQUAL, + ORDER_LOWER, + ORDER_GREATER, + ORDER_EQUAL, + ORDER_UNEQUAL, + _ORDER_MAX, + _ORDER_INVALID = -1 +} OrderOperator; + +static OrderOperator parse_order(const char **s) { + + static const char *const prefix[_ORDER_MAX] = { + [ORDER_LOWER_OR_EQUAL] = "<=", + [ORDER_GREATER_OR_EQUAL] = ">=", + [ORDER_LOWER] = "<", + [ORDER_GREATER] = ">", + [ORDER_EQUAL] = "=", + [ORDER_UNEQUAL] = "!=", + }; + + OrderOperator i; + + for (i = 0; i < _ORDER_MAX; i++) { + const char *e; + + e = startswith(*s, prefix[i]); + if (e) { + *s = e; + return i; + } + } + + return _ORDER_INVALID; +} + +static bool test_order(int k, OrderOperator p) { + + switch (p) { + + case ORDER_LOWER: + return k < 0; + + case ORDER_LOWER_OR_EQUAL: + return k <= 0; + + case ORDER_EQUAL: + return k == 0; + + case ORDER_UNEQUAL: + return k != 0; + + case ORDER_GREATER_OR_EQUAL: + return k >= 0; + + case ORDER_GREATER: + return k > 0; + + default: + assert_not_reached("unknown order"); + + } +} + +static int condition_test_kernel_version(Condition *c, char **env) { + OrderOperator order; + struct utsname u; + const char *p; + bool first = true; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_KERNEL_VERSION); + + assert_se(uname(&u) >= 0); + + p = c->parameter; + + for (;;) { + _cleanup_free_ char *word = NULL; + const char *s; + int r; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE); + if (r < 0) + return log_debug_errno(r, "Failed to parse condition string \"%s\": %m", p); + if (r == 0) + break; + + s = strstrip(word); + order = parse_order(&s); + if (order >= 0) { + s += strspn(s, WHITESPACE); + if (isempty(s)) { + if (first) { + /* For backwards compatibility, allow whitespace between the operator and + * value, without quoting, but only in the first expression. */ + word = mfree(word); + r = extract_first_word(&p, &word, NULL, 0); + if (r < 0) + return log_debug_errno(r, "Failed to parse condition string \"%s\": %m", p); + if (r == 0) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected end of expression: %s", p); + s = word; + } else + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected end of expression: %s", p); + } + + r = test_order(str_verscmp(u.release, s), order); + } else + /* No prefix? Then treat as glob string */ + r = fnmatch(s, u.release, 0) == 0; + + if (r == 0) + return false; + + first = false; + } + + return true; +} + +static int condition_test_memory(Condition *c, char **env) { + OrderOperator order; + uint64_t m, k; + const char *p; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_MEMORY); + + m = physical_memory(); + + p = c->parameter; + order = parse_order(&p); + if (order < 0) + order = ORDER_GREATER_OR_EQUAL; /* default to >= check, if nothing is specified. */ + + r = safe_atou64(p, &k); + if (r < 0) + return log_debug_errno(r, "Failed to parse size: %m"); + + return test_order(CMP(m, k), order); +} + +static int condition_test_cpus(Condition *c, char **env) { + OrderOperator order; + const char *p; + unsigned k; + int r, n; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_CPUS); + + n = cpus_in_affinity_mask(); + if (n < 0) + return log_debug_errno(n, "Failed to determine CPUs in affinity mask: %m"); + + p = c->parameter; + order = parse_order(&p); + if (order < 0) + order = ORDER_GREATER_OR_EQUAL; /* default to >= check, if nothing is specified. */ + + r = safe_atou(p, &k); + if (r < 0) + return log_debug_errno(r, "Failed to parse number of CPUs: %m"); + + return test_order(CMP((unsigned) n, k), order); +} + +static int condition_test_user(Condition *c, char **env) { + uid_t id; + int r; + _cleanup_free_ char *username = NULL; + const char *u; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_USER); + + r = parse_uid(c->parameter, &id); + if (r >= 0) + return id == getuid() || id == geteuid(); + + if (streq("@system", c->parameter)) + return uid_is_system(getuid()) || uid_is_system(geteuid()); + + username = getusername_malloc(); + if (!username) + return -ENOMEM; + + if (streq(username, c->parameter)) + return 1; + + if (getpid_cached() == 1) + return streq(c->parameter, "root"); + + u = c->parameter; + r = get_user_creds(&u, &id, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING); + if (r < 0) + return 0; + + return id == getuid() || id == geteuid(); +} + +static int condition_test_control_group_controller(Condition *c, char **env) { + int r; + CGroupMask system_mask, wanted_mask = 0; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_CONTROL_GROUP_CONTROLLER); + + r = cg_mask_supported(&system_mask); + if (r < 0) + return log_debug_errno(r, "Failed to determine supported controllers: %m"); + + r = cg_mask_from_string(c->parameter, &wanted_mask); + if (r < 0 || wanted_mask <= 0) { + /* This won't catch the case that we have an unknown controller + * mixed in with valid ones -- these are only assessed on the + * validity of the valid controllers found. */ + log_debug("Failed to parse cgroup string: %s", c->parameter); + return 1; + } + + return FLAGS_SET(system_mask, wanted_mask); +} + +static int condition_test_group(Condition *c, char **env) { + gid_t id; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_GROUP); + + r = parse_gid(c->parameter, &id); + if (r >= 0) + return in_gid(id); + + /* Avoid any NSS lookups if we are PID1 */ + if (getpid_cached() == 1) + return streq(c->parameter, "root"); + + return in_group(c->parameter) > 0; +} + +static int condition_test_virtualization(Condition *c, char **env) { + int b, v; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_VIRTUALIZATION); + + if (streq(c->parameter, "private-users")) + return running_in_userns(); + + v = detect_virtualization(); + if (v < 0) + return v; + + /* First, compare with yes/no */ + b = parse_boolean(c->parameter); + if (b >= 0) + return b == !!v; + + /* Then, compare categorization */ + if (streq(c->parameter, "vm")) + return VIRTUALIZATION_IS_VM(v); + + if (streq(c->parameter, "container")) + return VIRTUALIZATION_IS_CONTAINER(v); + + /* Finally compare id */ + return v != VIRTUALIZATION_NONE && streq(c->parameter, virtualization_to_string(v)); +} + +static int condition_test_architecture(Condition *c, char **env) { + int a, b; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_ARCHITECTURE); + + a = uname_architecture(); + if (a < 0) + return a; + + if (streq(c->parameter, "native")) + b = native_architecture(); + else { + b = architecture_from_string(c->parameter); + if (b < 0) /* unknown architecture? Then it's definitely not ours */ + return false; + } + + return a == b; +} + +static int condition_test_host(Condition *c, char **env) { + _cleanup_free_ char *h = NULL; + sd_id128_t x, y; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_HOST); + + if (sd_id128_from_string(c->parameter, &x) >= 0) { + + r = sd_id128_get_machine(&y); + if (r < 0) + return r; + + return sd_id128_equal(x, y); + } + + h = gethostname_malloc(); + if (!h) + return -ENOMEM; + + return fnmatch(c->parameter, h, FNM_CASEFOLD) == 0; +} + +static int condition_test_ac_power(Condition *c, char **env) { + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_AC_POWER); + + r = parse_boolean(c->parameter); + if (r < 0) + return r; + + return (on_ac_power() != 0) == !!r; +} + +static int condition_test_security(Condition *c, char **env) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_SECURITY); + + if (streq(c->parameter, "selinux")) + return mac_selinux_use(); + if (streq(c->parameter, "smack")) + return mac_smack_use(); + if (streq(c->parameter, "apparmor")) + return mac_apparmor_use(); + if (streq(c->parameter, "audit")) + return use_audit(); + if (streq(c->parameter, "ima")) + return use_ima(); + if (streq(c->parameter, "tomoyo")) + return mac_tomoyo_use(); + if (streq(c->parameter, "uefi-secureboot")) + return is_efi_secure_boot(); + + return false; +} + +static int condition_test_capability(Condition *c, char **env) { + unsigned long long capabilities = (unsigned long long) -1; + _cleanup_fclose_ FILE *f = NULL; + int value, r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_CAPABILITY); + + /* If it's an invalid capability, we don't have it */ + value = capability_from_name(c->parameter); + if (value < 0) + return -EINVAL; + + /* If it's a valid capability we default to assume + * that we have it */ + + f = fopen("/proc/self/status", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL; + const char *p; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + p = startswith(line, "CapBnd:"); + if (p) { + if (sscanf(line+7, "%llx", &capabilities) != 1) + return -EIO; + + break; + } + } + + return !!(capabilities & (1ULL << value)); +} + +static int condition_test_needs_update(Condition *c, char **env) { + struct stat usr, other; + const char *p; + bool b; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_NEEDS_UPDATE); + + r = proc_cmdline_get_bool("systemd.condition-needs-update", &b); + if (r < 0) + log_debug_errno(r, "Failed to parse systemd.condition-needs-update= kernel command line argument, ignoring: %m"); + if (r > 0) + return b; + + if (!path_is_absolute(c->parameter)) { + log_debug("Specified condition parameter '%s' is not absolute, assuming an update is needed.", c->parameter); + return true; + } + + /* If the file system is read-only we shouldn't suggest an update */ + r = path_is_read_only_fs(c->parameter); + if (r < 0) + log_debug_errno(r, "Failed to determine if '%s' is read-only, ignoring: %m", c->parameter); + if (r > 0) + return false; + + /* Any other failure means we should allow the condition to be true, so that we rather invoke too + * many update tools than too few. */ + + p = strjoina(c->parameter, "/.updated"); + if (lstat(p, &other) < 0) { + if (errno != ENOENT) + log_debug_errno(errno, "Failed to stat() '%s', assuming an update is needed: %m", p); + return true; + } + + if (lstat("/usr/", &usr) < 0) { + log_debug_errno(errno, "Failed to stat() /usr/, assuming an update is needed: %m"); + return true; + } + + /* + * First, compare seconds as they are always accurate... + */ + if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec) + return usr.st_mtim.tv_sec > other.st_mtim.tv_sec; + + /* + * ...then compare nanoseconds. + * + * A false positive is only possible when /usr's nanoseconds > 0 + * (otherwise /usr cannot be strictly newer than the target file) + * AND the target file's nanoseconds == 0 + * (otherwise the filesystem supports nsec timestamps, see stat(2)). + */ + if (usr.st_mtim.tv_nsec == 0 || other.st_mtim.tv_nsec > 0) + return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec; + + _cleanup_free_ char *timestamp_str = NULL; + r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", ×tamp_str); + if (r < 0) { + log_debug_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p); + return true; + } else if (r == 0) { + log_debug("No data in timestamp file '%s', using mtime.", p); + return true; + } + + uint64_t timestamp; + r = safe_atou64(timestamp_str, ×tamp); + if (r < 0) { + log_debug_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p); + return true; + } + + return timespec_load_nsec(&usr.st_mtim) > timestamp; +} + +static int condition_test_first_boot(Condition *c, char **env) { + int r, q; + bool b; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_FIRST_BOOT); + + r = proc_cmdline_get_bool("systemd.condition-first-boot", &b); + if (r < 0) + log_debug_errno(r, "Failed to parse systemd.condition-first-boot= kernel command line argument, ignoring: %m"); + if (r > 0) + return b == !!r; + + r = parse_boolean(c->parameter); + if (r < 0) + return r; + + q = access("/run/systemd/first-boot", F_OK); + if (q < 0 && errno != ENOENT) + log_debug_errno(errno, "Failed to check if /run/systemd/first-boot exists, ignoring: %m"); + + return (q >= 0) == !!r; +} + +static int condition_test_environment(Condition *c, char **env) { + bool equal; + char **i; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_ENVIRONMENT); + + equal = strchr(c->parameter, '='); + + STRV_FOREACH(i, env) { + bool found; + + if (equal) + found = streq(c->parameter, *i); + else { + const char *f; + + f = startswith(*i, c->parameter); + found = f && IN_SET(*f, 0, '='); + } + + if (found) + return true; + } + + return false; +} + +static int condition_test_path_exists(Condition *c, char **env) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_EXISTS); + + return access(c->parameter, F_OK) >= 0; +} + +static int condition_test_path_exists_glob(Condition *c, char **env) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_EXISTS_GLOB); + + return glob_exists(c->parameter) > 0; +} + +static int condition_test_path_is_directory(Condition *c, char **env) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_DIRECTORY); + + return is_dir(c->parameter, true) > 0; +} + +static int condition_test_path_is_symbolic_link(Condition *c, char **env) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_SYMBOLIC_LINK); + + return is_symlink(c->parameter) > 0; +} + +static int condition_test_path_is_mount_point(Condition *c, char **env) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_MOUNT_POINT); + + return path_is_mount_point(c->parameter, NULL, AT_SYMLINK_FOLLOW) > 0; +} + +static int condition_test_path_is_read_write(Condition *c, char **env) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_READ_WRITE); + + return path_is_read_only_fs(c->parameter) <= 0; +} + +static int condition_test_path_is_encrypted(Condition *c, char **env) { + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_ENCRYPTED); + + r = path_is_encrypted(c->parameter); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to determine if '%s' is encrypted: %m", c->parameter); + + return r > 0; +} + +static int condition_test_directory_not_empty(Condition *c, char **env) { + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_DIRECTORY_NOT_EMPTY); + + r = dir_is_empty(c->parameter); + return r <= 0 && r != -ENOENT; +} + +static int condition_test_file_not_empty(Condition *c, char **env) { + struct stat st; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_FILE_NOT_EMPTY); + + return (stat(c->parameter, &st) >= 0 && + S_ISREG(st.st_mode) && + st.st_size > 0); +} + +static int condition_test_file_is_executable(Condition *c, char **env) { + struct stat st; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_FILE_IS_EXECUTABLE); + + return (stat(c->parameter, &st) >= 0 && + S_ISREG(st.st_mode) && + (st.st_mode & 0111)); +} + +int condition_test(Condition *c, char **env) { + + static int (*const condition_tests[_CONDITION_TYPE_MAX])(Condition *c, char **env) = { + [CONDITION_PATH_EXISTS] = condition_test_path_exists, + [CONDITION_PATH_EXISTS_GLOB] = condition_test_path_exists_glob, + [CONDITION_PATH_IS_DIRECTORY] = condition_test_path_is_directory, + [CONDITION_PATH_IS_SYMBOLIC_LINK] = condition_test_path_is_symbolic_link, + [CONDITION_PATH_IS_MOUNT_POINT] = condition_test_path_is_mount_point, + [CONDITION_PATH_IS_READ_WRITE] = condition_test_path_is_read_write, + [CONDITION_PATH_IS_ENCRYPTED] = condition_test_path_is_encrypted, + [CONDITION_DIRECTORY_NOT_EMPTY] = condition_test_directory_not_empty, + [CONDITION_FILE_NOT_EMPTY] = condition_test_file_not_empty, + [CONDITION_FILE_IS_EXECUTABLE] = condition_test_file_is_executable, + [CONDITION_KERNEL_COMMAND_LINE] = condition_test_kernel_command_line, + [CONDITION_KERNEL_VERSION] = condition_test_kernel_version, + [CONDITION_VIRTUALIZATION] = condition_test_virtualization, + [CONDITION_SECURITY] = condition_test_security, + [CONDITION_CAPABILITY] = condition_test_capability, + [CONDITION_HOST] = condition_test_host, + [CONDITION_AC_POWER] = condition_test_ac_power, + [CONDITION_ARCHITECTURE] = condition_test_architecture, + [CONDITION_NEEDS_UPDATE] = condition_test_needs_update, + [CONDITION_FIRST_BOOT] = condition_test_first_boot, + [CONDITION_USER] = condition_test_user, + [CONDITION_GROUP] = condition_test_group, + [CONDITION_CONTROL_GROUP_CONTROLLER] = condition_test_control_group_controller, + [CONDITION_CPUS] = condition_test_cpus, + [CONDITION_MEMORY] = condition_test_memory, + [CONDITION_ENVIRONMENT] = condition_test_environment, + }; + + int r, b; + + assert(c); + assert(c->type >= 0); + assert(c->type < _CONDITION_TYPE_MAX); + + r = condition_tests[c->type](c, env); + if (r < 0) { + c->result = CONDITION_ERROR; + return r; + } + + b = (r > 0) == !c->negate; + c->result = b ? CONDITION_SUCCEEDED : CONDITION_FAILED; + return b; +} + +bool condition_test_list( + Condition *first, + char **env, + condition_to_string_t to_string, + condition_test_logger_t logger, + void *userdata) { + + Condition *c; + int triggered = -1; + + assert(!!logger == !!to_string); + + /* If the condition list is empty, then it is true */ + if (!first) + return true; + + /* Otherwise, if all of the non-trigger conditions apply and + * if any of the trigger conditions apply (unless there are + * none) we return true */ + LIST_FOREACH(conditions, c, first) { + int r; + + r = condition_test(c, env); + + if (logger) { + if (r < 0) + logger(userdata, LOG_WARNING, r, PROJECT_FILE, __LINE__, __func__, + "Couldn't determine result for %s=%s%s%s, assuming failed: %m", + to_string(c->type), + c->trigger ? "|" : "", + c->negate ? "!" : "", + c->parameter); + else + logger(userdata, LOG_DEBUG, 0, PROJECT_FILE, __LINE__, __func__, + "%s=%s%s%s %s.", + to_string(c->type), + c->trigger ? "|" : "", + c->negate ? "!" : "", + c->parameter, + condition_result_to_string(c->result)); + } + + if (!c->trigger && r <= 0) + return false; + + if (c->trigger && triggered <= 0) + triggered = r > 0; + } + + return triggered != 0; +} + +void condition_dump(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string) { + assert(c); + assert(f); + assert(to_string); + + prefix = strempty(prefix); + + fprintf(f, + "%s\t%s: %s%s%s %s\n", + prefix, + to_string(c->type), + c->trigger ? "|" : "", + c->negate ? "!" : "", + c->parameter, + condition_result_to_string(c->result)); +} + +void condition_dump_list(Condition *first, FILE *f, const char *prefix, condition_to_string_t to_string) { + Condition *c; + + LIST_FOREACH(conditions, c, first) + condition_dump(c, f, prefix, to_string); +} + +static const char* const condition_type_table[_CONDITION_TYPE_MAX] = { + [CONDITION_ARCHITECTURE] = "ConditionArchitecture", + [CONDITION_VIRTUALIZATION] = "ConditionVirtualization", + [CONDITION_HOST] = "ConditionHost", + [CONDITION_KERNEL_COMMAND_LINE] = "ConditionKernelCommandLine", + [CONDITION_KERNEL_VERSION] = "ConditionKernelVersion", + [CONDITION_SECURITY] = "ConditionSecurity", + [CONDITION_CAPABILITY] = "ConditionCapability", + [CONDITION_AC_POWER] = "ConditionACPower", + [CONDITION_NEEDS_UPDATE] = "ConditionNeedsUpdate", + [CONDITION_FIRST_BOOT] = "ConditionFirstBoot", + [CONDITION_PATH_EXISTS] = "ConditionPathExists", + [CONDITION_PATH_EXISTS_GLOB] = "ConditionPathExistsGlob", + [CONDITION_PATH_IS_DIRECTORY] = "ConditionPathIsDirectory", + [CONDITION_PATH_IS_SYMBOLIC_LINK] = "ConditionPathIsSymbolicLink", + [CONDITION_PATH_IS_MOUNT_POINT] = "ConditionPathIsMountPoint", + [CONDITION_PATH_IS_READ_WRITE] = "ConditionPathIsReadWrite", + [CONDITION_PATH_IS_ENCRYPTED] = "ConditionPathIsEncrypted", + [CONDITION_DIRECTORY_NOT_EMPTY] = "ConditionDirectoryNotEmpty", + [CONDITION_FILE_NOT_EMPTY] = "ConditionFileNotEmpty", + [CONDITION_FILE_IS_EXECUTABLE] = "ConditionFileIsExecutable", + [CONDITION_USER] = "ConditionUser", + [CONDITION_GROUP] = "ConditionGroup", + [CONDITION_CONTROL_GROUP_CONTROLLER] = "ConditionControlGroupController", + [CONDITION_CPUS] = "ConditionCPUs", + [CONDITION_MEMORY] = "ConditionMemory", + [CONDITION_ENVIRONMENT] = "ConditionEnvironment", +}; + +DEFINE_STRING_TABLE_LOOKUP(condition_type, ConditionType); + +static const char* const assert_type_table[_CONDITION_TYPE_MAX] = { + [CONDITION_ARCHITECTURE] = "AssertArchitecture", + [CONDITION_VIRTUALIZATION] = "AssertVirtualization", + [CONDITION_HOST] = "AssertHost", + [CONDITION_KERNEL_COMMAND_LINE] = "AssertKernelCommandLine", + [CONDITION_KERNEL_VERSION] = "AssertKernelVersion", + [CONDITION_SECURITY] = "AssertSecurity", + [CONDITION_CAPABILITY] = "AssertCapability", + [CONDITION_AC_POWER] = "AssertACPower", + [CONDITION_NEEDS_UPDATE] = "AssertNeedsUpdate", + [CONDITION_FIRST_BOOT] = "AssertFirstBoot", + [CONDITION_PATH_EXISTS] = "AssertPathExists", + [CONDITION_PATH_EXISTS_GLOB] = "AssertPathExistsGlob", + [CONDITION_PATH_IS_DIRECTORY] = "AssertPathIsDirectory", + [CONDITION_PATH_IS_SYMBOLIC_LINK] = "AssertPathIsSymbolicLink", + [CONDITION_PATH_IS_MOUNT_POINT] = "AssertPathIsMountPoint", + [CONDITION_PATH_IS_READ_WRITE] = "AssertPathIsReadWrite", + [CONDITION_PATH_IS_ENCRYPTED] = "AssertPathIsEncrypted", + [CONDITION_DIRECTORY_NOT_EMPTY] = "AssertDirectoryNotEmpty", + [CONDITION_FILE_NOT_EMPTY] = "AssertFileNotEmpty", + [CONDITION_FILE_IS_EXECUTABLE] = "AssertFileIsExecutable", + [CONDITION_USER] = "AssertUser", + [CONDITION_GROUP] = "AssertGroup", + [CONDITION_CONTROL_GROUP_CONTROLLER] = "AssertControlGroupController", + [CONDITION_CPUS] = "AssertCPUs", + [CONDITION_MEMORY] = "AssertMemory", + [CONDITION_ENVIRONMENT] = "AssertEnvironment", +}; + +DEFINE_STRING_TABLE_LOOKUP(assert_type, ConditionType); + +static const char* const condition_result_table[_CONDITION_RESULT_MAX] = { + [CONDITION_UNTESTED] = "untested", + [CONDITION_SUCCEEDED] = "succeeded", + [CONDITION_FAILED] = "failed", + [CONDITION_ERROR] = "error", +}; + +DEFINE_STRING_TABLE_LOOKUP(condition_result, ConditionResult); diff --git a/src/shared/condition.h b/src/shared/condition.h new file mode 100644 index 0000000..0d9754e --- /dev/null +++ b/src/shared/condition.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> + +#include "list.h" +#include "macro.h" + +typedef enum ConditionType { + CONDITION_ARCHITECTURE, + CONDITION_VIRTUALIZATION, + CONDITION_HOST, + CONDITION_KERNEL_COMMAND_LINE, + CONDITION_KERNEL_VERSION, + CONDITION_SECURITY, + CONDITION_CAPABILITY, + CONDITION_AC_POWER, + CONDITION_MEMORY, + CONDITION_CPUS, + CONDITION_ENVIRONMENT, + + CONDITION_NEEDS_UPDATE, + CONDITION_FIRST_BOOT, + + CONDITION_PATH_EXISTS, + CONDITION_PATH_EXISTS_GLOB, + CONDITION_PATH_IS_DIRECTORY, + CONDITION_PATH_IS_SYMBOLIC_LINK, + CONDITION_PATH_IS_MOUNT_POINT, + CONDITION_PATH_IS_READ_WRITE, + CONDITION_PATH_IS_ENCRYPTED, + CONDITION_DIRECTORY_NOT_EMPTY, + CONDITION_FILE_NOT_EMPTY, + CONDITION_FILE_IS_EXECUTABLE, + + CONDITION_USER, + CONDITION_GROUP, + + CONDITION_CONTROL_GROUP_CONTROLLER, + + _CONDITION_TYPE_MAX, + _CONDITION_TYPE_INVALID = -1 +} ConditionType; + +typedef enum ConditionResult { + CONDITION_UNTESTED, + CONDITION_SUCCEEDED, + CONDITION_FAILED, + CONDITION_ERROR, + _CONDITION_RESULT_MAX, + _CONDITION_RESULT_INVALID = -1 +} ConditionResult; + +typedef struct Condition { + ConditionType type:8; + + bool trigger:1; + bool negate:1; + + ConditionResult result:6; + + char *parameter; + + LIST_FIELDS(struct Condition, conditions); +} Condition; + +Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate); +Condition* condition_free(Condition *c); +Condition* condition_free_list_type(Condition *first, ConditionType type); +static inline Condition* condition_free_list(Condition *first) { + return condition_free_list_type(first, _CONDITION_TYPE_INVALID); +} + +int condition_test(Condition *c, char **env); + +typedef int (*condition_test_logger_t)(void *userdata, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8); +typedef const char* (*condition_to_string_t)(ConditionType t) _const_; +bool condition_test_list(Condition *first, char **env, condition_to_string_t to_string, condition_test_logger_t logger, void *userdata); + +void condition_dump(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string); +void condition_dump_list(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string); + +const char* condition_type_to_string(ConditionType t) _const_; +ConditionType condition_type_from_string(const char *s) _pure_; + +const char* assert_type_to_string(ConditionType t) _const_; +ConditionType assert_type_from_string(const char *s) _pure_; + +const char* condition_result_to_string(ConditionResult r) _const_; +ConditionResult condition_result_from_string(const char *s) _pure_; + +static inline bool condition_takes_path(ConditionType t) { + return IN_SET(t, + CONDITION_PATH_EXISTS, + CONDITION_PATH_EXISTS_GLOB, + CONDITION_PATH_IS_DIRECTORY, + CONDITION_PATH_IS_SYMBOLIC_LINK, + CONDITION_PATH_IS_MOUNT_POINT, + CONDITION_PATH_IS_READ_WRITE, + CONDITION_PATH_IS_ENCRYPTED, + CONDITION_DIRECTORY_NOT_EMPTY, + CONDITION_FILE_NOT_EMPTY, + CONDITION_FILE_IS_EXECUTABLE, + CONDITION_NEEDS_UPDATE); +} diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c new file mode 100644 index 0000000..35d301d --- /dev/null +++ b/src/shared/conf-parser.c @@ -0,0 +1,1247 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "conf-parser.h" +#include "def.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "log.h" +#include "macro.h" +#include "missing_network.h" +#include "nulstr-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "sd-id128.h" +#include "signal-util.h" +#include "socket-util.h" +#include "string-util.h" +#include "strv.h" +#include "syslog-util.h" +#include "time-util.h" +#include "utf8.h" + +int config_item_table_lookup( + const void *table, + const char *section, + const char *lvalue, + ConfigParserCallback *func, + int *ltype, + void **data, + void *userdata) { + + const ConfigTableItem *t; + + assert(table); + assert(lvalue); + assert(func); + assert(ltype); + assert(data); + + for (t = table; t->lvalue; t++) { + + if (!streq(lvalue, t->lvalue)) + continue; + + if (!streq_ptr(section, t->section)) + continue; + + *func = t->parse; + *ltype = t->ltype; + *data = t->data; + return 1; + } + + return 0; +} + +int config_item_perf_lookup( + const void *table, + const char *section, + const char *lvalue, + ConfigParserCallback *func, + int *ltype, + void **data, + void *userdata) { + + ConfigPerfItemLookup lookup = (ConfigPerfItemLookup) table; + const ConfigPerfItem *p; + + assert(table); + assert(lvalue); + assert(func); + assert(ltype); + assert(data); + + if (section) { + const char *key; + + key = strjoina(section, ".", lvalue); + p = lookup(key, strlen(key)); + } else + p = lookup(lvalue, strlen(lvalue)); + if (!p) + return 0; + + *func = p->parse; + *ltype = p->ltype; + *data = (uint8_t*) userdata + p->offset; + return 1; +} + +/* Run the user supplied parser for an assignment */ +static int next_assignment( + const char *unit, + const char *filename, + unsigned line, + ConfigItemLookup lookup, + const void *table, + const char *section, + unsigned section_line, + const char *lvalue, + const char *rvalue, + ConfigParseFlags flags, + void *userdata) { + + ConfigParserCallback func = NULL; + int ltype = 0; + void *data = NULL; + int r; + + assert(filename); + assert(line > 0); + assert(lookup); + assert(lvalue); + assert(rvalue); + + r = lookup(table, section, lvalue, &func, <ype, &data, userdata); + if (r < 0) + return r; + if (r > 0) { + if (func) + return func(unit, filename, line, section, section_line, + lvalue, ltype, rvalue, data, userdata); + + return 0; + } + + /* Warn about unknown non-extension fields. */ + if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(lvalue, "X-")) + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Unknown key name '%s' in section '%s', ignoring.", lvalue, section); + + return 0; +} + +/* Parse a single logical line */ +static int parse_line( + const char* unit, + const char *filename, + unsigned line, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + char **section, + unsigned *section_line, + bool *section_ignored, + char *l, + void *userdata) { + + char *e; + + assert(filename); + assert(line > 0); + assert(lookup); + assert(l); + + l = strstrip(l); + if (!*l) + return 0; + + if (*l == '\n') + return 0; + + if (!utf8_is_valid(l)) + return log_syntax_invalid_utf8(unit, LOG_WARNING, filename, line, l); + + if (*l == '[') { + size_t k; + char *n; + + k = strlen(l); + assert(k > 0); + + if (l[k-1] != ']') + return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EBADMSG), "Invalid section header '%s'", l); + + n = strndup(l+1, k-2); + if (!n) + return log_oom(); + + if (sections && !nulstr_contains(sections, n)) { + bool ignore = flags & CONFIG_PARSE_RELAXED; + const char *t; + + ignore = ignore || startswith(n, "X-"); + + if (!ignore) + NULSTR_FOREACH(t, sections) + if (streq_ptr(n, startswith(t, "-"))) { + ignore = true; + break; + } + + if (!ignore) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown section '%s'. Ignoring.", n); + + free(n); + *section = mfree(*section); + *section_line = 0; + *section_ignored = true; + } else { + free_and_replace(*section, n); + *section_line = line; + *section_ignored = false; + } + + return 0; + } + + if (sections && !*section) { + if (!(flags & CONFIG_PARSE_RELAXED) && !*section_ignored) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Assignment outside of section. Ignoring."); + + return 0; + } + + e = strchr(l, '='); + if (!e) + return log_syntax(unit, LOG_WARNING, filename, line, 0, + "Missing '=', ignoring line."); + if (e == l) + return log_syntax(unit, LOG_WARNING, filename, line, 0, + "Missing key name before '=', ignoring line."); + + *e = 0; + e++; + + return next_assignment(unit, + filename, + line, + lookup, + table, + *section, + *section_line, + strstrip(l), + strstrip(e), + flags, + userdata); +} + +/* Go through the file and parse each line */ +int config_parse(const char *unit, + const char *filename, + FILE *f, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata, + usec_t *ret_mtime) { + + _cleanup_free_ char *section = NULL, *continuation = NULL; + _cleanup_fclose_ FILE *ours = NULL; + unsigned line = 0, section_line = 0; + bool section_ignored = false, bom_seen = false; + int r, fd; + usec_t mtime; + + assert(filename); + assert(lookup); + + if (!f) { + f = ours = fopen(filename, "re"); + if (!f) { + /* Only log on request, except for ENOENT, + * since we return 0 to the caller. */ + if ((flags & CONFIG_PARSE_WARN) || errno == ENOENT) + log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, + "Failed to open configuration file '%s': %m", filename); + return errno == ENOENT ? 0 : -errno; + } + } + + fd = fileno(f); + if (fd >= 0) { /* stream might not have an fd, let's be careful hence */ + struct stat st; + + if (fstat(fd, &st) < 0) + return log_full_errno(FLAGS_SET(flags, CONFIG_PARSE_WARN) ? LOG_ERR : LOG_DEBUG, errno, + "Failed to fstat(%s): %m", filename); + + (void) stat_warn_permissions(filename, &st); + mtime = timespec_load(&st.st_mtim); + } + + for (;;) { + _cleanup_free_ char *buf = NULL; + bool escaped = false; + char *l, *p, *e; + + r = read_line(f, LONG_LINE_MAX, &buf); + if (r == 0) + break; + if (r == -ENOBUFS) { + if (flags & CONFIG_PARSE_WARN) + log_error_errno(r, "%s:%u: Line too long", filename, line); + + return r; + } + if (r < 0) { + if (FLAGS_SET(flags, CONFIG_PARSE_WARN)) + log_error_errno(r, "%s:%u: Error while reading configuration file: %m", filename, line); + + return r; + } + + line++; + + l = skip_leading_chars(buf, WHITESPACE); + if (*l != '\0' && strchr(COMMENTS, *l)) + continue; + + l = buf; + if (!bom_seen) { + char *q; + + q = startswith(buf, UTF8_BYTE_ORDER_MARK); + if (q) { + l = q; + bom_seen = true; + } + } + + if (continuation) { + if (strlen(continuation) + strlen(l) > LONG_LINE_MAX) { + if (flags & CONFIG_PARSE_WARN) + log_error("%s:%u: Continuation line too long", filename, line); + return -ENOBUFS; + } + + if (!strextend(&continuation, l, NULL)) { + if (flags & CONFIG_PARSE_WARN) + log_oom(); + return -ENOMEM; + } + + p = continuation; + } else + p = l; + + for (e = p; *e; e++) { + if (escaped) + escaped = false; + else if (*e == '\\') + escaped = true; + } + + if (escaped) { + *(e-1) = ' '; + + if (!continuation) { + continuation = strdup(l); + if (!continuation) { + if (flags & CONFIG_PARSE_WARN) + log_oom(); + return -ENOMEM; + } + } + + continue; + } + + r = parse_line(unit, + filename, + line, + sections, + lookup, + table, + flags, + §ion, + §ion_line, + §ion_ignored, + p, + userdata); + if (r < 0) { + if (flags & CONFIG_PARSE_WARN) + log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line); + return r; + } + + continuation = mfree(continuation); + } + + if (continuation) { + r = parse_line(unit, + filename, + ++line, + sections, + lookup, + table, + flags, + §ion, + §ion_line, + §ion_ignored, + continuation, + userdata); + if (r < 0) { + if (flags & CONFIG_PARSE_WARN) + log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line); + return r; + } + } + + if (ret_mtime) + *ret_mtime = mtime; + + return 0; +} + +static int config_parse_many_files( + const char *conf_file, + char **files, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata, + usec_t *ret_mtime) { + + usec_t mtime = 0; + char **fn; + int r; + + if (conf_file) { + r = config_parse(NULL, conf_file, NULL, sections, lookup, table, flags, userdata, &mtime); + if (r < 0) + return r; + } + + STRV_FOREACH(fn, files) { + usec_t t; + + r = config_parse(NULL, *fn, NULL, sections, lookup, table, flags, userdata, &t); + if (r < 0) + return r; + if (t > mtime) /* Find the newest */ + mtime = t; + } + + if (ret_mtime) + *ret_mtime = mtime; + + return 0; +} + +/* Parse each config file in the directories specified as nulstr. */ +int config_parse_many_nulstr( + const char *conf_file, + const char *conf_file_dirs, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata, + usec_t *ret_mtime) { + + _cleanup_strv_free_ char **files = NULL; + int r; + + r = conf_files_list_nulstr(&files, ".conf", NULL, 0, conf_file_dirs); + if (r < 0) + return r; + + return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata, ret_mtime); +} + +/* Parse each config file in the directories specified as strv. */ +int config_parse_many( + const char *conf_file, + const char* const* conf_file_dirs, + const char *dropin_dirname, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata, + usec_t *ret_mtime) { + + _cleanup_strv_free_ char **dropin_dirs = NULL; + _cleanup_strv_free_ char **files = NULL; + const char *suffix; + int r; + + suffix = strjoina("/", dropin_dirname); + r = strv_extend_strv_concat(&dropin_dirs, (char**) conf_file_dirs, suffix); + if (r < 0) + return r; + + r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char* const*) dropin_dirs); + if (r < 0) + return r; + + return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata, ret_mtime); +} + +#define DEFINE_PARSER(type, vartype, conv_func) \ + DEFINE_CONFIG_PARSE_PTR(config_parse_##type, conv_func, vartype, "Failed to parse " #type " value") + +DEFINE_PARSER(int, int, safe_atoi); +DEFINE_PARSER(long, long, safe_atoli); +DEFINE_PARSER(uint8, uint8_t, safe_atou8); +DEFINE_PARSER(uint16, uint16_t, safe_atou16); +DEFINE_PARSER(uint32, uint32_t, safe_atou32); +DEFINE_PARSER(int32, int32_t, safe_atoi32); +DEFINE_PARSER(uint64, uint64_t, safe_atou64); +DEFINE_PARSER(unsigned, unsigned, safe_atou); +DEFINE_PARSER(double, double, safe_atod); +DEFINE_PARSER(nsec, nsec_t, parse_nsec); +DEFINE_PARSER(sec, usec_t, parse_sec); +DEFINE_PARSER(sec_def_infinity, usec_t, parse_sec_def_infinity); +DEFINE_PARSER(mode, mode_t, parse_mode); + +int config_parse_iec_size(const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + size_t *sz = data; + uint64_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_size(rvalue, 1024, &v); + if (r >= 0 && (uint64_t) (size_t) v != v) + r = -ERANGE; + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue); + return 0; + } + + *sz = (size_t) v; + return 0; +} + +int config_parse_si_uint64( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *sz = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_size(rvalue, 1000, sz); + if (r < 0) + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue); + + return 0; +} + +int config_parse_iec_uint64( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *bytes = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_size(rvalue, 1024, bytes); + if (r < 0) + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value, ignoring: %s", rvalue); + + return 0; +} + +int config_parse_bool(const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int k; + bool *b = data; + bool fatal = ltype; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + k = parse_boolean(rvalue); + if (k < 0) { + log_syntax(unit, fatal ? LOG_ERR : LOG_WARNING, filename, line, k, + "Failed to parse boolean value%s: %s", + fatal ? "" : ", ignoring", rvalue); + return fatal ? -ENOEXEC : 0; + } + + *b = k; + return 0; +} + +int config_parse_id128( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + sd_id128_t t, *result = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = sd_id128_from_string(rvalue, &t); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse 128bit ID/UUID, ignoring: %s", rvalue); + return 0; + } + + if (sd_id128_is_null(t)) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "128bit ID/UUID is all 0, ignoring: %s", rvalue); + return 0; + } + + *result = t; + return 0; +} + +int config_parse_tristate( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int k, *t = data; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + /* A tristate is pretty much a boolean, except that it can + * also take the special value -1, indicating "uninitialized", + * much like NULL is for a pointer type. */ + + k = parse_boolean(rvalue); + if (k < 0) { + log_syntax(unit, LOG_WARNING, filename, line, k, "Failed to parse boolean value, ignoring: %s", rvalue); + return 0; + } + + *t = k; + return 0; +} + +int config_parse_string( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char **s = data; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (free_and_strdup(s, empty_to_null(rvalue)) < 0) + return log_oom(); + + return 0; +} + +int config_parse_path( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *n = NULL; + bool fatal = ltype; + char **s = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) + goto finalize; + + n = strdup(rvalue); + if (!n) + return log_oom(); + + r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE | (fatal ? PATH_CHECK_FATAL : 0), unit, filename, line, lvalue); + if (r < 0) + return fatal ? -ENOEXEC : 0; + +finalize: + return free_and_replace(*s, n); +} + +int config_parse_strv( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char ***sv = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *sv = strv_free(*sv); + return 0; + } + + for (const char *p = rvalue;;) { + char *word = NULL; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE); + if (r == 0) + return 0; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + return 0; + } + + r = strv_consume(sv, word); + if (r < 0) + return log_oom(); + } +} + +int config_parse_warn_compat( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Disabled reason = ltype; + + switch(reason) { + + case DISABLED_CONFIGURATION: + log_syntax(unit, LOG_DEBUG, filename, line, 0, + "Support for option %s= has been disabled at compile time and it is ignored", lvalue); + break; + + case DISABLED_LEGACY: + log_syntax(unit, LOG_INFO, filename, line, 0, + "Support for option %s= has been removed and it is ignored", lvalue); + break; + + case DISABLED_EXPERIMENTAL: + log_syntax(unit, LOG_INFO, filename, line, 0, + "Support for option %s= has not yet been enabled and it is ignored", lvalue); + break; + } + + return 0; +} + +int config_parse_log_facility( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int *o = data, x; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + x = log_facility_unshifted_from_string(rvalue); + if (x < 0) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse log facility, ignoring: %s", rvalue); + return 0; + } + + *o = (x << 3) | LOG_PRI(*o); + + return 0; +} + +int config_parse_log_level( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int *o = data, x; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + x = log_level_from_string(rvalue); + if (x < 0) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse log level, ignoring: %s", rvalue); + return 0; + } + + if (*o < 0) /* if it wasn't initialized so far, assume zero facility */ + *o = x; + else + *o = (*o & LOG_FACMASK) | x; + + return 0; +} + +int config_parse_signal( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int *sig = data, r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(sig); + + r = signal_from_string(rvalue); + if (r <= 0) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse signal name, ignoring: %s", rvalue); + return 0; + } + + *sig = r; + return 0; +} + +int config_parse_personality( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + unsigned long *personality = data, p; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(personality); + + if (isempty(rvalue)) + p = PERSONALITY_INVALID; + else { + p = personality_from_string(rvalue); + if (p == PERSONALITY_INVALID) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse personality, ignoring: %s", rvalue); + return 0; + } + } + + *personality = p; + return 0; +} + +int config_parse_ifname( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char **s = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *s = mfree(*s); + return 0; + } + + if (!ifname_valid(rvalue)) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Interface name is not valid or too long, ignoring assignment: %s", rvalue); + return 0; + } + + r = free_and_strdup(s, rvalue); + if (r < 0) + return log_oom(); + + return 0; +} + +int config_parse_ifnames( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_strv_free_ char **names = NULL; + char ***s = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *s = strv_free(*s); + return 0; + } + + for (const char *p = rvalue;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, 0); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to extract interface name, ignoring assignment: %s", + rvalue); + return 0; + } + if (r == 0) + break; + + if (!ifname_valid_full(word, ltype)) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Interface name is not valid or too long, ignoring assignment: %s", + word); + continue; + } + + r = strv_consume(&names, TAKE_PTR(word)); + if (r < 0) + return log_oom(); + } + + r = strv_extend_strv(s, names, true); + if (r < 0) + return log_oom(); + + return 0; +} + +int config_parse_ip_port( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint16_t *s = data; + uint16_t port; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *s = 0; + return 0; + } + + r = parse_ip_port(rvalue, &port); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse port '%s'.", rvalue); + return 0; + } + + *s = port; + + return 0; +} + +int config_parse_mtu( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint32_t *mtu = data; + int r; + + assert(rvalue); + assert(mtu); + + r = parse_mtu(ltype, rvalue, mtu); + if (r == -ERANGE) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Maximum transfer unit (MTU) value out of range. Permitted range is %" PRIu32 "…%" PRIu32 ", ignoring: %s", + (uint32_t) (ltype == AF_INET6 ? IPV6_MIN_MTU : IPV4_MIN_MTU), (uint32_t) UINT32_MAX, + rvalue); + return 0; + } + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse MTU value '%s', ignoring: %m", rvalue); + return 0; + } + + return 0; +} + +int config_parse_rlimit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + struct rlimit **rl = data, d = {}; + int r; + + assert(rvalue); + assert(rl); + + r = rlimit_parse(ltype, rvalue, &d); + if (r == -EILSEQ) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Soft resource limit chosen higher than hard limit, ignoring: %s", rvalue); + return 0; + } + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse resource value, ignoring: %s", rvalue); + return 0; + } + + if (rl[ltype]) + *rl[ltype] = d; + else { + rl[ltype] = newdup(struct rlimit, &d, 1); + if (!rl[ltype]) + return log_oom(); + } + + return 0; +} + +int config_parse_permille(const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + unsigned *permille = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(permille); + + r = parse_permille(rvalue); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse permille value, ignoring: %s", rvalue); + return 0; + } + + *permille = (unsigned) r; + + return 0; +} + +int config_parse_vlanprotocol(const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + int *vlan_protocol = data; + assert(filename); + assert(lvalue); + + if (isempty(rvalue)) { + *vlan_protocol = -1; + return 0; + } + + if (STR_IN_SET(rvalue, "802.1ad", "802.1AD")) + *vlan_protocol = ETH_P_8021AD; + else if (STR_IN_SET(rvalue, "802.1q", "802.1Q")) + *vlan_protocol = ETH_P_8021Q; + else { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Failed to parse VLAN protocol value, ignoring: %s", rvalue); + return 0; + } + + return 0; +} + +DEFINE_CONFIG_PARSE(config_parse_percent, parse_percent, "Failed to parse percent value"); diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h new file mode 100644 index 0000000..f115cb2 --- /dev/null +++ b/src/shared/conf-parser.h @@ -0,0 +1,303 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <syslog.h> + +#include "alloc-util.h" +#include "log.h" +#include "macro.h" +#include "time-util.h" + +/* An abstract parser for simple, line based, shallow configuration files consisting of variable assignments only. */ + +typedef enum ConfigParseFlags { + CONFIG_PARSE_RELAXED = 1 << 0, /* Do not warn about unknown non-extension fields */ + CONFIG_PARSE_WARN = 1 << 1, /* Emit non-debug messages */ +} ConfigParseFlags; + +/* Argument list for parsers of specific configuration settings. */ +#define CONFIG_PARSER_ARGUMENTS \ + const char *unit, \ + const char *filename, \ + unsigned line, \ + const char *section, \ + unsigned section_line, \ + const char *lvalue, \ + int ltype, \ + const char *rvalue, \ + void *data, \ + void *userdata + +/* Prototype for a parser for a specific configuration setting */ +typedef int (*ConfigParserCallback)(CONFIG_PARSER_ARGUMENTS); + +/* A macro declaring a function prototype, following the typedef above, simply because it's so cumbersomely long + * otherwise. (And current emacs gets irritatingly slow when editing files that contain lots of very long function + * prototypes on the same screen…) */ +#define CONFIG_PARSER_PROTOTYPE(name) int name(CONFIG_PARSER_ARGUMENTS) + +/* Wraps information for parsing a specific configuration variable, to + * be stored in a simple array */ +typedef struct ConfigTableItem { + const char *section; /* Section */ + const char *lvalue; /* Name of the variable */ + ConfigParserCallback parse; /* Function that is called to parse the variable's value */ + int ltype; /* Distinguish different variables passed to the same callback */ + void *data; /* Where to store the variable's data */ +} ConfigTableItem; + +/* Wraps information for parsing a specific configuration variable, to + * be stored in a gperf perfect hashtable */ +typedef struct ConfigPerfItem { + const char *section_and_lvalue; /* Section + "." + name of the variable */ + ConfigParserCallback parse; /* Function that is called to parse the variable's value */ + int ltype; /* Distinguish different variables passed to the same callback */ + size_t offset; /* Offset where to store data, from the beginning of userdata */ +} ConfigPerfItem; + +/* Prototype for a low-level gperf lookup function */ +typedef const ConfigPerfItem* (*ConfigPerfItemLookup)(const char *section_and_lvalue, unsigned length); + +/* Prototype for a generic high-level lookup function */ +typedef int (*ConfigItemLookup)( + const void *table, + const char *section, + const char *lvalue, + ConfigParserCallback *func, + int *ltype, + void **data, + void *userdata); + +/* Linear table search implementation of ConfigItemLookup, based on + * ConfigTableItem arrays */ +int config_item_table_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata); + +/* gperf implementation of ConfigItemLookup, based on gperf + * ConfigPerfItem tables */ +int config_item_perf_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata); + +int config_parse( + const char *unit, + const char *filename, + FILE *f, + const char *sections, /* nulstr */ + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata, + usec_t *ret_mtime); /* possibly NULL */ + +int config_parse_many_nulstr( + const char *conf_file, /* possibly NULL */ + const char *conf_file_dirs, /* nulstr */ + const char *sections, /* nulstr */ + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata, + usec_t *ret_mtime); /* possibly NULL */ + +int config_parse_many( + const char *conf_file, /* possibly NULL */ + const char* const* conf_file_dirs, + const char *dropin_dirname, + const char *sections, /* nulstr */ + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata, + usec_t *ret_mtime); /* possibly NULL */ + +CONFIG_PARSER_PROTOTYPE(config_parse_int); +CONFIG_PARSER_PROTOTYPE(config_parse_unsigned); +CONFIG_PARSER_PROTOTYPE(config_parse_long); +CONFIG_PARSER_PROTOTYPE(config_parse_uint8); +CONFIG_PARSER_PROTOTYPE(config_parse_uint16); +CONFIG_PARSER_PROTOTYPE(config_parse_uint32); +CONFIG_PARSER_PROTOTYPE(config_parse_int32); +CONFIG_PARSER_PROTOTYPE(config_parse_uint64); +CONFIG_PARSER_PROTOTYPE(config_parse_double); +CONFIG_PARSER_PROTOTYPE(config_parse_iec_size); +CONFIG_PARSER_PROTOTYPE(config_parse_si_uint64); +CONFIG_PARSER_PROTOTYPE(config_parse_iec_uint64); +CONFIG_PARSER_PROTOTYPE(config_parse_bool); +CONFIG_PARSER_PROTOTYPE(config_parse_id128); +CONFIG_PARSER_PROTOTYPE(config_parse_tristate); +CONFIG_PARSER_PROTOTYPE(config_parse_string); +CONFIG_PARSER_PROTOTYPE(config_parse_path); +CONFIG_PARSER_PROTOTYPE(config_parse_strv); +CONFIG_PARSER_PROTOTYPE(config_parse_sec); +CONFIG_PARSER_PROTOTYPE(config_parse_sec_def_infinity); +CONFIG_PARSER_PROTOTYPE(config_parse_sec_def_unset); +CONFIG_PARSER_PROTOTYPE(config_parse_nsec); +CONFIG_PARSER_PROTOTYPE(config_parse_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_warn_compat); +CONFIG_PARSER_PROTOTYPE(config_parse_log_facility); +CONFIG_PARSER_PROTOTYPE(config_parse_log_level); +CONFIG_PARSER_PROTOTYPE(config_parse_signal); +CONFIG_PARSER_PROTOTYPE(config_parse_personality); +CONFIG_PARSER_PROTOTYPE(config_parse_permille); +CONFIG_PARSER_PROTOTYPE(config_parse_ifname); +CONFIG_PARSER_PROTOTYPE(config_parse_ifnames); +CONFIG_PARSER_PROTOTYPE(config_parse_ip_port); +CONFIG_PARSER_PROTOTYPE(config_parse_mtu); +CONFIG_PARSER_PROTOTYPE(config_parse_rlimit); +CONFIG_PARSER_PROTOTYPE(config_parse_vlanprotocol); +CONFIG_PARSER_PROTOTYPE(config_parse_percent); + +typedef enum Disabled { + DISABLED_CONFIGURATION, + DISABLED_LEGACY, + DISABLED_EXPERIMENTAL, +} Disabled; + +#define DEFINE_CONFIG_PARSE(function, parser, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + int *i = data, r; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + r = parser(rvalue); \ + if (r < 0) { \ + log_syntax(unit, LOG_WARNING, filename, line, r, \ + msg ", ignoring: %s", rvalue); \ + return 0; \ + } \ + \ + *i = r; \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_PTR(function, parser, type, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type *i = data; \ + int r; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + r = parser(rvalue, i); \ + if (r < 0) \ + log_syntax(unit, LOG_WARNING, filename, line, r, \ + msg ", ignoring: %s", rvalue); \ + \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_ENUM_FULL(function, from_string, type, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type *i = data, x; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + x = from_string(rvalue); \ + if (x < 0) { \ + log_syntax(unit, LOG_WARNING, filename, line, 0, \ + msg ", ignoring: %s", rvalue); \ + return 0; \ + } \ + \ + *i = x; \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_ENUM(function, name, type, msg) \ + DEFINE_CONFIG_PARSE_ENUM_FULL(function, name##_from_string, type, msg) + +#define DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(function, name, type, default_value, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type *i = data, x; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + if (isempty(rvalue)) { \ + *i = default_value; \ + return 0; \ + } \ + \ + x = name##_from_string(rvalue); \ + if (x < 0) { \ + log_syntax(unit, LOG_WARNING, filename, line, 0, \ + msg ", ignoring: %s", rvalue); \ + return 0; \ + } \ + \ + *i = x; \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_ENUMV(function, name, type, invalid, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type **enums = data; \ + _cleanup_free_ type *xs = NULL; \ + size_t i = 0; \ + int r; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + xs = new0(type, 1); \ + if (!xs) \ + return -ENOMEM; \ + \ + *xs = invalid; \ + \ + for (const char *p = rvalue;;) { \ + _cleanup_free_ char *en = NULL; \ + type x, *new_xs; \ + \ + r = extract_first_word(&p, &en, NULL, 0); \ + if (r == -ENOMEM) \ + return log_oom(); \ + if (r < 0) \ + return log_syntax(unit, LOG_ERR, filename, line, 0, \ + msg ": %s", en); \ + if (r == 0) \ + break; \ + \ + if ((x = name##_from_string(en)) < 0) { \ + log_syntax(unit, LOG_WARNING, filename, line, 0, \ + msg ", ignoring: %s", en); \ + continue; \ + } \ + \ + for (type *ys = xs; x != invalid && *ys != invalid; ys++) \ + if (*ys == x) { \ + log_syntax(unit, LOG_NOTICE, filename, line, 0, \ + "Duplicate entry, ignoring: %s", \ + en); \ + x = invalid; \ + } \ + \ + if (x == invalid) \ + continue; \ + \ + *(xs + i) = x; \ + new_xs = realloc(xs, (++i + 1) * sizeof(type)); \ + if (new_xs) \ + xs = new_xs; \ + else \ + return log_oom(); \ + \ + *(xs + i) = invalid; \ + } \ + \ + return free_and_replace(*enums, xs); \ + } diff --git a/src/shared/coredump-util.c b/src/shared/coredump-util.c new file mode 100644 index 0000000..a0b648b --- /dev/null +++ b/src/shared/coredump-util.c @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "coredump-util.h" +#include "extract-word.h" +#include "fileio.h" +#include "string-table.h" + +static const char *const coredump_filter_table[_COREDUMP_FILTER_MAX] = { + [COREDUMP_FILTER_PRIVATE_ANONYMOUS] = "private-anonymous", + [COREDUMP_FILTER_SHARED_ANONYMOUS] = "shared-anonymous", + [COREDUMP_FILTER_PRIVATE_FILE_BACKED] = "private-file-backed", + [COREDUMP_FILTER_SHARED_FILE_BACKED] = "shared-file-backed", + [COREDUMP_FILTER_ELF_HEADERS] = "elf-headers", + [COREDUMP_FILTER_PRIVATE_HUGE] = "private-huge", + [COREDUMP_FILTER_SHARED_HUGE] = "shared-huge", + [COREDUMP_FILTER_PRIVATE_DAX] = "private-dax", + [COREDUMP_FILTER_SHARED_DAX] = "shared-dax", +}; + +DEFINE_STRING_TABLE_LOOKUP(coredump_filter, CoredumpFilter); + +int coredump_filter_mask_from_string(const char *s, uint64_t *ret) { + uint64_t m = 0; + + assert(s); + assert(ret); + + for (;;) { + _cleanup_free_ char *n = NULL; + CoredumpFilter v; + int r; + + r = extract_first_word(&s, &n, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + if (streq(n, "default")) { + m |= COREDUMP_FILTER_MASK_DEFAULT; + continue; + } + + if (streq(n, "all")) { + m = UINT64_MAX; + continue; + } + + v = coredump_filter_from_string(n); + if (v >= 0) { + m |= 1u << v; + continue; + } + + uint64_t x; + r = safe_atoux64(n, &x); + if (r < 0) + return r; + + m |= x; + } + + *ret = m; + return 0; +} + +int set_coredump_filter(uint64_t value) { + char t[STRLEN("0xFFFFFFFF")]; + + sprintf(t, "0x%"PRIx64, value); + + return write_string_file("/proc/self/coredump_filter", t, + WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER); +} diff --git a/src/shared/coredump-util.h b/src/shared/coredump-util.h new file mode 100644 index 0000000..a7f3c0e --- /dev/null +++ b/src/shared/coredump-util.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" + +typedef enum CoredumpFilter { + COREDUMP_FILTER_PRIVATE_ANONYMOUS = 0, + COREDUMP_FILTER_SHARED_ANONYMOUS, + COREDUMP_FILTER_PRIVATE_FILE_BACKED, + COREDUMP_FILTER_SHARED_FILE_BACKED, + COREDUMP_FILTER_ELF_HEADERS, + COREDUMP_FILTER_PRIVATE_HUGE, + COREDUMP_FILTER_SHARED_HUGE, + COREDUMP_FILTER_PRIVATE_DAX, + COREDUMP_FILTER_SHARED_DAX, + _COREDUMP_FILTER_MAX, + _COREDUMP_FILTER_INVALID = -1, +} CoredumpFilter; + +#define COREDUMP_FILTER_MASK_DEFAULT (1u << COREDUMP_FILTER_PRIVATE_ANONYMOUS | \ + 1u << COREDUMP_FILTER_SHARED_ANONYMOUS | \ + 1u << COREDUMP_FILTER_ELF_HEADERS | \ + 1u << COREDUMP_FILTER_PRIVATE_HUGE) + +const char* coredump_filter_to_string(CoredumpFilter i) _const_; +CoredumpFilter coredump_filter_from_string(const char *s) _pure_; +int coredump_filter_mask_from_string(const char *s, uint64_t *ret); + +int set_coredump_filter(uint64_t value); diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c new file mode 100644 index 0000000..2c3b5bb --- /dev/null +++ b/src/shared/cpu-set-util.c @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <syslog.h> + +#include "alloc-util.h" +#include "cpu-set-util.h" +#include "dirent-util.h" +#include "errno-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "parse-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "util.h" + +char* cpu_set_to_string(const CPUSet *a) { + _cleanup_free_ char *str = NULL; + size_t allocated = 0, len = 0; + int i, r; + + for (i = 0; (size_t) i < a->allocated * 8; i++) { + if (!CPU_ISSET_S(i, a->allocated, a->set)) + continue; + + if (!GREEDY_REALLOC(str, allocated, len + 1 + DECIMAL_STR_MAX(int))) + return NULL; + + r = sprintf(str + len, len > 0 ? " %d" : "%d", i); + assert_se(r > 0); + len += r; + } + + return TAKE_PTR(str) ?: strdup(""); +} + +char *cpu_set_to_range_string(const CPUSet *set) { + unsigned range_start = 0, range_end; + _cleanup_free_ char *str = NULL; + size_t allocated = 0, len = 0; + bool in_range = false; + int r; + + for (unsigned i = 0; i < set->allocated * 8; i++) + if (CPU_ISSET_S(i, set->allocated, set->set)) { + if (in_range) + range_end++; + else { + range_start = range_end = i; + in_range = true; + } + } else if (in_range) { + in_range = false; + + if (!GREEDY_REALLOC(str, allocated, len + 2 + 2 * DECIMAL_STR_MAX(unsigned))) + return NULL; + + if (range_end > range_start) + r = sprintf(str + len, len > 0 ? " %d-%d" : "%d-%d", range_start, range_end); + else + r = sprintf(str + len, len > 0 ? " %d" : "%d", range_start); + assert_se(r > 0); + len += r; + } + + if (in_range) { + if (!GREEDY_REALLOC(str, allocated, len + 2 + 2 * DECIMAL_STR_MAX(int))) + return NULL; + + if (range_end > range_start) + r = sprintf(str + len, len > 0 ? " %d-%d" : "%d-%d", range_start, range_end); + else + r = sprintf(str + len, len > 0 ? " %d" : "%d", range_start); + assert_se(r > 0); + } + + return TAKE_PTR(str) ?: strdup(""); +} + +int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus) { + size_t need; + + assert(cpu_set); + + need = CPU_ALLOC_SIZE(ncpus); + if (need > cpu_set->allocated) { + cpu_set_t *t; + + t = realloc(cpu_set->set, need); + if (!t) + return -ENOMEM; + + memzero((uint8_t*) t + cpu_set->allocated, need - cpu_set->allocated); + + cpu_set->set = t; + cpu_set->allocated = need; + } + + return 0; +} + +int cpu_set_add(CPUSet *cpu_set, unsigned cpu) { + int r; + + if (cpu >= 8192) + /* As of kernel 5.1, CONFIG_NR_CPUS can be set to 8192 on PowerPC */ + return -ERANGE; + + r = cpu_set_realloc(cpu_set, cpu + 1); + if (r < 0) + return r; + + CPU_SET_S(cpu, cpu_set->allocated, cpu_set->set); + return 0; +} + +int cpu_set_add_all(CPUSet *a, const CPUSet *b) { + int r; + + /* Do this backwards, so if we fail, we fail before changing anything. */ + for (unsigned cpu_p1 = b->allocated * 8; cpu_p1 > 0; cpu_p1--) + if (CPU_ISSET_S(cpu_p1 - 1, b->allocated, b->set)) { + r = cpu_set_add(a, cpu_p1 - 1); + if (r < 0) + return r; + } + + return 1; +} + +int parse_cpu_set_full( + const char *rvalue, + CPUSet *cpu_set, + bool warn, + const char *unit, + const char *filename, + unsigned line, + const char *lvalue) { + + _cleanup_(cpu_set_reset) CPUSet c = {}; + const char *p = rvalue; + + assert(p); + + for (;;) { + _cleanup_free_ char *word = NULL; + unsigned cpu_lower, cpu_upper; + int r; + + r = extract_first_word(&p, &word, WHITESPACE ",", EXTRACT_UNQUOTE); + if (r == -ENOMEM) + return warn ? log_oom() : -ENOMEM; + if (r < 0) + return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, rvalue) : r; + if (r == 0) + break; + + r = parse_range(word, &cpu_lower, &cpu_upper); + if (r < 0) + return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CPU affinity '%s'", word) : r; + + if (cpu_lower > cpu_upper) { + if (warn) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Range '%s' is invalid, %u > %u, ignoring.", + word, cpu_lower, cpu_upper); + + /* Make sure something is allocated, to distinguish this from the empty case */ + r = cpu_set_realloc(&c, 1); + if (r < 0) + return r; + } + + for (unsigned cpu_p1 = MIN(cpu_upper, UINT_MAX-1) + 1; cpu_p1 > cpu_lower; cpu_p1--) { + r = cpu_set_add(&c, cpu_p1 - 1); + if (r < 0) + return warn ? log_syntax(unit, LOG_ERR, filename, line, r, + "Cannot add CPU %u to set: %m", cpu_p1 - 1) : r; + } + } + + /* On success, transfer ownership to the output variable */ + *cpu_set = c; + c = (CPUSet) {}; + + return 0; +} + +int parse_cpu_set_extend( + const char *rvalue, + CPUSet *old, + bool warn, + const char *unit, + const char *filename, + unsigned line, + const char *lvalue) { + + _cleanup_(cpu_set_reset) CPUSet cpuset = {}; + int r; + + r = parse_cpu_set_full(rvalue, &cpuset, true, unit, filename, line, lvalue); + if (r < 0) + return r; + + if (!cpuset.set) { + /* An empty assignment resets the CPU list */ + cpu_set_reset(old); + return 0; + } + + if (!old->set) { + *old = cpuset; + cpuset = (CPUSet) {}; + return 1; + } + + return cpu_set_add_all(old, &cpuset); +} + +int cpus_in_affinity_mask(void) { + size_t n = 16; + int r; + + for (;;) { + cpu_set_t *c; + + c = CPU_ALLOC(n); + if (!c) + return -ENOMEM; + + if (sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0) { + int k; + + k = CPU_COUNT_S(CPU_ALLOC_SIZE(n), c); + CPU_FREE(c); + + if (k <= 0) + return -EINVAL; + + return k; + } + + r = -errno; + CPU_FREE(c); + + if (r != -EINVAL) + return r; + if (n > SIZE_MAX/2) + return -ENOMEM; + n *= 2; + } +} + +int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated) { + uint8_t *out; + + assert(set); + assert(ret); + + out = new0(uint8_t, set->allocated); + if (!out) + return -ENOMEM; + + for (unsigned cpu = 0; cpu < set->allocated * 8; cpu++) + if (CPU_ISSET_S(cpu, set->allocated, set->set)) + out[cpu / 8] |= 1u << (cpu % 8); + + *ret = out; + *allocated = set->allocated; + return 0; +} + +int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) { + _cleanup_(cpu_set_reset) CPUSet s = {}; + int r; + + assert(bits); + assert(set); + + for (unsigned cpu = size * 8; cpu > 0; cpu--) + if (bits[(cpu - 1) / 8] & (1u << ((cpu - 1) % 8))) { + r = cpu_set_add(&s, cpu - 1); + if (r < 0) + return r; + } + + *set = s; + s = (CPUSet) {}; + return 0; +} diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h new file mode 100644 index 0000000..3c63a58 --- /dev/null +++ b/src/shared/cpu-set-util.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sched.h> + +#include "macro.h" +#include "missing_syscall.h" + +/* This wraps the libc interface with a variable to keep the allocated size. */ +typedef struct CPUSet { + cpu_set_t *set; + size_t allocated; /* in bytes */ +} CPUSet; + +static inline void cpu_set_reset(CPUSet *a) { + assert((a->allocated > 0) == !!a->set); + if (a->set) + CPU_FREE(a->set); + *a = (CPUSet) {}; +} + +int cpu_set_add_all(CPUSet *a, const CPUSet *b); +int cpu_set_add(CPUSet *a, unsigned cpu); + +char* cpu_set_to_string(const CPUSet *a); +char *cpu_set_to_range_string(const CPUSet *a); +int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus); + +int parse_cpu_set_full( + const char *rvalue, + CPUSet *cpu_set, + bool warn, + const char *unit, + const char *filename, unsigned line, + const char *lvalue); +int parse_cpu_set_extend( + const char *rvalue, + CPUSet *old, + bool warn, + const char *unit, + const char *filename, + unsigned line, + const char *lvalue); + +static inline int parse_cpu_set(const char *rvalue, CPUSet *cpu_set){ + return parse_cpu_set_full(rvalue, cpu_set, false, NULL, NULL, 0, NULL); +} + +int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated); +int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set); + +int cpus_in_affinity_mask(void); diff --git a/src/shared/cryptsetup-util.c b/src/shared/cryptsetup-util.c new file mode 100644 index 0000000..34a078e --- /dev/null +++ b/src/shared/cryptsetup-util.c @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_LIBCRYPTSETUP +#include "alloc-util.h" +#include "cryptsetup-util.h" +#include "dlfcn-util.h" +#include "log.h" + +static void *cryptsetup_dl = NULL; + +int (*sym_crypt_activate_by_passphrase)(struct crypt_device *cd, const char *name, int keyslot, const char *passphrase, size_t passphrase_size, uint32_t flags); +#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY +int (*sym_crypt_activate_by_signed_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, const char *signature, size_t signature_size, uint32_t flags); +#endif +int (*sym_crypt_activate_by_volume_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, uint32_t flags); +int (*sym_crypt_deactivate_by_name)(struct crypt_device *cd, const char *name, uint32_t flags); +int (*sym_crypt_format)(struct crypt_device *cd, const char *type, const char *cipher, const char *cipher_mode, const char *uuid, const char *volume_key, size_t volume_key_size, void *params); +void (*sym_crypt_free)(struct crypt_device *cd); +const char *(*sym_crypt_get_dir)(void); +int (*sym_crypt_get_verity_info)(struct crypt_device *cd, struct crypt_params_verity *vp); +int (*sym_crypt_init)(struct crypt_device **cd, const char *device); +int (*sym_crypt_init_by_name)(struct crypt_device **cd, const char *name); +int (*sym_crypt_keyslot_add_by_volume_key)(struct crypt_device *cd, int keyslot, const char *volume_key, size_t volume_key_size, const char *passphrase, size_t passphrase_size); +int (*sym_crypt_load)(struct crypt_device *cd, const char *requested_type, void *params); +int (*sym_crypt_resize)(struct crypt_device *cd, const char *name, uint64_t new_size); +int (*sym_crypt_set_data_device)(struct crypt_device *cd, const char *device); +void (*sym_crypt_set_debug_level)(int level); +void (*sym_crypt_set_log_callback)(struct crypt_device *cd, void (*log)(int level, const char *msg, void *usrptr), void *usrptr); +int (*sym_crypt_volume_key_get)(struct crypt_device *cd, int keyslot, char *volume_key, size_t *volume_key_size, const char *passphrase, size_t passphrase_size); + +int dlopen_cryptsetup(void) { + _cleanup_(dlclosep) void *dl = NULL; + int r; + + if (cryptsetup_dl) + return 0; /* Already loaded */ + + dl = dlopen("libcryptsetup.so.12", RTLD_LAZY); + if (!dl) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "libcryptsetup support is not installed: %s", dlerror()); + + r = dlsym_many_and_warn( + dl, + LOG_DEBUG, + &sym_crypt_activate_by_passphrase, "crypt_activate_by_passphrase", +#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY + &sym_crypt_activate_by_signed_key, "crypt_activate_by_signed_key", +#endif + &sym_crypt_activate_by_volume_key, "crypt_activate_by_volume_key", + &sym_crypt_deactivate_by_name, "crypt_deactivate_by_name", + &sym_crypt_format, "crypt_format", + &sym_crypt_free, "crypt_free", + &sym_crypt_get_dir, "crypt_get_dir", + &sym_crypt_get_verity_info, "crypt_get_verity_info", + &sym_crypt_init, "crypt_init", + &sym_crypt_init_by_name, "crypt_init_by_name", + &sym_crypt_keyslot_add_by_volume_key, "crypt_keyslot_add_by_volume_key", + &sym_crypt_load, "crypt_load", + &sym_crypt_resize, "crypt_resize", + &sym_crypt_set_data_device, "crypt_set_data_device", + &sym_crypt_set_debug_level, "crypt_set_debug_level", + &sym_crypt_set_log_callback, "crypt_set_log_callback", + &sym_crypt_volume_key_get, "crypt_volume_key_get", + NULL); + if (r < 0) + return r; + + /* Note that we never release the reference here, because there's no real reason to, after all this + * was traditionally a regular shared library dependency which lives forever too. */ + cryptsetup_dl = TAKE_PTR(dl); + return 1; +} + +static void cryptsetup_log_glue(int level, const char *msg, void *usrptr) { + + switch (level) { + case CRYPT_LOG_NORMAL: + level = LOG_NOTICE; + break; + case CRYPT_LOG_ERROR: + level = LOG_ERR; + break; + case CRYPT_LOG_VERBOSE: + level = LOG_INFO; + break; + case CRYPT_LOG_DEBUG: + level = LOG_DEBUG; + break; + default: + log_error("Unknown libcryptsetup log level: %d", level); + level = LOG_ERR; + } + + log_full(level, "%s", msg); +} + +void cryptsetup_enable_logging(struct crypt_device *cd) { + if (!cd) + return; + + if (dlopen_cryptsetup() < 0) /* If this fails, let's gracefully ignore the issue, this is just debug + * logging after all, and if this failed we already generated a debug + * log message that should help to track things down. */ + return; + + sym_crypt_set_log_callback(cd, cryptsetup_log_glue, NULL); + sym_crypt_set_debug_level(DEBUG_LOGGING ? CRYPT_DEBUG_ALL : CRYPT_DEBUG_NONE); +} + +#endif diff --git a/src/shared/cryptsetup-util.h b/src/shared/cryptsetup-util.h new file mode 100644 index 0000000..e7d885d --- /dev/null +++ b/src/shared/cryptsetup-util.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" + +#if HAVE_LIBCRYPTSETUP +#include <libcryptsetup.h> + +/* These next two are defined in libcryptsetup.h from cryptsetup version 2.3.4 forwards. */ +#ifndef CRYPT_ACTIVATE_NO_READ_WORKQUEUE +#define CRYPT_ACTIVATE_NO_READ_WORKQUEUE (1 << 24) +#endif +#ifndef CRYPT_ACTIVATE_NO_WRITE_WORKQUEUE +#define CRYPT_ACTIVATE_NO_WRITE_WORKQUEUE (1 << 25) +#endif + +extern int (*sym_crypt_activate_by_passphrase)(struct crypt_device *cd, const char *name, int keyslot, const char *passphrase, size_t passphrase_size, uint32_t flags); +#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY +extern int (*sym_crypt_activate_by_signed_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, const char *signature, size_t signature_size, uint32_t flags); +#endif +extern int (*sym_crypt_activate_by_volume_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, uint32_t flags); +extern int (*sym_crypt_deactivate_by_name)(struct crypt_device *cd, const char *name, uint32_t flags); +extern int (*sym_crypt_format)(struct crypt_device *cd, const char *type, const char *cipher, const char *cipher_mode, const char *uuid, const char *volume_key, size_t volume_key_size, void *params); +extern void (*sym_crypt_free)(struct crypt_device *cd); +extern const char *(*sym_crypt_get_dir)(void); +extern int (*sym_crypt_get_verity_info)(struct crypt_device *cd, struct crypt_params_verity *vp); +extern int (*sym_crypt_init)(struct crypt_device **cd, const char *device); +extern int (*sym_crypt_init_by_name)(struct crypt_device **cd, const char *name); +extern int (*sym_crypt_keyslot_add_by_volume_key)(struct crypt_device *cd, int keyslot, const char *volume_key, size_t volume_key_size, const char *passphrase, size_t passphrase_size); +extern int (*sym_crypt_load)(struct crypt_device *cd, const char *requested_type, void *params); +extern int (*sym_crypt_resize)(struct crypt_device *cd, const char *name, uint64_t new_size); +extern int (*sym_crypt_set_data_device)(struct crypt_device *cd, const char *device); +extern void (*sym_crypt_set_debug_level)(int level); +extern void (*sym_crypt_set_log_callback)(struct crypt_device *cd, void (*log)(int level, const char *msg, void *usrptr), void *usrptr); +extern int (*sym_crypt_volume_key_get)(struct crypt_device *cd, int keyslot, char *volume_key, size_t *volume_key_size, const char *passphrase, size_t passphrase_size); + +int dlopen_cryptsetup(void); + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, crypt_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, sym_crypt_free); + +void cryptsetup_enable_logging(struct crypt_device *cd); + +#endif diff --git a/src/shared/daemon-util.h b/src/shared/daemon-util.h new file mode 100644 index 0000000..585e489 --- /dev/null +++ b/src/shared/daemon-util.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-daemon.h" + +#define NOTIFY_READY "READY=1\n" "STATUS=Processing requests..." +#define NOTIFY_STOPPING "STOPPING=1\n" "STATUS=Shutting down..." + +static inline const char *notify_start(const char *start, const char *stop) { + if (start) + (void) sd_notify(false, start); + + return stop; +} + +/* This is intended to be used with _cleanup_ attribute. */ +static inline void notify_on_cleanup(const char **p) { + if (*p) + (void) sd_notify(false, *p); +} diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c new file mode 100644 index 0000000..b788b06 --- /dev/null +++ b/src/shared/dev-setup.c @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "dev-setup.h" +#include "label.h" +#include "log.h" +#include "nulstr-util.h" +#include "path-util.h" +#include "umask-util.h" +#include "user-util.h" + +int dev_setup(const char *prefix, uid_t uid, gid_t gid) { + static const char symlinks[] = + "-/proc/kcore\0" "/dev/core\0" + "/proc/self/fd\0" "/dev/fd\0" + "/proc/self/fd/0\0" "/dev/stdin\0" + "/proc/self/fd/1\0" "/dev/stdout\0" + "/proc/self/fd/2\0" "/dev/stderr\0"; + + const char *j, *k; + int r; + + NULSTR_FOREACH_PAIR(j, k, symlinks) { + _cleanup_free_ char *link_name = NULL; + const char *n; + + if (j[0] == '-') { + j++; + + if (access(j, F_OK) < 0) + continue; + } + + if (prefix) { + link_name = path_join(prefix, k); + if (!link_name) + return -ENOMEM; + + n = link_name; + } else + n = k; + + r = symlink_label(j, n); + if (r < 0) + log_debug_errno(r, "Failed to symlink %s to %s: %m", j, n); + + if (uid != UID_INVALID || gid != GID_INVALID) + if (lchown(n, uid, gid) < 0) + log_debug_errno(errno, "Failed to chown %s: %m", n); + } + + return 0; +} + +int make_inaccessible_nodes( + const char *parent_dir, + uid_t uid, + gid_t gid) { + + static const struct { + const char *name; + mode_t mode; + } table[] = { + { "inaccessible", S_IFDIR | 0755 }, + { "inaccessible/reg", S_IFREG | 0000 }, + { "inaccessible/dir", S_IFDIR | 0000 }, + { "inaccessible/fifo", S_IFIFO | 0000 }, + { "inaccessible/sock", S_IFSOCK | 0000 }, + + /* The following two are likely to fail if we lack the privs for it (for example in an userns + * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibits creation of + * device nodes with a major/minor of 0). But that's entirely fine. Consumers of these files + * should implement falling back to use a different node then, for example + * <root>/inaccessible/sock, which is close enough in behaviour and semantics for most uses. + */ + { "inaccessible/chr", S_IFCHR | 0000 }, + { "inaccessible/blk", S_IFBLK | 0000 }, + }; + + _cleanup_umask_ mode_t u; + int r; + + if (!parent_dir) + parent_dir = "/run/systemd"; + + u = umask(0000); + + /* Set up inaccessible (and empty) file nodes of all types. This are used to as mount sources for over-mounting + * ("masking") file nodes that shall become inaccessible and empty for specific containers or services. We try + * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the + * underlying file, i.e. in the best case we offer the same node type as the underlying node. */ + + for (size_t i = 0; i < ELEMENTSOF(table); i++) { + _cleanup_free_ char *path = NULL; + + path = path_join(parent_dir, table[i].name); + if (!path) + return log_oom(); + + if (S_ISDIR(table[i].mode)) + r = mkdir_label(path, table[i].mode & 07777); + else + r = mknod_label(path, table[i].mode, makedev(0, 0)); + if (r < 0) { + log_debug_errno(r, "Failed to create '%s', ignoring: %m", path); + continue; + } + + if (uid != UID_INVALID || gid != GID_INVALID) { + if (lchown(path, uid, gid) < 0) + log_debug_errno(errno, "Failed to chown '%s': %m", path); + } + } + + return 0; +} diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h new file mode 100644 index 0000000..92ba6cf --- /dev/null +++ b/src/shared/dev-setup.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/types.h> + +int dev_setup(const char *prefix, uid_t uid, gid_t gid); + +int make_inaccessible_nodes(const char *parent_dir, uid_t uid, gid_t gid); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c new file mode 100644 index 0000000..d1f299a --- /dev/null +++ b/src/shared/dissect-image.c @@ -0,0 +1,2557 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_VALGRIND_MEMCHECK_H +#include <valgrind/memcheck.h> +#endif + +#include <linux/dm-ioctl.h> +#include <linux/loop.h> +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <sysexits.h> + +#include "sd-device.h" +#include "sd-id128.h" + +#include "architecture.h" +#include "ask-password-api.h" +#include "blkid-util.h" +#include "blockdev-util.h" +#include "copy.h" +#include "cryptsetup-util.h" +#include "def.h" +#include "device-nodes.h" +#include "device-util.h" +#include "dissect-image.h" +#include "dm-util.h" +#include "env-file.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "fsck-util.h" +#include "gpt.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "id128-util.h" +#include "mkdir.h" +#include "mount-util.h" +#include "mountpoint-util.h" +#include "namespace-util.h" +#include "nulstr-util.h" +#include "os-util.h" +#include "path-util.h" +#include "process-util.h" +#include "raw-clone.h" +#include "signal-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" +#include "udev-util.h" +#include "user-util.h" +#include "xattr-util.h" + +/* how many times to wait for the device nodes to appear */ +#define N_DEVICE_NODE_LIST_ATTEMPTS 10 + +int probe_filesystem(const char *node, char **ret_fstype) { + /* Try to find device content type and return it in *ret_fstype. If nothing is found, + * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and an + * different error otherwise. */ + +#if HAVE_BLKID + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + const char *fstype; + int r; + + errno = 0; + b = blkid_new_probe_from_filename(node); + if (!b) + return errno_or_else(ENOMEM); + + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); + + errno = 0; + r = blkid_do_safeprobe(b); + if (r == 1) { + log_debug("No type detected on partition %s", node); + goto not_found; + } + if (r == -2) + return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), + "Results ambiguous for partition %s", node); + if (r != 0) + return errno_or_else(EIO); + + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); + + if (fstype) { + char *t; + + t = strdup(fstype); + if (!t) + return -ENOMEM; + + *ret_fstype = t; + return 1; + } + +not_found: + *ret_fstype = NULL; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +#if HAVE_BLKID +static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + int r; + + assert(d); + assert(ret); + + r = sd_device_enumerator_new(&e); + if (r < 0) + return r; + + r = sd_device_enumerator_allow_uninitialized(e); + if (r < 0) + return r; + + r = sd_device_enumerator_add_match_parent(e, d); + if (r < 0) + return r; + + *ret = TAKE_PTR(e); + return 0; +} + +static int device_is_partition(sd_device *d, blkid_partition pp) { + blkid_loff_t bsize, bstart; + uint64_t size, start; + int partno, bpartno, r; + const char *ss, *v; + + assert(d); + assert(pp); + + r = sd_device_get_subsystem(d, &ss); + if (r < 0) + return r; + if (!streq(ss, "block")) + return false; + + r = sd_device_get_sysattr_value(d, "partition", &v); + if (r == -ENOENT) /* Not a partition device */ + return false; + if (r < 0) + return r; + r = safe_atoi(v, &partno); + if (r < 0) + return r; + + errno = 0; + bpartno = blkid_partition_get_partno(pp); + if (bpartno < 0) + return errno_or_else(EIO); + + if (partno != bpartno) + return false; + + r = sd_device_get_sysattr_value(d, "start", &v); + if (r < 0) + return r; + r = safe_atou64(v, &start); + if (r < 0) + return r; + + errno = 0; + bstart = blkid_partition_get_start(pp); + if (bstart < 0) + return errno_or_else(EIO); + + if (start != (uint64_t) bstart) + return false; + + r = sd_device_get_sysattr_value(d, "size", &v); + if (r < 0) + return r; + r = safe_atou64(v, &size); + if (r < 0) + return r; + + errno = 0; + bsize = blkid_partition_get_size(pp); + if (bsize < 0) + return errno_or_else(EIO); + + if (size != (uint64_t) bsize) + return false; + + return true; +} + +static int find_partition( + sd_device *parent, + blkid_partition pp, + sd_device **ret) { + + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *q; + int r; + + assert(parent); + assert(pp); + assert(ret); + + r = enumerator_for_parent(parent, &e); + if (r < 0) + return r; + + FOREACH_DEVICE(e, q) { + r = device_is_partition(q, pp); + if (r < 0) + return r; + if (r > 0) { + *ret = sd_device_ref(q); + return 0; + } + } + + return -ENXIO; +} + +struct wait_data { + sd_device *parent_device; + blkid_partition blkidp; + sd_device *found; +}; + +static inline void wait_data_done(struct wait_data *d) { + sd_device_unref(d->found); +} + +static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) { + const char *parent1_path, *parent2_path; + struct wait_data *w = userdata; + sd_device *pp; + int r; + + assert(w); + + if (device_for_action(device, DEVICE_ACTION_REMOVE)) + return 0; + + r = sd_device_get_parent(device, &pp); + if (r < 0) + return 0; /* Doesn't have a parent? No relevant to us */ + + r = sd_device_get_syspath(pp, &parent1_path); /* Check parent of device of this action */ + if (r < 0) + goto finish; + + r = sd_device_get_syspath(w->parent_device, &parent2_path); /* Check parent of device we are looking for */ + if (r < 0) + goto finish; + + if (!path_equal(parent1_path, parent2_path)) + return 0; /* Has a different parent than what we need, not interesting to us */ + + r = device_is_partition(device, w->blkidp); + if (r < 0) + goto finish; + if (r == 0) /* Not the one we need */ + return 0; + + /* It's the one we need! Yay! */ + assert(!w->found); + w->found = sd_device_ref(device); + r = 0; + +finish: + return sd_event_exit(sd_device_monitor_get_event(monitor), r); +} + +static int wait_for_partition_device( + sd_device *parent, + blkid_partition pp, + usec_t deadline, + sd_device **ret) { + + _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL; + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + int r; + + assert(parent); + assert(pp); + assert(ret); + + r = find_partition(parent, pp, ret); + if (r != -ENXIO) + return r; + + r = sd_event_new(&event); + if (r < 0) + return r; + + r = sd_device_monitor_new(&monitor); + if (r < 0) + return r; + + r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "partition"); + if (r < 0) + return r; + + r = sd_device_monitor_attach_event(monitor, event); + if (r < 0) + return r; + + _cleanup_(wait_data_done) struct wait_data w = { + .parent_device = parent, + .blkidp = pp, + }; + + r = sd_device_monitor_start(monitor, device_monitor_handler, &w); + if (r < 0) + return r; + + /* Check again, the partition might have appeared in the meantime */ + r = find_partition(parent, pp, ret); + if (r != -ENXIO) + return r; + + if (deadline != USEC_INFINITY) { + r = sd_event_add_time( + event, &timeout_source, + CLOCK_MONOTONIC, deadline, 0, + NULL, INT_TO_PTR(-ETIMEDOUT)); + if (r < 0) + return r; + } + + r = sd_event_loop(event); + if (r < 0) + return r; + + assert(w.found); + *ret = TAKE_PTR(w.found); + return 0; +} + +static void check_partition_flags( + const char *node, + unsigned long long pflags, + unsigned long long supported) { + + assert(node); + + /* Mask away all flags supported by this partition's type and the three flags the UEFI spec defines generically */ + pflags &= ~(supported | GPT_FLAG_REQUIRED_PARTITION | GPT_FLAG_NO_BLOCK_IO_PROTOCOL | GPT_FLAG_LEGACY_BIOS_BOOTABLE); + + if (pflags == 0) + return; + + /* If there are other bits set, then log about it, to make things discoverable */ + for (unsigned i = 0; i < sizeof(pflags) * 8; i++) { + unsigned long long bit = 1ULL << i; + if (!FLAGS_SET(pflags, bit)) + continue; + + log_debug("Unexpected partition flag %llu set on %s!", bit, node); + } +} + +static int device_wait_for_initialization_harder( + sd_device *device, + const char *subsystem, + usec_t deadline, + sd_device **ret) { + + _cleanup_free_ char *uevent = NULL; + usec_t start, left, retrigger_timeout; + int r; + + start = now(CLOCK_MONOTONIC); + left = usec_sub_unsigned(deadline, start); + + if (DEBUG_LOGGING) { + char buf[FORMAT_TIMESPAN_MAX]; + const char *sn = NULL; + + (void) sd_device_get_sysname(device, &sn); + log_debug("Waiting for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), left, 0)); + } + + if (left != USEC_INFINITY) + retrigger_timeout = CLAMP(left / 4, 1 * USEC_PER_SEC, 5 * USEC_PER_SEC); /* A fourth of the total timeout, but let's clamp to 1s…5s range */ + else + retrigger_timeout = 2 * USEC_PER_SEC; + + for (;;) { + usec_t local_deadline, n; + bool last_try; + + n = now(CLOCK_MONOTONIC); + assert(n >= start); + + /* Find next deadline, when we'll retrigger */ + local_deadline = start + + DIV_ROUND_UP(n - start, retrigger_timeout) * retrigger_timeout; + + if (deadline != USEC_INFINITY && deadline <= local_deadline) { + local_deadline = deadline; + last_try = true; + } else + last_try = false; + + r = device_wait_for_initialization(device, subsystem, local_deadline, ret); + if (r >= 0 && DEBUG_LOGGING) { + char buf[FORMAT_TIMESPAN_MAX]; + const char *sn = NULL; + + (void) sd_device_get_sysname(device, &sn); + log_debug("Successfully waited for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0)); + + } + if (r != -ETIMEDOUT || last_try) + return r; + + if (!uevent) { + const char *syspath; + + r = sd_device_get_syspath(device, &syspath); + if (r < 0) + return r; + + uevent = path_join(syspath, "uevent"); + if (!uevent) + return -ENOMEM; + } + + if (DEBUG_LOGGING) { + char buf[FORMAT_TIMESPAN_MAX]; + + log_debug("Device didn't initialize within %s, assuming lost event. Retriggering device through %s.", + format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0), + uevent); + } + + r = write_string_file(uevent, "change", WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + } +} +#endif + +#define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC) + +int dissect_image( + int fd, + const VeritySettings *verity, + const MountOptions *mount_options, + DissectImageFlags flags, + DissectedImage **ret) { + +#if HAVE_BLKID +#ifdef GPT_ROOT_NATIVE + sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL; +#endif +#ifdef GPT_USR_NATIVE + sd_id128_t usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL; +#endif + bool is_gpt, is_mbr, generic_rw, multiple_generic = false; + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + _cleanup_free_ char *generic_node = NULL; + sd_id128_t generic_uuid = SD_ID128_NULL; + const char *pttype = NULL; + blkid_partlist pl; + int r, generic_nr, n_partitions; + struct stat st; + usec_t deadline; + + assert(fd >= 0); + assert(ret); + assert(!verity || verity->root_hash || verity->root_hash_size == 0); + assert(!((flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_NO_PARTITION_TABLE))); + + /* Probes a disk image, and returns information about what it found in *ret. + * + * Returns -ENOPKG if no suitable partition table or file system could be found. + * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */ + + if (verity && verity->root_hash) { + sd_id128_t fsuuid, vuuid; + + /* If a root hash is supplied, then we use the root partition that has a UUID that match the + * first 128bit of the root hash. And we use the verity partition that has a UUID that match + * the final 128bit. */ + + if (verity->root_hash_size < sizeof(sd_id128_t)) + return -EINVAL; + + memcpy(&fsuuid, verity->root_hash, sizeof(sd_id128_t)); + memcpy(&vuuid, (const uint8_t*) verity->root_hash + verity->root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t)); + + if (sd_id128_is_null(fsuuid)) + return -EINVAL; + if (sd_id128_is_null(vuuid)) + return -EINVAL; + + /* If the verity data declares it's for the /usr partition, then search for that, in all + * other cases assume it's for the root partition. */ +#ifdef GPT_USR_NATIVE + if (verity->designator == PARTITION_USR) { + usr_uuid = fsuuid; + usr_verity_uuid = vuuid; + } else { +#endif +#ifdef GPT_ROOT_NATIVE + root_uuid = fsuuid; + root_verity_uuid = vuuid; +#endif +#ifdef GPT_USR_NATIVE + } +#endif + } + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISBLK(st.st_mode)) + return -ENOTBLK; + + r = sd_device_new_from_devnum(&d, 'b', st.st_rdev); + if (r < 0) + return r; + + if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) { + _cleanup_(sd_device_unrefp) sd_device *initialized = NULL; + + /* If udev support is enabled, then let's wait for the device to be initialized before we doing anything. */ + + r = device_wait_for_initialization_harder( + d, + "block", + usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC), + &initialized); + if (r < 0) + return r; + + sd_device_unref(d); + d = TAKE_PTR(initialized); + } + + b = blkid_new_probe(); + if (!b) + return -ENOMEM; + + errno = 0; + r = blkid_probe_set_device(b, fd, 0, 0); + if (r != 0) + return errno_or_else(ENOMEM); + + if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) { + /* Look for file system superblocks, unless we only shall look for GPT partition tables */ + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE); + } + + blkid_probe_enable_partitions(b, 1); + blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); + + errno = 0; + r = blkid_do_safeprobe(b); + if (IN_SET(r, -2, 1)) + return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table."); + if (r != 0) + return errno_or_else(EIO); + + m = new0(DissectedImage, 1); + if (!m) + return -ENOMEM; + + if ((!(flags & DISSECT_IMAGE_GPT_ONLY) && + (flags & DISSECT_IMAGE_REQUIRE_ROOT)) || + (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)) { + const char *usage = NULL; + + /* If flags permit this, also allow using non-partitioned single-filesystem images */ + + (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL); + if (STRPTR_IN_SET(usage, "filesystem", "crypto")) { + const char *fstype = NULL, *options = NULL, *devname = NULL; + _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL; + + /* OK, we have found a file system, that's our root partition then. */ + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); + + if (fstype) { + t = strdup(fstype); + if (!t) + return -ENOMEM; + } + + r = sd_device_get_devname(d, &devname); + if (r < 0) + return r; + + n = strdup(devname); + if (!n) + return -ENOMEM; + + m->single_file_system = true; + m->verity = verity && verity->root_hash && verity->data_path && (verity->designator < 0 || verity->designator == PARTITION_ROOT); + m->can_verity = verity && verity->data_path; + + options = mount_options_from_designator(mount_options, PARTITION_ROOT); + if (options) { + o = strdup(options); + if (!o) + return -ENOMEM; + } + + m->partitions[PARTITION_ROOT] = (DissectedPartition) { + .found = true, + .rw = !m->verity, + .partno = -1, + .architecture = _ARCHITECTURE_INVALID, + .fstype = TAKE_PTR(t), + .node = TAKE_PTR(n), + .mount_options = TAKE_PTR(o), + }; + + m->encrypted = streq_ptr(fstype, "crypto_LUKS"); + + *ret = TAKE_PTR(m); + return 0; + } + } + + (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); + if (!pttype) + return -ENOPKG; + + is_gpt = streq_ptr(pttype, "gpt"); + is_mbr = streq_ptr(pttype, "dos"); + + if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr)) + return -ENOPKG; + + /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't + * do partition scanning. */ + r = blockdev_partscan_enabled(fd); + if (r < 0) + return r; + if (r == 0) + return -EPROTONOSUPPORT; + + errno = 0; + pl = blkid_probe_get_partitions(b); + if (!pl) + return errno_or_else(ENOMEM); + + errno = 0; + n_partitions = blkid_partlist_numof_partitions(pl); + if (n_partitions < 0) + return errno_or_else(EIO); + + deadline = usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC); + for (int i = 0; i < n_partitions; i++) { + _cleanup_(sd_device_unrefp) sd_device *q = NULL; + unsigned long long pflags; + blkid_partition pp; + const char *node; + int nr; + + errno = 0; + pp = blkid_partlist_get_partition(pl, i); + if (!pp) + return errno_or_else(EIO); + + r = wait_for_partition_device(d, pp, deadline, &q); + if (r < 0) + return r; + + r = sd_device_get_devname(q, &node); + if (r < 0) + return r; + + pflags = blkid_partition_get_flags(pp); + + errno = 0; + nr = blkid_partition_get_partno(pp); + if (nr < 0) + return errno_or_else(EIO); + + if (is_gpt) { + PartitionDesignator designator = _PARTITION_DESIGNATOR_INVALID; + int architecture = _ARCHITECTURE_INVALID; + const char *stype, *sid, *fstype = NULL; + sd_id128_t type_id, id; + bool rw = true; + + sid = blkid_partition_get_uuid(pp); + if (!sid) + continue; + if (sd_id128_from_string(sid, &id) < 0) + continue; + + stype = blkid_partition_get_type_string(pp); + if (!stype) + continue; + if (sd_id128_from_string(stype, &type_id) < 0) + continue; + + if (sd_id128_equal(type_id, GPT_HOME)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_HOME; + rw = !(pflags & GPT_FLAG_READ_ONLY); + + } else if (sd_id128_equal(type_id, GPT_SRV)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_SRV; + rw = !(pflags & GPT_FLAG_READ_ONLY); + + } else if (sd_id128_equal(type_id, GPT_ESP)) { + + /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is + * not defined there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as + * recommended by the UEFI spec (See "12.3.3 Number and Location of System + * Partitions"). */ + + if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL) + continue; + + designator = PARTITION_ESP; + fstype = "vfat"; + + } else if (sd_id128_equal(type_id, GPT_XBOOTLDR)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_XBOOTLDR; + rw = !(pflags & GPT_FLAG_READ_ONLY); + } +#ifdef GPT_ROOT_NATIVE + else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + /* If a root ID is specified, ignore everything but the root id */ + if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id)) + continue; + + designator = PARTITION_ROOT; + architecture = native_architecture(); + rw = !(pflags & GPT_FLAG_READ_ONLY); + + } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + m->can_verity = true; + + /* Ignore verity unless a root hash is specified */ + if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id)) + continue; + + designator = PARTITION_ROOT_VERITY; + fstype = "DM_verity_hash"; + architecture = native_architecture(); + rw = false; + } +#endif +#ifdef GPT_ROOT_SECONDARY + else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + /* If a root ID is specified, ignore everything but the root id */ + if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id)) + continue; + + designator = PARTITION_ROOT_SECONDARY; + architecture = SECONDARY_ARCHITECTURE; + rw = !(pflags & GPT_FLAG_READ_ONLY); + + } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + m->can_verity = true; + + /* Ignore verity unless root has is specified */ + if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id)) + continue; + + designator = PARTITION_ROOT_SECONDARY_VERITY; + fstype = "DM_verity_hash"; + architecture = SECONDARY_ARCHITECTURE; + rw = false; + } +#endif +#ifdef GPT_USR_NATIVE + else if (sd_id128_equal(type_id, GPT_USR_NATIVE)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + /* If a usr ID is specified, ignore everything but the usr id */ + if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id)) + continue; + + designator = PARTITION_USR; + architecture = native_architecture(); + rw = !(pflags & GPT_FLAG_READ_ONLY); + + } else if (sd_id128_equal(type_id, GPT_USR_NATIVE_VERITY)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + m->can_verity = true; + + /* Ignore verity unless a usr hash is specified */ + if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id)) + continue; + + designator = PARTITION_USR_VERITY; + fstype = "DM_verity_hash"; + architecture = native_architecture(); + rw = false; + } +#endif +#ifdef GPT_USR_SECONDARY + else if (sd_id128_equal(type_id, GPT_USR_SECONDARY)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + /* If a usr ID is specified, ignore everything but the usr id */ + if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id)) + continue; + + designator = PARTITION_USR_SECONDARY; + architecture = SECONDARY_ARCHITECTURE; + rw = !(pflags & GPT_FLAG_READ_ONLY); + + } else if (sd_id128_equal(type_id, GPT_USR_SECONDARY_VERITY)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + m->can_verity = true; + + /* Ignore verity unless usr has is specified */ + if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id)) + continue; + + designator = PARTITION_USR_SECONDARY_VERITY; + fstype = "DM_verity_hash"; + architecture = SECONDARY_ARCHITECTURE; + rw = false; + } +#endif + else if (sd_id128_equal(type_id, GPT_SWAP)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_SWAP; + fstype = "swap"; + + } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + if (generic_node) + multiple_generic = true; + else { + generic_nr = nr; + generic_rw = !(pflags & GPT_FLAG_READ_ONLY); + generic_uuid = id; + generic_node = strdup(node); + if (!generic_node) + return -ENOMEM; + } + + } else if (sd_id128_equal(type_id, GPT_TMP)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_TMP; + rw = !(pflags & GPT_FLAG_READ_ONLY); + + } else if (sd_id128_equal(type_id, GPT_VAR)) { + + check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY); + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + if (!FLAGS_SET(flags, DISSECT_IMAGE_RELAX_VAR_CHECK)) { + sd_id128_t var_uuid; + + /* For /var we insist that the uuid of the partition matches the + * HMAC-SHA256 of the /var GPT partition type uuid, keyed by machine + * ID. Why? Unlike the other partitions /var is inherently + * installation specific, hence we need to be careful not to mount it + * in the wrong installation. By hashing the partition UUID from + * /etc/machine-id we can securely bind the partition to the + * installation. */ + + r = sd_id128_get_machine_app_specific(GPT_VAR, &var_uuid); + if (r < 0) + return r; + + if (!sd_id128_equal(var_uuid, id)) { + log_debug("Found a /var/ partition, but its UUID didn't match our expectations, ignoring."); + continue; + } + } + + designator = PARTITION_VAR; + rw = !(pflags & GPT_FLAG_READ_ONLY); + } + + if (designator != _PARTITION_DESIGNATOR_INVALID) { + _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL; + const char *options = NULL; + + /* First one wins */ + if (m->partitions[designator].found) + continue; + + if (fstype) { + t = strdup(fstype); + if (!t) + return -ENOMEM; + } + + n = strdup(node); + if (!n) + return -ENOMEM; + + options = mount_options_from_designator(mount_options, designator); + if (options) { + o = strdup(options); + if (!o) + return -ENOMEM; + } + + m->partitions[designator] = (DissectedPartition) { + .found = true, + .partno = nr, + .rw = rw, + .architecture = architecture, + .node = TAKE_PTR(n), + .fstype = TAKE_PTR(t), + .uuid = id, + .mount_options = TAKE_PTR(o), + }; + } + + } else if (is_mbr) { + + switch (blkid_partition_get_type(pp)) { + + case 0x83: /* Linux partition */ + + if (pflags != 0x80) /* Bootable flag */ + continue; + + if (generic_node) + multiple_generic = true; + else { + generic_nr = nr; + generic_rw = true; + generic_node = strdup(node); + if (!generic_node) + return -ENOMEM; + } + + break; + + case 0xEA: { /* Boot Loader Spec extended $BOOT partition */ + _cleanup_free_ char *n = NULL, *o = NULL; + sd_id128_t id = SD_ID128_NULL; + const char *sid, *options = NULL; + + /* First one wins */ + if (m->partitions[PARTITION_XBOOTLDR].found) + continue; + + sid = blkid_partition_get_uuid(pp); + if (sid) + (void) sd_id128_from_string(sid, &id); + + n = strdup(node); + if (!n) + return -ENOMEM; + + options = mount_options_from_designator(mount_options, PARTITION_XBOOTLDR); + if (options) { + o = strdup(options); + if (!o) + return -ENOMEM; + } + + m->partitions[PARTITION_XBOOTLDR] = (DissectedPartition) { + .found = true, + .partno = nr, + .rw = true, + .architecture = _ARCHITECTURE_INVALID, + .node = TAKE_PTR(n), + .uuid = id, + .mount_options = TAKE_PTR(o), + }; + + break; + }} + } + } + + if (m->partitions[PARTITION_ROOT].found) { + /* If we found the primary arch, then invalidate the secondary arch to avoid any ambiguities, + * since we never want to mount the secondary arch in this case. */ + m->partitions[PARTITION_ROOT_SECONDARY].found = false; + m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false; + m->partitions[PARTITION_USR_SECONDARY].found = false; + m->partitions[PARTITION_USR_SECONDARY_VERITY].found = false; + } else { + /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not + * either, then check if there's a single generic one, and use that. */ + + if (m->partitions[PARTITION_ROOT_VERITY].found) + return -EADDRNOTAVAIL; + + /* We didn't find a primary architecture root, but we found a primary architecture /usr? Refuse that for now. */ + if (m->partitions[PARTITION_USR].found || m->partitions[PARTITION_USR_VERITY].found) + return -EADDRNOTAVAIL; + + if (m->partitions[PARTITION_ROOT_SECONDARY].found) { + /* Upgrade secondary arch to first */ + m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY]; + zero(m->partitions[PARTITION_ROOT_SECONDARY]); + m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY]; + zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]); + + m->partitions[PARTITION_USR] = m->partitions[PARTITION_USR_SECONDARY]; + zero(m->partitions[PARTITION_USR_SECONDARY]); + m->partitions[PARTITION_USR_VERITY] = m->partitions[PARTITION_USR_SECONDARY_VERITY]; + zero(m->partitions[PARTITION_USR_SECONDARY_VERITY]); + + } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) { + _cleanup_free_ char *o = NULL; + const char *options = NULL; + + /* If the root hash was set, then we won't fall back to a generic node, because the + * root hash decides. */ + if (verity && verity->root_hash) + return -EADDRNOTAVAIL; + + /* If we didn't find a generic node, then we can't fix this up either */ + if (!generic_node) + return -ENXIO; + + /* If we didn't find a properly marked root partition, but we did find a single suitable + * generic Linux partition, then use this as root partition, if the caller asked for it. */ + if (multiple_generic) + return -ENOTUNIQ; + + options = mount_options_from_designator(mount_options, PARTITION_ROOT); + if (options) { + o = strdup(options); + if (!o) + return -ENOMEM; + } + + m->partitions[PARTITION_ROOT] = (DissectedPartition) { + .found = true, + .rw = generic_rw, + .partno = generic_nr, + .architecture = _ARCHITECTURE_INVALID, + .node = TAKE_PTR(generic_node), + .uuid = generic_uuid, + .mount_options = TAKE_PTR(o), + }; + } + } + + /* Refuse if we found a verity partition for /usr but no matching file system partition */ + if (!m->partitions[PARTITION_USR].found && m->partitions[PARTITION_USR_VERITY].found) + return -EADDRNOTAVAIL; + + /* Combinations of verity /usr with verity-less root is OK, but the reverse is not */ + if (m->partitions[PARTITION_ROOT_VERITY].found && m->partitions[PARTITION_USR].found && !m->partitions[PARTITION_USR_VERITY].found) + return -EADDRNOTAVAIL; + + if (verity && verity->root_hash) { + if (verity->designator < 0 || verity->designator == PARTITION_ROOT) { + if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found) + return -EADDRNOTAVAIL; + + /* If we found a verity setup, then the root partition is necessarily read-only. */ + m->partitions[PARTITION_ROOT].rw = false; + m->verity = true; + } + + if (verity->designator == PARTITION_USR) { + if (!m->partitions[PARTITION_USR_VERITY].found || !m->partitions[PARTITION_USR].found) + return -EADDRNOTAVAIL; + + m->partitions[PARTITION_USR].rw = false; + m->verity = true; + } + } + + blkid_free_probe(b); + b = NULL; + + /* Fill in file system types if we don't know them yet. */ + for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + DissectedPartition *p = m->partitions + i; + + if (!p->found) + continue; + + if (!p->fstype && p->node) { + r = probe_filesystem(p->node, &p->fstype); + if (r < 0 && r != -EUCLEAN) + return r; + } + + if (streq_ptr(p->fstype, "crypto_LUKS")) + m->encrypted = true; + + if (p->fstype && fstype_is_ro(p->fstype)) + p->rw = false; + } + + *ret = TAKE_PTR(m); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +DissectedImage* dissected_image_unref(DissectedImage *m) { + if (!m) + return NULL; + + for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + free(m->partitions[i].fstype); + free(m->partitions[i].node); + free(m->partitions[i].decrypted_fstype); + free(m->partitions[i].decrypted_node); + free(m->partitions[i].mount_options); + } + + free(m->hostname); + strv_free(m->machine_info); + strv_free(m->os_release); + + return mfree(m); +} + +static int is_loop_device(const char *path) { + char s[SYS_BLOCK_PATH_MAX("/../loop/")]; + struct stat st; + + assert(path); + + if (stat(path, &st) < 0) + return -errno; + + if (!S_ISBLK(st.st_mode)) + return -ENOTBLK; + + xsprintf_sys_block_path(s, "/loop/", st.st_dev); + if (access(s, F_OK) < 0) { + if (errno != ENOENT) + return -errno; + + /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */ + xsprintf_sys_block_path(s, "/../loop/", st.st_dev); + if (access(s, F_OK) < 0) + return errno == ENOENT ? false : -errno; + } + + return true; +} + +static int run_fsck(const char *node, const char *fstype) { + int r, exit_status; + pid_t pid; + + assert(node); + assert(fstype); + + r = fsck_exists(fstype); + if (r < 0) { + log_debug_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", fstype); + return 0; + } + if (r == 0) { + log_debug("Not checking partition %s, as fsck for %s does not exist.", node, fstype); + return 0; + } + + r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_NULL_STDIO, &pid); + if (r < 0) + return log_debug_errno(r, "Failed to fork off fsck: %m"); + if (r == 0) { + /* Child */ + execl("/sbin/fsck", "/sbin/fsck", "-aT", node, NULL); + log_debug_errno(errno, "Failed to execl() fsck: %m"); + _exit(FSCK_OPERATIONAL_ERROR); + } + + exit_status = wait_for_terminate_and_check("fsck", pid, 0); + if (exit_status < 0) + return log_debug_errno(exit_status, "Failed to fork off /sbin/fsck: %m"); + + if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) { + log_debug("fsck failed with exit status %i.", exit_status); + + if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0) + return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "File system is corrupted, refusing."); + + log_debug("Ignoring fsck error."); + } + + return 0; +} + +static int mount_partition( + DissectedPartition *m, + const char *where, + const char *directory, + uid_t uid_shift, + DissectImageFlags flags) { + + _cleanup_free_ char *chased = NULL, *options = NULL; + const char *p, *node, *fstype; + bool rw; + int r; + + assert(m); + assert(where); + + /* Use decrypted node and matching fstype if available, otherwise use the original device */ + node = m->decrypted_node ?: m->node; + fstype = m->decrypted_node ? m->decrypted_fstype: m->fstype; + + if (!m->found || !node) + return 0; + if (!fstype) + return -EAFNOSUPPORT; + + /* We are looking at an encrypted partition? This either means stacked encryption, or the caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this case. */ + if (streq(fstype, "crypto_LUKS")) + return -EUNATCH; + + rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY); + + if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) { + r = run_fsck(node, fstype); + if (r < 0) + return r; + } + + if (directory) { + if (!FLAGS_SET(flags, DISSECT_IMAGE_READ_ONLY)) { + /* Automatically create missing mount points, if necessary. */ + r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755); + if (r < 0) + return r; + } + + r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased, NULL); + if (r < 0) + return r; + + p = chased; + } else + p = where; + + /* If requested, turn on discard support. */ + if (fstype_can_discard(fstype) && + ((flags & DISSECT_IMAGE_DISCARD) || + ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) { + options = strdup("discard"); + if (!options) + return -ENOMEM; + } + + if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) { + _cleanup_free_ char *uid_option = NULL; + + if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0) + return -ENOMEM; + + if (!strextend_with_separator(&options, ",", uid_option, NULL)) + return -ENOMEM; + } + + if (!isempty(m->mount_options)) + if (!strextend_with_separator(&options, ",", m->mount_options, NULL)) + return -ENOMEM; + + if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) { + r = mkdir_p(p, 0755); + if (r < 0) + return r; + } + + r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options); + if (r < 0) + return r; + + return 1; +} + +int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) { + int r, xbootldr_mounted; + + assert(m); + assert(where); + + /* Returns: + * + * -ENXIO → No root partition found + * -EMEDIUMTYPE → DISSECT_IMAGE_VALIDATE_OS set but no os-release file found + * -EUNATCH → Encrypted partition found for which no dm-crypt was set up yet + * -EUCLEAN → fsck for file system failed + * -EBUSY → File system already mounted/used elsewhere (kernel) + * -EAFNOSUPPORT → File system type not supported or not known + */ + + if (!m->partitions[PARTITION_ROOT].found) + return -ENXIO; + + if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) { + r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags); + if (r < 0) + return r; + } + + /* Mask DISSECT_IMAGE_MKDIR for all subdirs: the idea is that only the top-level mount point is + * created if needed, but the image itself not modified. */ + flags &= ~DISSECT_IMAGE_MKDIR; + + if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) { + /* For us mounting root always means mounting /usr as well */ + r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, flags); + if (r < 0) + return r; + + if (flags & DISSECT_IMAGE_VALIDATE_OS) { + r = path_is_os_tree(where); + if (r < 0) + return r; + if (r == 0) + return -EMEDIUMTYPE; + } + } + + if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY) + return 0; + + r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags); + if (r < 0) + return r; + + r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags); + if (r < 0) + return r; + + r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, flags); + if (r < 0) + return r; + + r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, flags); + if (r < 0) + return r; + + xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, flags); + if (xbootldr_mounted < 0) + return xbootldr_mounted; + + if (m->partitions[PARTITION_ESP].found) { + int esp_done = false; + + /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it + * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */ + + r = chase_symlinks("/efi", where, CHASE_PREFIX_ROOT, NULL, NULL); + if (r < 0) { + if (r != -ENOENT) + return r; + + /* /efi doesn't exist. Let's see if /boot is suitable then */ + + if (!xbootldr_mounted) { + _cleanup_free_ char *p = NULL; + + r = chase_symlinks("/boot", where, CHASE_PREFIX_ROOT, &p, NULL); + if (r < 0) { + if (r != -ENOENT) + return r; + } else if (dir_is_empty(p) > 0) { + /* It exists and is an empty directory. Let's mount the ESP there. */ + r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, flags); + if (r < 0) + return r; + + esp_done = true; + } + } + } + + if (!esp_done) { + /* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */ + + r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, flags); + if (r < 0) + return r; + } + } + + return 0; +} + +int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) { + int r; + + assert(m); + assert(where); + + r = dissected_image_mount(m, where, uid_shift, flags); + if (r == -ENXIO) + return log_error_errno(r, "Not root file system found in image."); + if (r == -EMEDIUMTYPE) + return log_error_errno(r, "No suitable os-release file in image found."); + if (r == -EUNATCH) + return log_error_errno(r, "Encrypted file system discovered, but decryption not requested."); + if (r == -EUCLEAN) + return log_error_errno(r, "File system check on image failed."); + if (r == -EBUSY) + return log_error_errno(r, "File system already mounted elsewhere."); + if (r == -EAFNOSUPPORT) + return log_error_errno(r, "File system type not supported or not known."); + if (r < 0) + return log_error_errno(r, "Failed to mount image: %m"); + + return r; +} + +#if HAVE_LIBCRYPTSETUP +typedef struct DecryptedPartition { + struct crypt_device *device; + char *name; + bool relinquished; +} DecryptedPartition; + +struct DecryptedImage { + DecryptedPartition *decrypted; + size_t n_decrypted; + size_t n_allocated; +}; +#endif + +DecryptedImage* decrypted_image_unref(DecryptedImage* d) { +#if HAVE_LIBCRYPTSETUP + size_t i; + int r; + + if (!d) + return NULL; + + for (i = 0; i < d->n_decrypted; i++) { + DecryptedPartition *p = d->decrypted + i; + + if (p->device && p->name && !p->relinquished) { + r = sym_crypt_deactivate_by_name(p->device, p->name, 0); + if (r < 0) + log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name); + } + + if (p->device) + sym_crypt_free(p->device); + free(p->name); + } + + free(d); +#endif + return NULL; +} + +#if HAVE_LIBCRYPTSETUP + +static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) { + _cleanup_free_ char *name = NULL, *node = NULL; + const char *base; + + assert(original_node); + assert(suffix); + assert(ret_name); + assert(ret_node); + + base = strrchr(original_node, '/'); + if (!base) + base = original_node; + else + base++; + if (isempty(base)) + return -EINVAL; + + name = strjoin(base, suffix); + if (!name) + return -ENOMEM; + if (!filename_is_valid(name)) + return -EINVAL; + + node = path_join(sym_crypt_get_dir(), name); + if (!node) + return -ENOMEM; + + *ret_name = TAKE_PTR(name); + *ret_node = TAKE_PTR(node); + + return 0; +} + +static int decrypt_partition( + DissectedPartition *m, + const char *passphrase, + DissectImageFlags flags, + DecryptedImage *d) { + + _cleanup_free_ char *node = NULL, *name = NULL; + _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL; + int r; + + assert(m); + assert(d); + + if (!m->found || !m->node || !m->fstype) + return 0; + + if (!streq(m->fstype, "crypto_LUKS")) + return 0; + + if (!passphrase) + return -ENOKEY; + + r = dlopen_cryptsetup(); + if (r < 0) + return r; + + r = make_dm_name_and_node(m->node, "-decrypted", &name, &node); + if (r < 0) + return r; + + if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1)) + return -ENOMEM; + + r = sym_crypt_init(&cd, m->node); + if (r < 0) + return log_debug_errno(r, "Failed to initialize dm-crypt: %m"); + + cryptsetup_enable_logging(cd); + + r = sym_crypt_load(cd, CRYPT_LUKS, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to load LUKS metadata: %m"); + + r = sym_crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase), + ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) | + ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0)); + if (r < 0) { + log_debug_errno(r, "Failed to activate LUKS device: %m"); + return r == -EPERM ? -EKEYREJECTED : r; + } + + d->decrypted[d->n_decrypted++] = (DecryptedPartition) { + .name = TAKE_PTR(name), + .device = TAKE_PTR(cd), + }; + + m->decrypted_node = TAKE_PTR(node); + + return 0; +} + +static int verity_can_reuse( + const VeritySettings *verity, + const char *name, + struct crypt_device **ret_cd) { + + /* If the same volume was already open, check that the root hashes match, and reuse it if they do */ + _cleanup_free_ char *root_hash_existing = NULL; + _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL; + struct crypt_params_verity crypt_params = {}; + size_t root_hash_existing_size; + int r; + + assert(verity); + assert(name); + assert(ret_cd); + + r = sym_crypt_init_by_name(&cd, name); + if (r < 0) + return log_debug_errno(r, "Error opening verity device, crypt_init_by_name failed: %m"); + + r = sym_crypt_get_verity_info(cd, &crypt_params); + if (r < 0) + return log_debug_errno(r, "Error opening verity device, crypt_get_verity_info failed: %m"); + + root_hash_existing_size = verity->root_hash_size; + root_hash_existing = malloc0(root_hash_existing_size); + if (!root_hash_existing) + return -ENOMEM; + + r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, root_hash_existing, &root_hash_existing_size, NULL, 0); + if (r < 0) + return log_debug_errno(r, "Error opening verity device, crypt_volume_key_get failed: %m"); + if (verity->root_hash_size != root_hash_existing_size || + memcmp(root_hash_existing, verity->root_hash, verity->root_hash_size) != 0) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but root hashes are different."); + +#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY + /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the + * same settings, so that a previous unsigned mount will not be reused if the user asks to use + * signing for the new one, and viceversa. */ + if (!!verity->root_hash_sig != !!(crypt_params.flags & CRYPT_VERITY_ROOT_HASH_SIGNATURE)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but signature settings are not the same."); +#endif + + *ret_cd = TAKE_PTR(cd); + return 0; +} + +static inline void dm_deferred_remove_clean(char *name) { + if (!name) + return; + + (void) sym_crypt_deactivate_by_name(NULL, name, CRYPT_DEACTIVATE_DEFERRED); + free(name); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean); + +static int verity_partition( + PartitionDesignator designator, + DissectedPartition *m, + DissectedPartition *v, + const VeritySettings *verity, + DissectImageFlags flags, + DecryptedImage *d) { + + _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL; + _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL; + _cleanup_free_ char *node = NULL, *name = NULL; + int r; + + assert(m); + assert(v || (verity && verity->data_path)); + + if (!verity || !verity->root_hash) + return 0; + if (!((verity->designator < 0 && designator == PARTITION_ROOT) || + (verity->designator == designator))) + return 0; + + if (!m->found || !m->node || !m->fstype) + return 0; + if (!verity->data_path) { + if (!v->found || !v->node || !v->fstype) + return 0; + + if (!streq(v->fstype, "DM_verity_hash")) + return 0; + } + + r = dlopen_cryptsetup(); + if (r < 0) + return r; + + if (FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) { + /* Use the roothash, which is unique per volume, as the device node name, so that it can be reused */ + _cleanup_free_ char *root_hash_encoded = NULL; + + root_hash_encoded = hexmem(verity->root_hash, verity->root_hash_size); + if (!root_hash_encoded) + return -ENOMEM; + + r = make_dm_name_and_node(root_hash_encoded, "-verity", &name, &node); + } else + r = make_dm_name_and_node(m->node, "-verity", &name, &node); + if (r < 0) + return r; + + r = sym_crypt_init(&cd, verity->data_path ?: v->node); + if (r < 0) + return r; + + cryptsetup_enable_logging(cd); + + r = sym_crypt_load(cd, CRYPT_VERITY, NULL); + if (r < 0) + return r; + + r = sym_crypt_set_data_device(cd, m->node); + if (r < 0) + return r; + + if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1)) + return -ENOMEM; + + /* If activating fails because the device already exists, check the metadata and reuse it if it matches. + * In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time, + * retry a few times before giving up. */ + for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) { + if (verity->root_hash_sig) { +#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY + r = sym_crypt_activate_by_signed_key( + cd, + name, + verity->root_hash, + verity->root_hash_size, + verity->root_hash_sig, + verity->root_hash_sig_size, + CRYPT_ACTIVATE_READONLY); +#else + r = log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Activation of verity device with signature requested, but not supported by %s due to missing crypt_activate_by_signed_key().", program_invocation_short_name); +#endif + } else + r = sym_crypt_activate_by_volume_key( + cd, + name, + verity->root_hash, + verity->root_hash_size, + CRYPT_ACTIVATE_READONLY); + /* libdevmapper can return EINVAL when the device is already in the activation stage. + * There's no way to distinguish this situation from a genuine error due to invalid + * parameters, so immediately fall back to activating the device with a unique name. + * Improvements in libcrypsetup can ensure this never happens: + * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/96 */ + if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) + return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d); + if (!IN_SET(r, + 0, /* Success */ + -EEXIST, /* Volume is already open and ready to be used */ + -EBUSY, /* Volume is being opened but not ready, crypt_init_by_name can fetch details */ + -ENODEV /* Volume is being opened but not ready, crypt_init_by_name would fail, try to open again */)) + return r; + if (IN_SET(r, -EEXIST, -EBUSY)) { + struct crypt_device *existing_cd = NULL; + + if (!restore_deferred_remove){ + /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */ + r = dm_deferred_remove_cancel(name); + /* If activation returns EBUSY there might be no deferred removal to cancel, that's fine */ + if (r < 0 && r != -ENXIO) + return log_debug_errno(r, "Disabling automated deferred removal for verity device %s failed: %m", node); + if (r == 0) { + restore_deferred_remove = strdup(name); + if (!restore_deferred_remove) + return -ENOMEM; + } + } + + r = verity_can_reuse(verity, name, &existing_cd); + /* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */ + if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) + return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d); + if (!IN_SET(r, 0, -ENODEV, -ENOENT, -EBUSY)) + return log_debug_errno(r, "Checking whether existing verity device %s can be reused failed: %m", node); + if (r == 0) { + /* devmapper might say that the device exists, but the devlink might not yet have been + * created. Check and wait for the udev event in that case. */ + r = device_wait_for_devlink(node, "block", usec_add(now(CLOCK_MONOTONIC), 100 * USEC_PER_MSEC), NULL); + /* Fallback to activation with a unique device if it's taking too long */ + if (r == -ETIMEDOUT) + break; + if (r < 0) + return r; + + if (cd) + sym_crypt_free(cd); + cd = existing_cd; + } + } + if (r == 0) + break; + + /* Device is being opened by another process, but it has not finished yet, yield for 2ms */ + (void) usleep(2 * USEC_PER_MSEC); + } + + /* An existing verity device was reported by libcryptsetup/libdevmapper, but we can't use it at this time. + * Fall back to activating it with a unique device name. */ + if (r != 0 && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) + return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d); + + /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */ + restore_deferred_remove = mfree(restore_deferred_remove); + + d->decrypted[d->n_decrypted++] = (DecryptedPartition) { + .name = TAKE_PTR(name), + .device = TAKE_PTR(cd), + }; + + m->decrypted_node = TAKE_PTR(node); + + return 0; +} +#endif + +int dissected_image_decrypt( + DissectedImage *m, + const char *passphrase, + const VeritySettings *verity, + DissectImageFlags flags, + DecryptedImage **ret) { + +#if HAVE_LIBCRYPTSETUP + _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL; + int r; +#endif + + assert(m); + assert(!verity || verity->root_hash || verity->root_hash_size == 0); + + /* Returns: + * + * = 0 → There was nothing to decrypt + * > 0 → Decrypted successfully + * -ENOKEY → There's something to decrypt but no key was supplied + * -EKEYREJECTED → Passed key was not correct + */ + + if (verity && verity->root_hash && verity->root_hash_size < sizeof(sd_id128_t)) + return -EINVAL; + + if (!m->encrypted && !m->verity) { + *ret = NULL; + return 0; + } + +#if HAVE_LIBCRYPTSETUP + d = new0(DecryptedImage, 1); + if (!d) + return -ENOMEM; + + for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + DissectedPartition *p = m->partitions + i; + PartitionDesignator k; + + if (!p->found) + continue; + + r = decrypt_partition(p, passphrase, flags, d); + if (r < 0) + return r; + + k = PARTITION_VERITY_OF(i); + if (k >= 0) { + r = verity_partition(i, p, m->partitions + k, verity, flags | DISSECT_IMAGE_VERITY_SHARE, d); + if (r < 0) + return r; + } + + if (!p->decrypted_fstype && p->decrypted_node) { + r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype); + if (r < 0 && r != -EUCLEAN) + return r; + } + } + + *ret = TAKE_PTR(d); + + return 1; +#else + return -EOPNOTSUPP; +#endif +} + +int dissected_image_decrypt_interactively( + DissectedImage *m, + const char *passphrase, + const VeritySettings *verity, + DissectImageFlags flags, + DecryptedImage **ret) { + + _cleanup_strv_free_erase_ char **z = NULL; + int n = 3, r; + + if (passphrase) + n--; + + for (;;) { + r = dissected_image_decrypt(m, passphrase, verity, flags, ret); + if (r >= 0) + return r; + if (r == -EKEYREJECTED) + log_error_errno(r, "Incorrect passphrase, try again!"); + else if (r != -ENOKEY) + return log_error_errno(r, "Failed to decrypt image: %m"); + + if (--n < 0) + return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED), + "Too many retries."); + + z = strv_free(z); + + r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z); + if (r < 0) + return log_error_errno(r, "Failed to query for passphrase: %m"); + + passphrase = z[0]; + } +} + +int decrypted_image_relinquish(DecryptedImage *d) { + +#if HAVE_LIBCRYPTSETUP + size_t i; + int r; +#endif + + assert(d); + + /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so + * that we don't clean it up ourselves either anymore */ + +#if HAVE_LIBCRYPTSETUP + for (i = 0; i < d->n_decrypted; i++) { + DecryptedPartition *p = d->decrypted + i; + + if (p->relinquished) + continue; + + r = sym_crypt_deactivate_by_name(NULL, p->name, CRYPT_DEACTIVATE_DEFERRED); + if (r < 0) + return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name); + + p->relinquished = true; + } +#endif + + return 0; +} + +static char *build_auxiliary_path(const char *image, const char *suffix) { + const char *e; + char *n; + + assert(image); + assert(suffix); + + e = endswith(image, ".raw"); + if (!e) + return strjoin(e, suffix); + + n = new(char, e - image + strlen(suffix) + 1); + if (!n) + return NULL; + + strcpy(mempcpy(n, image, e - image), suffix); + return n; +} + +void verity_settings_done(VeritySettings *v) { + assert(v); + + v->root_hash = mfree(v->root_hash); + v->root_hash_size = 0; + + v->root_hash_sig = mfree(v->root_hash_sig); + v->root_hash_sig_size = 0; + + v->data_path = mfree(v->data_path); +} + +int verity_settings_load( + VeritySettings *verity, + const char *image, + const char *root_hash_path, + const char *root_hash_sig_path) { + + _cleanup_free_ void *root_hash = NULL, *root_hash_sig = NULL; + size_t root_hash_size = 0, root_hash_sig_size = 0; + _cleanup_free_ char *verity_data_path = NULL; + PartitionDesignator designator; + int r; + + assert(verity); + assert(image); + assert(verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR)); + + /* If we are asked to load the root hash for a device node, exit early */ + if (is_device_path(image)) + return 0; + + designator = verity->designator; + + /* We only fill in what isn't already filled in */ + + if (!verity->root_hash) { + _cleanup_free_ char *text = NULL; + + if (root_hash_path) { + /* If explicitly specified it takes precedence */ + r = read_one_line_file(root_hash_path, &text); + if (r < 0) + return r; + + if (designator < 0) + designator = PARTITION_ROOT; + } else { + /* Otherwise look for xattr and separate file, and first for the data for root and if + * that doesn't exist for /usr */ + + if (designator < 0 || designator == PARTITION_ROOT) { + r = getxattr_malloc(image, "user.verity.roothash", &text, true); + if (r < 0) { + _cleanup_free_ char *p = NULL; + + if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r)) + return r; + + p = build_auxiliary_path(image, ".roothash"); + if (!p) + return -ENOMEM; + + r = read_one_line_file(p, &text); + if (r < 0 && r != -ENOENT) + return r; + } + + if (text) + designator = PARTITION_ROOT; + } + + if (!text && (designator < 0 || designator == PARTITION_USR)) { + /* So in the "roothash" xattr/file name above the "root" of course primarily + * refers to the root of the Verity Merkle tree. But coincidentally it also + * is the hash for the *root* file system, i.e. the "root" neatly refers to + * two distinct concepts called "root". Taking benefit of this happy + * coincidence we call the file with the root hash for the /usr/ file system + * `usrhash`, because `usrroothash` or `rootusrhash` would just be too + * confusing. We thus drop the reference to the root of the Merkle tree, and + * just indicate which file system it's about. */ + r = getxattr_malloc(image, "user.verity.usrhash", &text, true); + if (r < 0) { + _cleanup_free_ char *p = NULL; + + if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r)) + return r; + + p = build_auxiliary_path(image, ".usrhash"); + if (!p) + return -ENOMEM; + + r = read_one_line_file(p, &text); + if (r < 0 && r != -ENOENT) + return r; + } + + if (text) + designator = PARTITION_USR; + } + } + + if (text) { + r = unhexmem(text, strlen(text), &root_hash, &root_hash_size); + if (r < 0) + return r; + if (root_hash_size < sizeof(sd_id128_t)) + return -EINVAL; + } + } + + if ((root_hash || verity->root_hash) && !verity->root_hash_sig) { + if (root_hash_sig_path) { + r = read_full_file_full(AT_FDCWD, root_hash_sig_path, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size); + if (r < 0 && r != -ENOENT) + return r; + + if (designator < 0) + designator = PARTITION_ROOT; + } else { + if (designator < 0 || designator == PARTITION_ROOT) { + _cleanup_free_ char *p = NULL; + + /* Follow naming convention recommended by the relevant RFC: + * https://tools.ietf.org/html/rfc5751#section-3.2.1 */ + p = build_auxiliary_path(image, ".roothash.p7s"); + if (!p) + return -ENOMEM; + + r = read_full_file_full(AT_FDCWD, p, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size); + if (r < 0 && r != -ENOENT) + return r; + if (r >= 0) + designator = PARTITION_ROOT; + } + + if (!root_hash_sig && (designator < 0 || designator == PARTITION_USR)) { + _cleanup_free_ char *p = NULL; + + p = build_auxiliary_path(image, ".usrhash.p7s"); + if (!p) + return -ENOMEM; + + r = read_full_file_full(AT_FDCWD, p, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size); + if (r < 0 && r != -ENOENT) + return r; + if (r >= 0) + designator = PARTITION_USR; + } + } + + if (root_hash_sig && root_hash_sig_size == 0) /* refuse empty size signatures */ + return -EINVAL; + } + + if (!verity->data_path) { + _cleanup_free_ char *p = NULL; + + p = build_auxiliary_path(image, ".verity"); + if (!p) + return -ENOMEM; + + if (access(p, F_OK) < 0) { + if (errno != ENOENT) + return -errno; + } else + verity_data_path = TAKE_PTR(p); + } + + if (root_hash) { + verity->root_hash = TAKE_PTR(root_hash); + verity->root_hash_size = root_hash_size; + } + + if (root_hash_sig) { + verity->root_hash_sig = TAKE_PTR(root_hash_sig); + verity->root_hash_sig_size = root_hash_sig_size; + } + + if (verity_data_path) + verity->data_path = TAKE_PTR(verity_data_path); + + if (verity->designator < 0) + verity->designator = designator; + + return 1; +} + +int dissected_image_acquire_metadata(DissectedImage *m) { + + enum { + META_HOSTNAME, + META_MACHINE_ID, + META_MACHINE_INFO, + META_OS_RELEASE, + _META_MAX, + }; + + static const char *const paths[_META_MAX] = { + [META_HOSTNAME] = "/etc/hostname\0", + [META_MACHINE_ID] = "/etc/machine-id\0", + [META_MACHINE_INFO] = "/etc/machine-info\0", + [META_OS_RELEASE] = "/etc/os-release\0" + "/usr/lib/os-release\0", + }; + + _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL; + _cleanup_close_pair_ int error_pipe[2] = { -1, -1 }; + _cleanup_(rmdir_and_freep) char *t = NULL; + _cleanup_(sigkill_waitp) pid_t child = 0; + sd_id128_t machine_id = SD_ID128_NULL; + _cleanup_free_ char *hostname = NULL; + unsigned n_meta_initialized = 0, k; + int fds[2 * _META_MAX], r, v; + ssize_t n; + + BLOCK_SIGNALS(SIGCHLD); + + assert(m); + + for (; n_meta_initialized < _META_MAX; n_meta_initialized ++) + if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) { + r = -errno; + goto finish; + } + + r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t); + if (r < 0) + goto finish; + + if (pipe2(error_pipe, O_CLOEXEC) < 0) { + r = -errno; + goto finish; + } + + r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child); + if (r < 0) + goto finish; + if (r == 0) { + error_pipe[0] = safe_close(error_pipe[0]); + + r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS); + if (r < 0) { + /* Let parent know the error */ + (void) write(error_pipe[1], &r, sizeof(r)); + + log_debug_errno(r, "Failed to mount dissected image: %m"); + _exit(EXIT_FAILURE); + } + + for (k = 0; k < _META_MAX; k++) { + _cleanup_close_ int fd = -ENOENT; + const char *p; + + fds[2*k] = safe_close(fds[2*k]); + + NULSTR_FOREACH(p, paths[k]) { + fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd >= 0) + break; + } + if (fd < 0) { + log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]); + fds[2*k+1] = safe_close(fds[2*k+1]); + continue; + } + + r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0); + if (r < 0) { + (void) write(error_pipe[1], &r, sizeof(r)); + _exit(EXIT_FAILURE); + } + + fds[2*k+1] = safe_close(fds[2*k+1]); + } + + _exit(EXIT_SUCCESS); + } + + error_pipe[1] = safe_close(error_pipe[1]); + + for (k = 0; k < _META_MAX; k++) { + _cleanup_fclose_ FILE *f = NULL; + + fds[2*k+1] = safe_close(fds[2*k+1]); + + f = take_fdopen(&fds[2*k], "r"); + if (!f) { + r = -errno; + goto finish; + } + + switch (k) { + + case META_HOSTNAME: + r = read_etc_hostname_stream(f, &hostname); + if (r < 0) + log_debug_errno(r, "Failed to read /etc/hostname: %m"); + + break; + + case META_MACHINE_ID: { + _cleanup_free_ char *line = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + log_debug_errno(r, "Failed to read /etc/machine-id: %m"); + else if (r == 33) { + r = sd_id128_from_string(line, &machine_id); + if (r < 0) + log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line); + } else if (r == 0) + log_debug("/etc/machine-id file is empty."); + else if (streq(line, "uninitialized")) + log_debug("/etc/machine-id file is uninitialized (likely aborted first boot)."); + else + log_debug("/etc/machine-id has unexpected length %i.", r); + + break; + } + + case META_MACHINE_INFO: + r = load_env_file_pairs(f, "machine-info", &machine_info); + if (r < 0) + log_debug_errno(r, "Failed to read /etc/machine-info: %m"); + + break; + + case META_OS_RELEASE: + r = load_env_file_pairs(f, "os-release", &os_release); + if (r < 0) + log_debug_errno(r, "Failed to read OS release file: %m"); + + break; + } + } + + r = wait_for_terminate_and_check("(sd-dissect)", child, 0); + child = 0; + if (r < 0) + return r; + + n = read(error_pipe[0], &v, sizeof(v)); + if (n < 0) + return -errno; + if (n == sizeof(v)) + return v; /* propagate error sent to us from child */ + if (n != 0) + return -EIO; + + if (r != EXIT_SUCCESS) + return -EPROTO; + + free_and_replace(m->hostname, hostname); + m->machine_id = machine_id; + strv_free_and_replace(m->machine_info, machine_info); + strv_free_and_replace(m->os_release, os_release); + +finish: + for (k = 0; k < n_meta_initialized; k++) + safe_close_pair(fds + 2*k); + + return r; +} + +int dissect_image_and_warn( + int fd, + const char *name, + const VeritySettings *verity, + const MountOptions *mount_options, + DissectImageFlags flags, + DissectedImage **ret) { + + _cleanup_free_ char *buffer = NULL; + int r; + + if (!name) { + r = fd_get_path(fd, &buffer); + if (r < 0) + return r; + + name = buffer; + } + + r = dissect_image(fd, verity, mount_options, flags, ret); + switch (r) { + + case -EOPNOTSUPP: + return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support."); + + case -ENOPKG: + return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name); + + case -EADDRNOTAVAIL: + return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name); + + case -ENOTUNIQ: + return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name); + + case -ENXIO: + return log_error_errno(r, "No suitable root partition found in image '%s'.", name); + + case -EPROTONOSUPPORT: + return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name); + + default: + if (r < 0) + return log_error_errno(r, "Failed to dissect image '%s': %m", name); + + return r; + } +} + +bool dissected_image_can_do_verity(const DissectedImage *image, PartitionDesignator partition_designator) { + if (image->single_file_system) + return partition_designator == PARTITION_ROOT && image->can_verity; + + return PARTITION_VERITY_OF(partition_designator) >= 0; +} + +bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator partition_designator) { + int k; + + if (image->single_file_system) + return partition_designator == PARTITION_ROOT && image->verity; + + k = PARTITION_VERITY_OF(partition_designator); + return k >= 0 && image->partitions[k].found; +} + +MountOptions* mount_options_free_all(MountOptions *options) { + MountOptions *m; + + while ((m = options)) { + LIST_REMOVE(mount_options, options, m); + free(m->options); + free(m); + } + + return NULL; +} + +const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator) { + const MountOptions *m; + + LIST_FOREACH(mount_options, m, options) + if (designator == m->partition_designator && !isempty(m->options)) + return m->options; + + return NULL; +} + +int mount_image_privately_interactively( + const char *image, + DissectImageFlags flags, + char **ret_directory, + LoopDevice **ret_loop_device, + DecryptedImage **ret_decrypted_image) { + + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; + _cleanup_(rmdir_and_freep) char *created_dir = NULL; + _cleanup_free_ char *temp = NULL; + int r; + + /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This + * is used by tools such as systemd-tmpfiles or systemd-firstboot to operate on some disk image + * easily. */ + + assert(image); + assert(ret_directory); + assert(ret_loop_device); + assert(ret_decrypted_image); + + r = tempfn_random_child(NULL, program_invocation_short_name, &temp); + if (r < 0) + return log_error_errno(r, "Failed to generate temporary mount directory: %m"); + + r = loop_device_make_by_path( + image, + FLAGS_SET(flags, DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR, + FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN, + &d); + if (r < 0) + return log_error_errno(r, "Failed to set up loopback device: %m"); + + r = dissect_image_and_warn(d->fd, image, NULL, NULL, flags, &dissected_image); + if (r < 0) + return r; + + r = dissected_image_decrypt_interactively(dissected_image, NULL, NULL, flags, &decrypted_image); + if (r < 0) + return r; + + r = detach_mount_namespace(); + if (r < 0) + return log_error_errno(r, "Failed to detach mount namespace: %m"); + + r = mkdir_p(temp, 0700); + if (r < 0) + return log_error_errno(r, "Failed to create mount point: %m"); + + created_dir = TAKE_PTR(temp); + + r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, flags); + if (r < 0) + return r; + + if (decrypted_image) { + r = decrypted_image_relinquish(decrypted_image); + if (r < 0) + return log_error_errno(r, "Failed to relinquish DM devices: %m"); + } + + loop_device_relinquish(d); + + *ret_directory = TAKE_PTR(created_dir); + *ret_loop_device = TAKE_PTR(d); + *ret_decrypted_image = TAKE_PTR(decrypted_image); + + return 0; +} + +static const char *const partition_designator_table[] = { + [PARTITION_ROOT] = "root", + [PARTITION_ROOT_SECONDARY] = "root-secondary", + [PARTITION_USR] = "usr", + [PARTITION_USR_SECONDARY] = "usr-secondary", + [PARTITION_HOME] = "home", + [PARTITION_SRV] = "srv", + [PARTITION_ESP] = "esp", + [PARTITION_XBOOTLDR] = "xbootldr", + [PARTITION_SWAP] = "swap", + [PARTITION_ROOT_VERITY] = "root-verity", + [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity", + [PARTITION_USR_VERITY] = "usr-verity", + [PARTITION_USR_SECONDARY_VERITY] = "usr-secondary-verity", + [PARTITION_TMP] = "tmp", + [PARTITION_VAR] = "var", +}; + +DEFINE_STRING_TABLE_LOOKUP(partition_designator, PartitionDesignator); diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h new file mode 100644 index 0000000..3b30e08 --- /dev/null +++ b/src/shared/dissect-image.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-id128.h" + +#include "list.h" +#include "loop-util.h" +#include "macro.h" + +typedef struct DissectedImage DissectedImage; +typedef struct DissectedPartition DissectedPartition; +typedef struct DecryptedImage DecryptedImage; +typedef struct MountOptions MountOptions; +typedef struct VeritySettings VeritySettings; + +struct DissectedPartition { + bool found:1; + bool rw:1; + int partno; /* -1 if there was no partition and the images contains a file system directly */ + int architecture; /* Intended architecture: either native, secondary or unset (-1). */ + sd_id128_t uuid; /* Partition entry UUID as reported by the GPT */ + char *fstype; + char *node; + char *decrypted_node; + char *decrypted_fstype; + char *mount_options; +}; + +typedef enum PartitionDesignator { + PARTITION_ROOT, + PARTITION_ROOT_SECONDARY, /* Secondary architecture */ + PARTITION_USR, + PARTITION_USR_SECONDARY, + PARTITION_HOME, + PARTITION_SRV, + PARTITION_ESP, + PARTITION_XBOOTLDR, + PARTITION_SWAP, + PARTITION_ROOT_VERITY, /* verity data for the PARTITION_ROOT partition */ + PARTITION_ROOT_SECONDARY_VERITY, /* verity data for the PARTITION_ROOT_SECONDARY partition */ + PARTITION_USR_VERITY, + PARTITION_USR_SECONDARY_VERITY, + PARTITION_TMP, + PARTITION_VAR, + _PARTITION_DESIGNATOR_MAX, + _PARTITION_DESIGNATOR_INVALID = -1 +} PartitionDesignator; + +static inline PartitionDesignator PARTITION_VERITY_OF(PartitionDesignator p) { + switch (p) { + + case PARTITION_ROOT: + return PARTITION_ROOT_VERITY; + + case PARTITION_ROOT_SECONDARY: + return PARTITION_ROOT_SECONDARY_VERITY; + + case PARTITION_USR: + return PARTITION_USR_VERITY; + + case PARTITION_USR_SECONDARY: + return PARTITION_USR_SECONDARY_VERITY; + + default: + return _PARTITION_DESIGNATOR_INVALID; + } +} + +typedef enum DissectImageFlags { + DISSECT_IMAGE_READ_ONLY = 1 << 0, + DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */ + DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */ + DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */ + DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP | + DISSECT_IMAGE_DISCARD | + DISSECT_IMAGE_DISCARD_ON_CRYPTO, + DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */ + DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition (and if no partition table or only single generic partition, assume it's root) */ + DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root and /usr partitions */ + DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only the non-root and non-/usr partitions */ + DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifiable as OS images */ + DISSECT_IMAGE_NO_UDEV = 1 << 9, /* Don't wait for udev initializing things */ + DISSECT_IMAGE_RELAX_VAR_CHECK = 1 << 10, /* Don't insist that the UUID of /var is hashed from /etc/machine-id */ + DISSECT_IMAGE_FSCK = 1 << 11, /* File system check the partition before mounting (no effect when combined with DISSECT_IMAGE_READ_ONLY) */ + DISSECT_IMAGE_NO_PARTITION_TABLE = 1 << 12, /* Only recognize single file system images */ + DISSECT_IMAGE_VERITY_SHARE = 1 << 13, /* When activating a verity device, reuse existing one if already open */ + DISSECT_IMAGE_MKDIR = 1 << 14, /* Make directory to mount right before mounting, if missing */ +} DissectImageFlags; + +struct DissectedImage { + bool encrypted:1; + bool verity:1; /* verity available and usable */ + bool can_verity:1; /* verity available, but not necessarily used */ + bool single_file_system:1; /* MBR/GPT or single file system */ + + DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX]; + + char *hostname; + sd_id128_t machine_id; + char **machine_info; + char **os_release; +}; + +struct MountOptions { + PartitionDesignator partition_designator; + char *options; + LIST_FIELDS(MountOptions, mount_options); +}; + +struct VeritySettings { + /* Binary root hash for the Verity Merkle tree */ + void *root_hash; + size_t root_hash_size; + + /* PKCS#7 signature of the above */ + void *root_hash_sig; + size_t root_hash_sig_size; + + /* Path to the verity data file, if stored externally */ + char *data_path; + + /* PARTITION_ROOT or PARTITION_USR, depending on what these Verity settings are for */ + PartitionDesignator designator; +}; + +#define VERITY_SETTINGS_DEFAULT { \ + .designator = _PARTITION_DESIGNATOR_INVALID \ + } + +MountOptions* mount_options_free_all(MountOptions *options); +DEFINE_TRIVIAL_CLEANUP_FUNC(MountOptions*, mount_options_free_all); +const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator); + +int probe_filesystem(const char *node, char **ret_fstype); +int dissect_image(int fd, const VeritySettings *verity, const MountOptions *mount_options, DissectImageFlags flags, DissectedImage **ret); +int dissect_image_and_warn(int fd, const char *name, const VeritySettings *verity, const MountOptions *mount_options, DissectImageFlags flags, DissectedImage **ret); + +DissectedImage* dissected_image_unref(DissectedImage *m); +DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref); + +int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags); +int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags); + +int dissected_image_acquire_metadata(DissectedImage *m); + +DecryptedImage* decrypted_image_unref(DecryptedImage *p); +DEFINE_TRIVIAL_CLEANUP_FUNC(DecryptedImage*, decrypted_image_unref); +int decrypted_image_relinquish(DecryptedImage *d); + +const char* partition_designator_to_string(PartitionDesignator d) _const_; +PartitionDesignator partition_designator_from_string(const char *name) _pure_; + +int verity_settings_load(VeritySettings *verity, const char *image, const char *root_hash_path, const char *root_hash_sig_path); +void verity_settings_done(VeritySettings *verity); + +bool dissected_image_can_do_verity(const DissectedImage *image, PartitionDesignator d); +bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator d); + +int mount_image_privately_interactively(const char *path, DissectImageFlags flags, char **ret_directory, LoopDevice **ret_loop_device, DecryptedImage **ret_decrypted_image); diff --git a/src/shared/dm-util.c b/src/shared/dm-util.c new file mode 100644 index 0000000..b48b9b5 --- /dev/null +++ b/src/shared/dm-util.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <linux/dm-ioctl.h> +#include <sys/ioctl.h> + +#include "dm-util.h" +#include "fd-util.h" +#include "string-util.h" + +int dm_deferred_remove_cancel(const char *name) { + _cleanup_close_ int fd = -1; + struct message { + struct dm_ioctl dm_ioctl; + struct dm_target_msg dm_target_msg; + char msg_text[STRLEN("@cancel_deferred_remove") + 1]; + } _packed_ message = { + .dm_ioctl = { + .version = { + DM_VERSION_MAJOR, + DM_VERSION_MINOR, + DM_VERSION_PATCHLEVEL + }, + .data_size = sizeof(struct message), + .data_start = sizeof(struct dm_ioctl), + }, + .msg_text = "@cancel_deferred_remove", + }; + + assert(name); + + if (strlen(name) >= sizeof(message.dm_ioctl.name)) + return -ENODEV; /* A device with a name longer than this cannot possibly exist */ + + strncpy_exact(message.dm_ioctl.name, name, sizeof(message.dm_ioctl.name)); + + fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (ioctl(fd, DM_TARGET_MSG, &message)) + return -errno; + + return 0; +} diff --git a/src/shared/dm-util.h b/src/shared/dm-util.h new file mode 100644 index 0000000..e6e3d7d --- /dev/null +++ b/src/shared/dm-util.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +int dm_deferred_remove_cancel(const char *name); diff --git a/src/shared/dns-domain.c b/src/shared/dns-domain.c new file mode 100644 index 0000000..ec42b29 --- /dev/null +++ b/src/shared/dns-domain.c @@ -0,0 +1,1414 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <endian.h> +#include <netinet/in.h> +#include <stdio.h> +#include <sys/socket.h> + +#include "alloc-util.h" +#include "dns-domain.h" +#include "hashmap.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "idn-util.h" +#include "in-addr-util.h" +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" + +int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags) { + const char *n; + char *d, last_char = 0; + int r = 0; + + assert(name); + assert(*name); + + n = *name; + d = dest; + + for (;;) { + if (IN_SET(*n, 0, '.')) { + if (FLAGS_SET(flags, DNS_LABEL_LDH) && last_char == '-') + /* Trailing dash */ + return -EINVAL; + + if (n[0] == '.' && (n[1] != 0 || !FLAGS_SET(flags, DNS_LABEL_LEAVE_TRAILING_DOT))) + n++; + + break; + } + + if (r >= DNS_LABEL_MAX) + return -EINVAL; + + if (sz <= 0) + return -ENOBUFS; + + if (*n == '\\') { + /* Escaped character */ + if (FLAGS_SET(flags, DNS_LABEL_NO_ESCAPES)) + return -EINVAL; + + n++; + + if (*n == 0) + /* Ending NUL */ + return -EINVAL; + + else if (IN_SET(*n, '\\', '.')) { + /* Escaped backslash or dot */ + + if (FLAGS_SET(flags, DNS_LABEL_LDH)) + return -EINVAL; + + last_char = *n; + if (d) + *(d++) = *n; + sz--; + r++; + n++; + + } else if (n[0] >= '0' && n[0] <= '9') { + unsigned k; + + /* Escaped literal ASCII character */ + + if (!(n[1] >= '0' && n[1] <= '9') || + !(n[2] >= '0' && n[2] <= '9')) + return -EINVAL; + + k = ((unsigned) (n[0] - '0') * 100) + + ((unsigned) (n[1] - '0') * 10) + + ((unsigned) (n[2] - '0')); + + /* Don't allow anything that doesn't + * fit in 8bit. Note that we do allow + * control characters, as some servers + * (e.g. cloudflare) are happy to + * generate labels with them + * inside. */ + if (k > 255) + return -EINVAL; + + if (FLAGS_SET(flags, DNS_LABEL_LDH) && + !valid_ldh_char((char) k)) + return -EINVAL; + + last_char = (char) k; + if (d) + *(d++) = (char) k; + sz--; + r++; + + n += 3; + } else + return -EINVAL; + + } else if ((uint8_t) *n >= (uint8_t) ' ' && *n != 127) { + + /* Normal character */ + + if (FLAGS_SET(flags, DNS_LABEL_LDH)) { + if (!valid_ldh_char(*n)) + return -EINVAL; + if (r == 0 && *n == '-') + /* Leading dash */ + return -EINVAL; + } + + last_char = *n; + if (d) + *(d++) = *n; + sz--; + r++; + n++; + } else + return -EINVAL; + } + + /* Empty label that is not at the end? */ + if (r == 0 && *n) + return -EINVAL; + + /* More than one trailing dot? */ + if (n[0] == '.' && !FLAGS_SET(flags, DNS_LABEL_LEAVE_TRAILING_DOT)) + return -EINVAL; + + if (sz >= 1 && d) + *d = 0; + + *name = n; + return r; +} + +/* @label_terminal: terminal character of a label, updated to point to the terminal character of + * the previous label (always skipping one dot) or to NULL if there are no more + * labels. */ +int dns_label_unescape_suffix(const char *name, const char **label_terminal, char *dest, size_t sz) { + const char *terminal; + int r; + + assert(name); + assert(label_terminal); + assert(dest); + + /* no more labels */ + if (!*label_terminal) { + if (sz >= 1) + *dest = 0; + + return 0; + } + + terminal = *label_terminal; + assert(IN_SET(*terminal, 0, '.')); + + /* Skip current terminal character (and accept domain names ending it ".") */ + if (*terminal == 0) + terminal--; + if (terminal >= name && *terminal == '.') + terminal--; + + /* Point name to the last label, and terminal to the preceding terminal symbol (or make it a NULL pointer) */ + for (;;) { + if (terminal < name) { + /* Reached the first label, so indicate that there are no more */ + terminal = NULL; + break; + } + + /* Find the start of the last label */ + if (*terminal == '.') { + const char *y; + unsigned slashes = 0; + + for (y = terminal - 1; y >= name && *y == '\\'; y--) + slashes++; + + if (slashes % 2 == 0) { + /* The '.' was not escaped */ + name = terminal + 1; + break; + } else { + terminal = y; + continue; + } + } + + terminal--; + } + + r = dns_label_unescape(&name, dest, sz, 0); + if (r < 0) + return r; + + *label_terminal = terminal; + + return r; +} + +int dns_label_escape(const char *p, size_t l, char *dest, size_t sz) { + char *q; + + /* DNS labels must be between 1 and 63 characters long. A + * zero-length label does not exist. See RFC 2182, Section + * 11. */ + + if (l <= 0 || l > DNS_LABEL_MAX) + return -EINVAL; + if (sz < 1) + return -ENOBUFS; + + assert(p); + assert(dest); + + q = dest; + while (l > 0) { + + if (IN_SET(*p, '.', '\\')) { + + /* Dot or backslash */ + + if (sz < 3) + return -ENOBUFS; + + *(q++) = '\\'; + *(q++) = *p; + + sz -= 2; + + } else if (IN_SET(*p, '_', '-') || + (*p >= '0' && *p <= '9') || + (*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z')) { + + /* Proper character */ + + if (sz < 2) + return -ENOBUFS; + + *(q++) = *p; + sz -= 1; + + } else { + + /* Everything else */ + + if (sz < 5) + return -ENOBUFS; + + *(q++) = '\\'; + *(q++) = '0' + (char) ((uint8_t) *p / 100); + *(q++) = '0' + (char) (((uint8_t) *p / 10) % 10); + *(q++) = '0' + (char) ((uint8_t) *p % 10); + + sz -= 4; + } + + p++; + l--; + } + + *q = 0; + return (int) (q - dest); +} + +int dns_label_escape_new(const char *p, size_t l, char **ret) { + _cleanup_free_ char *s = NULL; + int r; + + assert(p); + assert(ret); + + if (l <= 0 || l > DNS_LABEL_MAX) + return -EINVAL; + + s = new(char, DNS_LABEL_ESCAPED_MAX); + if (!s) + return -ENOMEM; + + r = dns_label_escape(p, l, s, DNS_LABEL_ESCAPED_MAX); + if (r < 0) + return r; + + *ret = TAKE_PTR(s); + + return r; +} + +#if HAVE_LIBIDN +int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) { + _cleanup_free_ uint32_t *input = NULL; + size_t input_size, l; + const char *p; + bool contains_8bit = false; + char buffer[DNS_LABEL_MAX+1]; + int r; + + assert(encoded); + assert(decoded); + + /* Converts an U-label into an A-label */ + + r = dlopen_idn(); + if (r < 0) + return r; + + if (encoded_size <= 0) + return -EINVAL; + + for (p = encoded; p < encoded + encoded_size; p++) + if ((uint8_t) *p > 127) + contains_8bit = true; + + if (!contains_8bit) { + if (encoded_size > DNS_LABEL_MAX) + return -EINVAL; + + return 0; + } + + input = sym_stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size); + if (!input) + return -ENOMEM; + + if (sym_idna_to_ascii_4i(input, input_size, buffer, 0) != 0) + return -EINVAL; + + l = strlen(buffer); + + /* Verify that the result is not longer than one DNS label. */ + if (l <= 0 || l > DNS_LABEL_MAX) + return -EINVAL; + if (l > decoded_max) + return -ENOBUFS; + + memcpy(decoded, buffer, l); + + /* If there's room, append a trailing NUL byte, but only then */ + if (decoded_max > l) + decoded[l] = 0; + + return (int) l; +} + +int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) { + size_t input_size, output_size; + _cleanup_free_ uint32_t *input = NULL; + _cleanup_free_ char *result = NULL; + uint32_t *output = NULL; + size_t w; + int r; + + /* To be invoked after unescaping. Converts an A-label into an U-label. */ + + assert(encoded); + assert(decoded); + + r = dlopen_idn(); + if (r < 0) + return r; + + if (encoded_size <= 0 || encoded_size > DNS_LABEL_MAX) + return -EINVAL; + + if (!memory_startswith(encoded, encoded_size, IDNA_ACE_PREFIX)) + return 0; + + input = sym_stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size); + if (!input) + return -ENOMEM; + + output_size = input_size; + output = newa(uint32_t, output_size); + + sym_idna_to_unicode_44i(input, input_size, output, &output_size, 0); + + result = sym_stringprep_ucs4_to_utf8(output, output_size, NULL, &w); + if (!result) + return -ENOMEM; + if (w <= 0) + return -EINVAL; + if (w > decoded_max) + return -ENOBUFS; + + memcpy(decoded, result, w); + + /* Append trailing NUL byte if there's space, but only then. */ + if (decoded_max > w) + decoded[w] = 0; + + return w; +} +#endif + +int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **_ret) { + _cleanup_free_ char *ret = NULL; + size_t n = 0, allocated = 0; + const char *p; + bool first = true; + int r; + + if (a) + p = a; + else if (b) + p = TAKE_PTR(b); + else + goto finish; + + for (;;) { + char label[DNS_LABEL_MAX]; + + r = dns_label_unescape(&p, label, sizeof label, flags); + if (r < 0) + return r; + if (r == 0) { + if (*p != 0) + return -EINVAL; + + if (b) { + /* Now continue with the second string, if there is one */ + p = TAKE_PTR(b); + continue; + } + + break; + } + + if (_ret) { + if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX)) + return -ENOMEM; + + r = dns_label_escape(label, r, ret + n + !first, DNS_LABEL_ESCAPED_MAX); + if (r < 0) + return r; + + if (!first) + ret[n] = '.'; + } else { + char escaped[DNS_LABEL_ESCAPED_MAX]; + + r = dns_label_escape(label, r, escaped, sizeof(escaped)); + if (r < 0) + return r; + } + + if (!first) + n++; + else + first = false; + + n += r; + } + +finish: + if (n > DNS_HOSTNAME_MAX) + return -EINVAL; + + if (_ret) { + if (n == 0) { + /* Nothing appended? If so, generate at least a single dot, to indicate the DNS root domain */ + if (!GREEDY_REALLOC(ret, allocated, 2)) + return -ENOMEM; + + ret[n++] = '.'; + } else { + if (!GREEDY_REALLOC(ret, allocated, n + 1)) + return -ENOMEM; + } + + ret[n] = 0; + *_ret = TAKE_PTR(ret); + } + + return 0; +} + +void dns_name_hash_func(const char *p, struct siphash *state) { + int r; + + assert(p); + + for (;;) { + char label[DNS_LABEL_MAX+1]; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r < 0) + break; + if (r == 0) + break; + + ascii_strlower_n(label, r); + siphash24_compress(label, r, state); + siphash24_compress_byte(0, state); /* make sure foobar and foo.bar result in different hashes */ + } + + /* enforce that all names are terminated by the empty label */ + string_hash_func("", state); +} + +int dns_name_compare_func(const char *a, const char *b) { + const char *x, *y; + int r, q; + + assert(a); + assert(b); + + x = a + strlen(a); + y = b + strlen(b); + + for (;;) { + char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX]; + + if (x == NULL && y == NULL) + return 0; + + r = dns_label_unescape_suffix(a, &x, la, sizeof(la)); + q = dns_label_unescape_suffix(b, &y, lb, sizeof(lb)); + if (r < 0 || q < 0) + return CMP(r, q); + + r = ascii_strcasecmp_nn(la, r, lb, q); + if (r != 0) + return r; + } +} + +DEFINE_HASH_OPS(dns_name_hash_ops, char, dns_name_hash_func, dns_name_compare_func); + +int dns_name_equal(const char *x, const char *y) { + int r, q; + + assert(x); + assert(y); + + for (;;) { + char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX]; + + r = dns_label_unescape(&x, la, sizeof la, 0); + if (r < 0) + return r; + + q = dns_label_unescape(&y, lb, sizeof lb, 0); + if (q < 0) + return q; + + if (r != q) + return false; + if (r == 0) + return true; + + if (ascii_strcasecmp_n(la, lb, r) != 0) + return false; + } +} + +int dns_name_endswith(const char *name, const char *suffix) { + const char *n, *s, *saved_n = NULL; + int r, q; + + assert(name); + assert(suffix); + + n = name; + s = suffix; + + for (;;) { + char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX]; + + r = dns_label_unescape(&n, ln, sizeof ln, 0); + if (r < 0) + return r; + + if (!saved_n) + saved_n = n; + + q = dns_label_unescape(&s, ls, sizeof ls, 0); + if (q < 0) + return q; + + if (r == 0 && q == 0) + return true; + if (r == 0 && saved_n == n) + return false; + + if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) { + + /* Not the same, let's jump back, and try with the next label again */ + s = suffix; + n = TAKE_PTR(saved_n); + } + } +} + +int dns_name_startswith(const char *name, const char *prefix) { + const char *n, *p; + int r, q; + + assert(name); + assert(prefix); + + n = name; + p = prefix; + + for (;;) { + char ln[DNS_LABEL_MAX], lp[DNS_LABEL_MAX]; + + r = dns_label_unescape(&p, lp, sizeof lp, 0); + if (r < 0) + return r; + if (r == 0) + return true; + + q = dns_label_unescape(&n, ln, sizeof ln, 0); + if (q < 0) + return q; + + if (r != q) + return false; + if (ascii_strcasecmp_n(ln, lp, r) != 0) + return false; + } +} + +int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret) { + const char *n, *s, *saved_before = NULL, *saved_after = NULL, *prefix; + int r, q; + + assert(name); + assert(old_suffix); + assert(new_suffix); + assert(ret); + + n = name; + s = old_suffix; + + for (;;) { + char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX]; + + if (!saved_before) + saved_before = n; + + r = dns_label_unescape(&n, ln, sizeof ln, 0); + if (r < 0) + return r; + + if (!saved_after) + saved_after = n; + + q = dns_label_unescape(&s, ls, sizeof ls, 0); + if (q < 0) + return q; + + if (r == 0 && q == 0) + break; + if (r == 0 && saved_after == n) { + *ret = NULL; /* doesn't match */ + return 0; + } + + if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) { + + /* Not the same, let's jump back, and try with the next label again */ + s = old_suffix; + n = TAKE_PTR(saved_after); + saved_before = NULL; + } + } + + /* Found it! Now generate the new name */ + prefix = strndupa(name, saved_before - name); + + r = dns_name_concat(prefix, new_suffix, 0, ret); + if (r < 0) + return r; + + return 1; +} + +int dns_name_between(const char *a, const char *b, const char *c) { + /* Determine if b is strictly greater than a and strictly smaller than c. + We consider the order of names to be circular, so that if a is + strictly greater than c, we consider b to be between them if it is + either greater than a or smaller than c. This is how the canonical + DNS name order used in NSEC records work. */ + + if (dns_name_compare_func(a, c) < 0) + /* + a and c are properly ordered: + a<---b--->c + */ + return dns_name_compare_func(a, b) < 0 && + dns_name_compare_func(b, c) < 0; + else + /* + a and c are equal or 'reversed': + <--b--c a-----> + or: + <-----c a--b--> + */ + return dns_name_compare_func(b, c) < 0 || + dns_name_compare_func(a, b) < 0; +} + +int dns_name_reverse(int family, const union in_addr_union *a, char **ret) { + const uint8_t *p; + int r; + + assert(a); + assert(ret); + + p = (const uint8_t*) a; + + if (family == AF_INET) + r = asprintf(ret, "%u.%u.%u.%u.in-addr.arpa", p[3], p[2], p[1], p[0]); + else if (family == AF_INET6) + r = asprintf(ret, "%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.ip6.arpa", + hexchar(p[15] & 0xF), hexchar(p[15] >> 4), hexchar(p[14] & 0xF), hexchar(p[14] >> 4), + hexchar(p[13] & 0xF), hexchar(p[13] >> 4), hexchar(p[12] & 0xF), hexchar(p[12] >> 4), + hexchar(p[11] & 0xF), hexchar(p[11] >> 4), hexchar(p[10] & 0xF), hexchar(p[10] >> 4), + hexchar(p[ 9] & 0xF), hexchar(p[ 9] >> 4), hexchar(p[ 8] & 0xF), hexchar(p[ 8] >> 4), + hexchar(p[ 7] & 0xF), hexchar(p[ 7] >> 4), hexchar(p[ 6] & 0xF), hexchar(p[ 6] >> 4), + hexchar(p[ 5] & 0xF), hexchar(p[ 5] >> 4), hexchar(p[ 4] & 0xF), hexchar(p[ 4] >> 4), + hexchar(p[ 3] & 0xF), hexchar(p[ 3] >> 4), hexchar(p[ 2] & 0xF), hexchar(p[ 2] >> 4), + hexchar(p[ 1] & 0xF), hexchar(p[ 1] >> 4), hexchar(p[ 0] & 0xF), hexchar(p[ 0] >> 4)); + else + return -EAFNOSUPPORT; + if (r < 0) + return -ENOMEM; + + return 0; +} + +int dns_name_address(const char *p, int *family, union in_addr_union *address) { + int r; + + assert(p); + assert(family); + assert(address); + + r = dns_name_endswith(p, "in-addr.arpa"); + if (r < 0) + return r; + if (r > 0) { + uint8_t a[4]; + unsigned i; + + for (i = 0; i < ELEMENTSOF(a); i++) { + char label[DNS_LABEL_MAX+1]; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + if (r > 3) + return -EINVAL; + + r = safe_atou8(label, &a[i]); + if (r < 0) + return r; + } + + r = dns_name_equal(p, "in-addr.arpa"); + if (r <= 0) + return r; + + *family = AF_INET; + address->in.s_addr = htobe32(((uint32_t) a[3] << 24) | + ((uint32_t) a[2] << 16) | + ((uint32_t) a[1] << 8) | + (uint32_t) a[0]); + + return 1; + } + + r = dns_name_endswith(p, "ip6.arpa"); + if (r < 0) + return r; + if (r > 0) { + struct in6_addr a; + unsigned i; + + for (i = 0; i < ELEMENTSOF(a.s6_addr); i++) { + char label[DNS_LABEL_MAX+1]; + int x, y; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r <= 0) + return r; + if (r != 1) + return -EINVAL; + x = unhexchar(label[0]); + if (x < 0) + return -EINVAL; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r <= 0) + return r; + if (r != 1) + return -EINVAL; + y = unhexchar(label[0]); + if (y < 0) + return -EINVAL; + + a.s6_addr[ELEMENTSOF(a.s6_addr) - i - 1] = (uint8_t) y << 4 | (uint8_t) x; + } + + r = dns_name_equal(p, "ip6.arpa"); + if (r <= 0) + return r; + + *family = AF_INET6; + address->in6 = a; + return 1; + } + + return 0; +} + +bool dns_name_is_root(const char *name) { + + assert(name); + + /* There are exactly two ways to encode the root domain name: + * as empty string, or with a single dot. */ + + return STR_IN_SET(name, "", "."); +} + +bool dns_name_is_single_label(const char *name) { + int r; + + assert(name); + + r = dns_name_parent(&name); + if (r <= 0) + return false; + + return dns_name_is_root(name); +} + +/* Encode a domain name according to RFC 1035 Section 3.1, without compression */ +int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical) { + uint8_t *label_length, *out; + int r; + + assert(domain); + assert(buffer); + + out = buffer; + + do { + /* Reserve a byte for label length */ + if (len <= 0) + return -ENOBUFS; + len--; + label_length = out; + out++; + + /* Convert and copy a single label. Note that + * dns_label_unescape() returns 0 when it hits the end + * of the domain name, which we rely on here to encode + * the trailing NUL byte. */ + r = dns_label_unescape(&domain, (char *) out, len, 0); + if (r < 0) + return r; + + /* Optionally, output the name in DNSSEC canonical + * format, as described in RFC 4034, section 6.2. Or + * in other words: in lower-case. */ + if (canonical) + ascii_strlower_n((char*) out, (size_t) r); + + /* Fill label length, move forward */ + *label_length = r; + out += r; + len -= r; + + } while (r != 0); + + /* Verify the maximum size of the encoded name. The trailing + * dot + NUL byte account are included this time, hence + * compare against DNS_HOSTNAME_MAX + 2 (which is 255) this + * time. */ + if (out - buffer > DNS_HOSTNAME_MAX + 2) + return -EINVAL; + + return out - buffer; +} + +static bool srv_type_label_is_valid(const char *label, size_t n) { + size_t k; + + assert(label); + + if (n < 2) /* Label needs to be at least 2 chars long */ + return false; + + if (label[0] != '_') /* First label char needs to be underscore */ + return false; + + /* Second char must be a letter */ + if (!(label[1] >= 'A' && label[1] <= 'Z') && + !(label[1] >= 'a' && label[1] <= 'z')) + return false; + + /* Third and further chars must be alphanumeric or a hyphen */ + for (k = 2; k < n; k++) { + if (!(label[k] >= 'A' && label[k] <= 'Z') && + !(label[k] >= 'a' && label[k] <= 'z') && + !(label[k] >= '0' && label[k] <= '9') && + label[k] != '-') + return false; + } + + return true; +} + +bool dns_srv_type_is_valid(const char *name) { + unsigned c = 0; + int r; + + if (!name) + return false; + + for (;;) { + char label[DNS_LABEL_MAX]; + + /* This more or less implements RFC 6335, Section 5.1 */ + + r = dns_label_unescape(&name, label, sizeof label, 0); + if (r < 0) + return false; + if (r == 0) + break; + + if (c >= 2) + return false; + + if (!srv_type_label_is_valid(label, r)) + return false; + + c++; + } + + return c == 2; /* exactly two labels */ +} + +bool dnssd_srv_type_is_valid(const char *name) { + return dns_srv_type_is_valid(name) && + ((dns_name_endswith(name, "_tcp") > 0) || + (dns_name_endswith(name, "_udp") > 0)); /* Specific to DNS-SD. RFC 6763, Section 7 */ +} + +bool dns_service_name_is_valid(const char *name) { + size_t l; + + /* This more or less implements RFC 6763, Section 4.1.1 */ + + if (!name) + return false; + + if (!utf8_is_valid(name)) + return false; + + if (string_has_cc(name, NULL)) + return false; + + l = strlen(name); + if (l <= 0) + return false; + if (l > 63) + return false; + + return true; +} + +int dns_service_join(const char *name, const char *type, const char *domain, char **ret) { + char escaped[DNS_LABEL_ESCAPED_MAX]; + _cleanup_free_ char *n = NULL; + int r; + + assert(type); + assert(domain); + assert(ret); + + if (!dns_srv_type_is_valid(type)) + return -EINVAL; + + if (!name) + return dns_name_concat(type, domain, 0, ret); + + if (!dns_service_name_is_valid(name)) + return -EINVAL; + + r = dns_label_escape(name, strlen(name), escaped, sizeof(escaped)); + if (r < 0) + return r; + + r = dns_name_concat(type, domain, 0, &n); + if (r < 0) + return r; + + return dns_name_concat(escaped, n, 0, ret); +} + +static bool dns_service_name_label_is_valid(const char *label, size_t n) { + char *s; + + assert(label); + + if (memchr(label, 0, n)) + return false; + + s = strndupa(label, n); + return dns_service_name_is_valid(s); +} + +int dns_service_split(const char *joined, char **_name, char **_type, char **_domain) { + _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL; + const char *p = joined, *q = NULL, *d = NULL; + char a[DNS_LABEL_MAX], b[DNS_LABEL_MAX], c[DNS_LABEL_MAX]; + int an, bn, cn, r; + unsigned x = 0; + + assert(joined); + + /* Get first label from the full name */ + an = dns_label_unescape(&p, a, sizeof(a), 0); + if (an < 0) + return an; + + if (an > 0) { + x++; + + /* If there was a first label, try to get the second one */ + bn = dns_label_unescape(&p, b, sizeof(b), 0); + if (bn < 0) + return bn; + + if (bn > 0) { + x++; + + /* If there was a second label, try to get the third one */ + q = p; + cn = dns_label_unescape(&p, c, sizeof(c), 0); + if (cn < 0) + return cn; + + if (cn > 0) + x++; + } else + cn = 0; + } else + an = 0; + + if (x >= 2 && srv_type_label_is_valid(b, bn)) { + + if (x >= 3 && srv_type_label_is_valid(c, cn)) { + + if (dns_service_name_label_is_valid(a, an)) { + /* OK, got <name> . <type> . <type2> . <domain> */ + + name = strndup(a, an); + if (!name) + return -ENOMEM; + + type = strjoin(b, ".", c); + if (!type) + return -ENOMEM; + + d = p; + goto finish; + } + + } else if (srv_type_label_is_valid(a, an)) { + + /* OK, got <type> . <type2> . <domain> */ + + name = NULL; + + type = strjoin(a, ".", b); + if (!type) + return -ENOMEM; + + d = q; + goto finish; + } + } + + name = NULL; + type = NULL; + d = joined; + +finish: + r = dns_name_normalize(d, 0, &domain); + if (r < 0) + return r; + + if (_domain) + *_domain = TAKE_PTR(domain); + + if (_type) + *_type = TAKE_PTR(type); + + if (_name) + *_name = TAKE_PTR(name); + + return 0; +} + +static int dns_name_build_suffix_table(const char *name, const char *table[]) { + const char *p; + unsigned n = 0; + int r; + + assert(name); + assert(table); + + p = name; + for (;;) { + if (n > DNS_N_LABELS_MAX) + return -EINVAL; + + table[n] = p; + r = dns_name_parent(&p); + if (r < 0) + return r; + if (r == 0) + break; + + n++; + } + + return (int) n; +} + +int dns_name_suffix(const char *name, unsigned n_labels, const char **ret) { + const char* labels[DNS_N_LABELS_MAX+1]; + int n; + + assert(name); + assert(ret); + + n = dns_name_build_suffix_table(name, labels); + if (n < 0) + return n; + + if ((unsigned) n < n_labels) + return -EINVAL; + + *ret = labels[n - n_labels]; + return (int) (n - n_labels); +} + +int dns_name_skip(const char *a, unsigned n_labels, const char **ret) { + int r; + + assert(a); + assert(ret); + + for (; n_labels > 0; n_labels--) { + r = dns_name_parent(&a); + if (r < 0) + return r; + if (r == 0) { + *ret = ""; + return 0; + } + } + + *ret = a; + return 1; +} + +int dns_name_count_labels(const char *name) { + unsigned n = 0; + const char *p; + int r; + + assert(name); + + p = name; + for (;;) { + r = dns_name_parent(&p); + if (r < 0) + return r; + if (r == 0) + break; + + if (n >= DNS_N_LABELS_MAX) + return -EINVAL; + + n++; + } + + return (int) n; +} + +int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b) { + int r; + + assert(a); + assert(b); + + r = dns_name_skip(a, n_labels, &a); + if (r <= 0) + return r; + + return dns_name_equal(a, b); +} + +int dns_name_common_suffix(const char *a, const char *b, const char **ret) { + const char *a_labels[DNS_N_LABELS_MAX+1], *b_labels[DNS_N_LABELS_MAX+1]; + int n = 0, m = 0, k = 0, r, q; + + assert(a); + assert(b); + assert(ret); + + /* Determines the common suffix of domain names a and b */ + + n = dns_name_build_suffix_table(a, a_labels); + if (n < 0) + return n; + + m = dns_name_build_suffix_table(b, b_labels); + if (m < 0) + return m; + + for (;;) { + char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX]; + const char *x, *y; + + if (k >= n || k >= m) { + *ret = a_labels[n - k]; + return 0; + } + + x = a_labels[n - 1 - k]; + r = dns_label_unescape(&x, la, sizeof la, 0); + if (r < 0) + return r; + + y = b_labels[m - 1 - k]; + q = dns_label_unescape(&y, lb, sizeof lb, 0); + if (q < 0) + return q; + + if (r != q || ascii_strcasecmp_n(la, lb, r) != 0) { + *ret = a_labels[n - k]; + return 0; + } + + k++; + } +} + +int dns_name_apply_idna(const char *name, char **ret) { + + /* Return negative on error, 0 if not implemented, positive on success. */ + +#if HAVE_LIBIDN2 || HAVE_LIBIDN2 + int r; + + r = dlopen_idn(); + if (r == -EOPNOTSUPP) { + *ret = NULL; + return 0; + } + if (r < 0) + return r; +#endif + +#if HAVE_LIBIDN2 + _cleanup_free_ char *t = NULL; + + assert(name); + assert(ret); + + /* First, try non-transitional mode (i.e. IDN2008 rules) */ + r = sym_idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t, + IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL); + if (r == IDN2_DISALLOWED) /* If that failed, because of disallowed characters, try transitional mode. + * (i.e. IDN2003 rules which supports some unicode chars IDN2008 doesn't allow). */ + r = sym_idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t, + IDN2_NFC_INPUT | IDN2_TRANSITIONAL); + + log_debug("idn2_lookup_u8: %s → %s", name, t); + if (r == IDN2_OK) { + if (!startswith(name, "xn--")) { + _cleanup_free_ char *s = NULL; + + r = sym_idn2_to_unicode_8z8z(t, &s, 0); + if (r != IDN2_OK) { + log_debug("idn2_to_unicode_8z8z(\"%s\") failed: %d/%s", + t, r, sym_idn2_strerror(r)); + return 0; + } + + if (!streq_ptr(name, s)) { + log_debug("idn2 roundtrip failed: \"%s\" → \"%s\" → \"%s\", ignoring.", + name, t, s); + return 0; + } + } + + *ret = TAKE_PTR(t); + + return 1; /* *ret has been written */ + } + + log_debug("idn2_lookup_u8(\"%s\") failed: %d/%s", name, r, sym_idn2_strerror(r)); + if (r == IDN2_2HYPHEN) + /* The name has two hyphens — forbidden by IDNA2008 in some cases */ + return 0; + if (IN_SET(r, IDN2_TOO_BIG_DOMAIN, IDN2_TOO_BIG_LABEL)) + return -ENOSPC; + return -EINVAL; +#elif HAVE_LIBIDN + _cleanup_free_ char *buf = NULL; + size_t n = 0, allocated = 0; + bool first = true; + int r, q; + + assert(name); + assert(ret); + + for (;;) { + char label[DNS_LABEL_MAX]; + + r = dns_label_unescape(&name, label, sizeof label, 0); + if (r < 0) + return r; + if (r == 0) + break; + + q = dns_label_apply_idna(label, r, label, sizeof label); + if (q < 0) + return q; + if (q > 0) + r = q; + + if (!GREEDY_REALLOC(buf, allocated, n + !first + DNS_LABEL_ESCAPED_MAX)) + return -ENOMEM; + + r = dns_label_escape(label, r, buf + n + !first, DNS_LABEL_ESCAPED_MAX); + if (r < 0) + return r; + + if (first) + first = false; + else + buf[n++] = '.'; + + n += r; + } + + if (n > DNS_HOSTNAME_MAX) + return -EINVAL; + + if (!GREEDY_REALLOC(buf, allocated, n + 1)) + return -ENOMEM; + + buf[n] = 0; + *ret = TAKE_PTR(buf); + + return 1; +#else + *ret = NULL; + return 0; +#endif +} + +int dns_name_is_valid_or_address(const char *name) { + /* Returns > 0 if the specified name is either a valid IP address formatted as string or a valid DNS name */ + + if (isempty(name)) + return 0; + + if (in_addr_from_string_auto(name, NULL, NULL) >= 0) + return 1; + + return dns_name_is_valid(name); +} + +int dns_name_dot_suffixed(const char *name) { + const char *p = name; + int r; + + for (;;) { + if (streq(p, ".")) + return true; + + r = dns_label_unescape(&p, NULL, DNS_LABEL_MAX, DNS_LABEL_LEAVE_TRAILING_DOT); + if (r < 0) + return r; + if (r == 0) + return false; + } +} diff --git a/src/shared/dns-domain.h b/src/shared/dns-domain.h new file mode 100644 index 0000000..77f5962 --- /dev/null +++ b/src/shared/dns-domain.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "hashmap.h" +#include "in-addr-util.h" + +/* Length of a single label, with all escaping removed, excluding any trailing dot or NUL byte */ +#define DNS_LABEL_MAX 63 + +/* Worst case length of a single label, with all escaping applied and room for a trailing NUL byte. */ +#define DNS_LABEL_ESCAPED_MAX (DNS_LABEL_MAX*4+1) + +/* Maximum length of a full hostname, consisting of a series of unescaped labels, and no trailing dot or NUL byte */ +#define DNS_HOSTNAME_MAX 253 + +/* Maximum length of a full hostname, on the wire, including the final NUL byte */ +#define DNS_WIRE_FORMAT_HOSTNAME_MAX 255 + +/* Maximum number of labels per valid hostname */ +#define DNS_N_LABELS_MAX 127 + +typedef enum DNSLabelFlags { + DNS_LABEL_LDH = 1 << 0, /* Follow the "LDH" rule — only letters, digits, and internal hyphens. */ + DNS_LABEL_NO_ESCAPES = 1 << 1, /* Do not treat backslashes specially */ + DNS_LABEL_LEAVE_TRAILING_DOT = 1 << 2, /* Leave trailing dot in place */ +} DNSLabelFlags; + +int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags); +int dns_label_unescape_suffix(const char *name, const char **label_end, char *dest, size_t sz); +int dns_label_escape(const char *p, size_t l, char *dest, size_t sz); +int dns_label_escape_new(const char *p, size_t l, char **ret); + +static inline int dns_name_parent(const char **name) { + return dns_label_unescape(name, NULL, DNS_LABEL_MAX, 0); +} + +#if HAVE_LIBIDN +int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max); +int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max); +#endif + +int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **ret); + +static inline int dns_name_normalize(const char *s, DNSLabelFlags flags, char **ret) { + /* dns_name_concat() normalizes as a side-effect */ + return dns_name_concat(s, NULL, flags, ret); +} + +static inline int dns_name_is_valid(const char *s) { + int r; + + /* dns_name_normalize() verifies as a side effect */ + r = dns_name_normalize(s, 0, NULL); + if (r == -EINVAL) + return 0; + if (r < 0) + return r; + return 1; +} + +static inline int dns_name_is_valid_ldh(const char *s) { + int r; + + r = dns_name_concat(s, NULL, DNS_LABEL_LDH|DNS_LABEL_NO_ESCAPES, NULL); + if (r == -EINVAL) + return 0; + if (r < 0) + return r; + return 1; +} + +void dns_name_hash_func(const char *s, struct siphash *state); +int dns_name_compare_func(const char *a, const char *b); +extern const struct hash_ops dns_name_hash_ops; + +int dns_name_between(const char *a, const char *b, const char *c); +int dns_name_equal(const char *x, const char *y); +int dns_name_endswith(const char *name, const char *suffix); +int dns_name_startswith(const char *name, const char *prefix); + +int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret); + +int dns_name_reverse(int family, const union in_addr_union *a, char **ret); +int dns_name_address(const char *p, int *family, union in_addr_union *a); + +bool dns_name_is_root(const char *name); +bool dns_name_is_single_label(const char *name); + +int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical); + +bool dns_srv_type_is_valid(const char *name); +bool dnssd_srv_type_is_valid(const char *name); +bool dns_service_name_is_valid(const char *name); + +int dns_service_join(const char *name, const char *type, const char *domain, char **ret); +int dns_service_split(const char *joined, char **name, char **type, char **domain); + +int dns_name_suffix(const char *name, unsigned n_labels, const char **ret); +int dns_name_count_labels(const char *name); + +int dns_name_skip(const char *a, unsigned n_labels, const char **ret); +int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b); + +int dns_name_common_suffix(const char *a, const char *b, const char **ret); + +int dns_name_apply_idna(const char *name, char **ret); + +int dns_name_is_valid_or_address(const char *name); + +int dns_name_dot_suffixed(const char *name); diff --git a/src/shared/dropin.c b/src/shared/dropin.c new file mode 100644 index 0000000..89f4b8a --- /dev/null +++ b/src/shared/dropin.c @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "dirent-util.h" +#include "dropin.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio-label.h" +#include "fs-util.h" +#include "hashmap.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "path-util.h" +#include "set.h" +#include "string-util.h" +#include "strv.h" +#include "unit-name.h" + +int drop_in_file(const char *dir, const char *unit, unsigned level, + const char *name, char **ret_p, char **ret_q) { + + char prefix[DECIMAL_STR_MAX(unsigned)]; + _cleanup_free_ char *b = NULL, *p = NULL, *q = NULL; + + assert(unit); + assert(name); + assert(ret_p); + assert(ret_q); + + sprintf(prefix, "%u", level); + + b = xescape(name, "/."); + if (!b) + return -ENOMEM; + + if (!filename_is_valid(b)) + return -EINVAL; + + p = strjoin(dir, "/", unit, ".d"); + q = strjoin(p, "/", prefix, "-", b, ".conf"); + if (!p || !q) + return -ENOMEM; + + *ret_p = TAKE_PTR(p); + *ret_q = TAKE_PTR(q); + return 0; +} + +int write_drop_in(const char *dir, const char *unit, unsigned level, + const char *name, const char *data) { + + _cleanup_free_ char *p = NULL, *q = NULL; + int r; + + assert(dir); + assert(unit); + assert(name); + assert(data); + + r = drop_in_file(dir, unit, level, name, &p, &q); + if (r < 0) + return r; + + (void) mkdir_p(p, 0755); + return write_string_file_atomic_label(q, data); +} + +int write_drop_in_format(const char *dir, const char *unit, unsigned level, + const char *name, const char *format, ...) { + _cleanup_free_ char *p = NULL; + va_list ap; + int r; + + assert(dir); + assert(unit); + assert(name); + assert(format); + + va_start(ap, format); + r = vasprintf(&p, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return write_drop_in(dir, unit, level, name, p); +} + +static int unit_file_add_dir( + const char *original_root, + const char *path, + char ***dirs) { + + _cleanup_free_ char *chased = NULL; + int r; + + assert(path); + + /* This adds [original_root]/path to dirs, if it exists. */ + + r = chase_symlinks(path, original_root, 0, &chased, NULL); + if (r == -ENOENT) /* Ignore -ENOENT, after all most units won't have a drop-in dir. */ + return 0; + if (r == -ENAMETOOLONG) { + /* Also, ignore -ENAMETOOLONG but log about it. After all, users are not even able to create the + * drop-in dir in such case. This mostly happens for device units with an overly long /sys path. */ + log_debug_errno(r, "Path '%s' too long, couldn't canonicalize, ignoring.", path); + return 0; + } + if (r < 0) + return log_warning_errno(r, "Failed to canonicalize path '%s': %m", path); + + if (strv_consume(dirs, TAKE_PTR(chased)) < 0) + return log_oom(); + + return 0; +} + +static int unit_file_find_dirs( + const char *original_root, + Set *unit_path_cache, + const char *unit_path, + const char *name, + const char *suffix, + char ***dirs) { + + _cleanup_free_ char *prefix = NULL, *instance = NULL, *built = NULL; + bool is_instance, chopped; + const char *dash; + UnitType type; + char *path; + size_t n; + int r; + + assert(unit_path); + assert(name); + assert(suffix); + + path = strjoina(unit_path, "/", name, suffix); + if (!unit_path_cache || set_get(unit_path_cache, path)) { + r = unit_file_add_dir(original_root, path, dirs); + if (r < 0) + return r; + } + + is_instance = unit_name_is_valid(name, UNIT_NAME_INSTANCE); + if (is_instance) { /* Also try the template dir */ + _cleanup_free_ char *template = NULL; + + r = unit_name_template(name, &template); + if (r < 0) + return log_error_errno(r, "Failed to generate template from unit name: %m"); + + r = unit_file_find_dirs(original_root, unit_path_cache, unit_path, template, suffix, dirs); + if (r < 0) + return r; + } + + /* Return early for top level drop-ins. */ + if (unit_type_from_string(name) >= 0) + return 0; + + /* Let's see if there's a "-" prefix for this unit name. If so, let's invoke ourselves for it. This will then + * recursively do the same for all our prefixes. i.e. this means given "foo-bar-waldo.service" we'll also + * search "foo-bar-.service" and "foo-.service". + * + * Note the order in which we do it: we traverse up adding drop-ins on each step. This means the more specific + * drop-ins may override the more generic drop-ins, which is the intended behaviour. */ + + r = unit_name_to_prefix(name, &prefix); + if (r < 0) + return log_error_errno(r, "Failed to derive unit name prefix from unit name: %m"); + + chopped = false; + for (;;) { + dash = strrchr(prefix, '-'); + if (!dash) /* No dash? if so we are done */ + return 0; + + n = (size_t) (dash - prefix); + if (n == 0) /* Leading dash? If so, we are done */ + return 0; + + if (prefix[n+1] != 0 || chopped) { + prefix[n+1] = 0; + break; + } + + /* Trailing dash? If so, chop it off and try again, but not more than once. */ + prefix[n] = 0; + chopped = true; + } + + if (!unit_prefix_is_valid(prefix)) + return 0; + + type = unit_name_to_type(name); + if (type < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to derive unit type from unit name: %s", + name); + + if (is_instance) { + r = unit_name_to_instance(name, &instance); + if (r < 0) + return log_error_errno(r, "Failed to derive unit name instance from unit name: %m"); + } + + r = unit_name_build_from_type(prefix, instance, type, &built); + if (r < 0) + return log_error_errno(r, "Failed to build prefix unit name: %m"); + + return unit_file_find_dirs(original_root, unit_path_cache, unit_path, built, suffix, dirs); +} + +int unit_file_find_dropin_paths( + const char *original_root, + char **lookup_path, + Set *unit_path_cache, + const char *dir_suffix, + const char *file_suffix, + const char *name, + const Set *aliases, + char ***ret) { + + _cleanup_strv_free_ char **dirs = NULL; + const char *n; + char **p; + int r; + + assert(ret); + + if (name) + STRV_FOREACH(p, lookup_path) + (void) unit_file_find_dirs(original_root, unit_path_cache, *p, name, dir_suffix, &dirs); + + SET_FOREACH(n, aliases) + STRV_FOREACH(p, lookup_path) + (void) unit_file_find_dirs(original_root, unit_path_cache, *p, n, dir_suffix, &dirs); + + /* All the names in the unit are of the same type so just grab one. */ + n = name ?: (const char*) set_first(aliases); + if (n) { + UnitType type = _UNIT_TYPE_INVALID; + + type = unit_name_to_type(n); + if (type < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to derive unit type from unit name: %s", n); + + /* Special top level drop in for "<unit type>.<suffix>". Add this last as it's the most generic + * and should be able to be overridden by more specific drop-ins. */ + STRV_FOREACH(p, lookup_path) + (void) unit_file_find_dirs(original_root, + unit_path_cache, + *p, + unit_type_to_string(type), + dir_suffix, + &dirs); + } + + if (strv_isempty(dirs)) { + *ret = NULL; + return 0; + } + + r = conf_files_list_strv(ret, file_suffix, NULL, 0, (const char**) dirs); + if (r < 0) + return log_warning_errno(r, "Failed to create the list of configuration files: %m"); + + return 1; +} diff --git a/src/shared/dropin.h b/src/shared/dropin.h new file mode 100644 index 0000000..54cceaf --- /dev/null +++ b/src/shared/dropin.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "hashmap.h" +#include "macro.h" +#include "set.h" +#include "unit-name.h" + +int drop_in_file(const char *dir, const char *unit, unsigned level, + const char *name, char **_p, char **_q); + +int write_drop_in(const char *dir, const char *unit, unsigned level, + const char *name, const char *data); + +int write_drop_in_format(const char *dir, const char *unit, unsigned level, + const char *name, const char *format, ...) _printf_(5, 6); + +int unit_file_find_dropin_paths( + const char *original_root, + char **lookup_path, + Set *unit_path_cache, + const char *dir_suffix, + const char *file_suffix, + const char *name, + const Set *aliases, + char ***paths); diff --git a/src/shared/efi-loader.c b/src/shared/efi-loader.c new file mode 100644 index 0000000..20f70da --- /dev/null +++ b/src/shared/efi-loader.c @@ -0,0 +1,806 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "dirent-util.h" +#include "efi-loader.h" +#include "efivars.h" +#include "fd-util.h" +#include "io-util.h" +#include "parse-util.h" +#include "sort-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "utf8.h" +#include "virt.h" + +#if ENABLE_EFI + +#define LOAD_OPTION_ACTIVE 0x00000001 +#define MEDIA_DEVICE_PATH 0x04 +#define MEDIA_HARDDRIVE_DP 0x01 +#define MEDIA_FILEPATH_DP 0x04 +#define SIGNATURE_TYPE_GUID 0x02 +#define MBR_TYPE_EFI_PARTITION_TABLE_HEADER 0x02 +#define END_DEVICE_PATH_TYPE 0x7f +#define END_ENTIRE_DEVICE_PATH_SUBTYPE 0xff +#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001 + +#define boot_option__contents \ + { \ + uint32_t attr; \ + uint16_t path_len; \ + uint16_t title[]; \ + } + +struct boot_option boot_option__contents; +struct boot_option__packed boot_option__contents _packed_; +assert_cc(offsetof(struct boot_option, title) == offsetof(struct boot_option__packed, title)); +/* sizeof(struct boot_option) != sizeof(struct boot_option__packed), so + * the *size* of the structure should not be used anywhere below. */ + +struct drive_path { + uint32_t part_nr; + uint64_t part_start; + uint64_t part_size; + char signature[16]; + uint8_t mbr_type; + uint8_t signature_type; +} _packed_; + +#define device_path__contents \ + { \ + uint8_t type; \ + uint8_t sub_type; \ + uint16_t length; \ + union { \ + uint16_t path[0]; \ + struct drive_path drive; \ + }; \ + } + +struct device_path device_path__contents; +struct device_path__packed device_path__contents _packed_; +assert_cc(sizeof(struct device_path) == sizeof(struct device_path__packed)); + +int efi_reboot_to_firmware_supported(void) { + _cleanup_free_ void *v = NULL; + static int cache = -1; + uint64_t b; + size_t s; + int r; + + if (cache > 0) + return 0; + if (cache == 0) + return -EOPNOTSUPP; + + if (!is_efi_boot()) + goto not_supported; + + r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndicationsSupported", NULL, &v, &s); + if (r == -ENOENT) + goto not_supported; /* variable doesn't exist? it's not supported then */ + if (r < 0) + return r; + if (s != sizeof(uint64_t)) + return -EINVAL; + + b = *(uint64_t*) v; + if (!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI)) + goto not_supported; /* bit unset? it's not supported then */ + + cache = 1; + return 0; + +not_supported: + cache = 0; + return -EOPNOTSUPP; +} + +static int get_os_indications(uint64_t *ret) { + static struct stat cache_stat = {}; + _cleanup_free_ void *v = NULL; + _cleanup_free_ char *fn = NULL; + static uint64_t cache; + struct stat new_stat; + size_t s; + int r; + + assert(ret); + + /* Let's verify general support first */ + r = efi_reboot_to_firmware_supported(); + if (r < 0) + return r; + + fn = efi_variable_path(EFI_VENDOR_GLOBAL, "OsIndications"); + if (!fn) + return -ENOMEM; + + /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */ + if (stat(fn, &new_stat) < 0) { + if (errno != ENOENT) + return -errno; + + /* Doesn't exist? Then we can exit early (also see below) */ + *ret = 0; + return 0; + + } else if (stat_inode_unmodified(&new_stat, &cache_stat)) { + /* inode didn't change, we can return the cached value */ + *ret = cache; + return 0; + } + + r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndications", NULL, &v, &s); + if (r == -ENOENT) { + /* Some firmware implementations that do support OsIndications and report that with + * OsIndicationsSupported will remove the OsIndications variable when it is unset. Let's + * pretend it's 0 then, to hide this implementation detail. Note that this call will return + * -ENOENT then only if the support for OsIndications is missing entirely, as determined by + * efi_reboot_to_firmware_supported() above. */ + *ret = 0; + return 0; + } + if (r < 0) + return r; + if (s != sizeof(uint64_t)) + return -EINVAL; + + cache_stat = new_stat; + *ret = cache = *(uint64_t *)v; + return 0; +} + +int efi_get_reboot_to_firmware(void) { + int r; + uint64_t b; + + r = get_os_indications(&b); + if (r < 0) + return r; + + return !!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI); +} + +int efi_set_reboot_to_firmware(bool value) { + int r; + uint64_t b, b_new; + + r = get_os_indications(&b); + if (r < 0) + return r; + + b_new = UPDATE_FLAG(b, EFI_OS_INDICATIONS_BOOT_TO_FW_UI, value); + + /* Avoid writing to efi vars store if we can due to firmware bugs. */ + if (b != b_new) + return efi_set_variable(EFI_VENDOR_GLOBAL, "OsIndications", &b_new, sizeof(uint64_t)); + + return 0; +} + +static ssize_t utf16_size(const uint16_t *s, size_t buf_len_bytes) { + size_t l = 0; + + /* Returns the size of the string in bytes without the terminating two zero bytes */ + + if (buf_len_bytes % sizeof(uint16_t) != 0) + return -EINVAL; + + while (l < buf_len_bytes / sizeof(uint16_t)) { + if (s[l] == 0) + return (l + 1) * sizeof(uint16_t); + l++; + } + + return -EINVAL; /* The terminator was not found */ +} + +struct guid { + uint32_t u1; + uint16_t u2; + uint16_t u3; + uint8_t u4[8]; +} _packed_; + +static void efi_guid_to_id128(const void *guid, sd_id128_t *id128) { + uint32_t u1; + uint16_t u2, u3; + const struct guid *uuid = guid; + + memcpy(&u1, &uuid->u1, sizeof(uint32_t)); + id128->bytes[0] = (u1 >> 24) & 0xff; + id128->bytes[1] = (u1 >> 16) & 0xff; + id128->bytes[2] = (u1 >> 8) & 0xff; + id128->bytes[3] = u1 & 0xff; + memcpy(&u2, &uuid->u2, sizeof(uint16_t)); + id128->bytes[4] = (u2 >> 8) & 0xff; + id128->bytes[5] = u2 & 0xff; + memcpy(&u3, &uuid->u3, sizeof(uint16_t)); + id128->bytes[6] = (u3 >> 8) & 0xff; + id128->bytes[7] = u3 & 0xff; + memcpy(&id128->bytes[8], uuid->u4, sizeof(uuid->u4)); +} + +int efi_get_boot_option( + uint16_t id, + char **title, + sd_id128_t *part_uuid, + char **path, + bool *active) { + + char boot_id[9]; + _cleanup_free_ uint8_t *buf = NULL; + size_t l; + struct boot_option *header; + ssize_t title_size; + _cleanup_free_ char *s = NULL, *p = NULL; + sd_id128_t p_uuid = SD_ID128_NULL; + int r; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + xsprintf(boot_id, "Boot%04X", id); + r = efi_get_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, (void **)&buf, &l); + if (r < 0) + return r; + if (l < offsetof(struct boot_option, title)) + return -ENOENT; + + header = (struct boot_option *)buf; + title_size = utf16_size(header->title, l - offsetof(struct boot_option, title)); + if (title_size < 0) + return title_size; + + if (title) { + s = utf16_to_utf8(header->title, title_size); + if (!s) + return -ENOMEM; + } + + if (header->path_len > 0) { + uint8_t *dbuf; + size_t dnext, doff; + + doff = offsetof(struct boot_option, title) + title_size; + dbuf = buf + doff; + if (header->path_len > l - doff) + return -EINVAL; + + dnext = 0; + while (dnext < header->path_len) { + struct device_path *dpath; + + dpath = (struct device_path *)(dbuf + dnext); + if (dpath->length < 4) + break; + + /* Type 0x7F – End of Hardware Device Path, Sub-Type 0xFF – End Entire Device Path */ + if (dpath->type == END_DEVICE_PATH_TYPE && dpath->sub_type == END_ENTIRE_DEVICE_PATH_SUBTYPE) + break; + + dnext += dpath->length; + + /* Type 0x04 – Media Device Path */ + if (dpath->type != MEDIA_DEVICE_PATH) + continue; + + /* Sub-Type 1 – Hard Drive */ + if (dpath->sub_type == MEDIA_HARDDRIVE_DP) { + /* 0x02 – GUID Partition Table */ + if (dpath->drive.mbr_type != MBR_TYPE_EFI_PARTITION_TABLE_HEADER) + continue; + + /* 0x02 – GUID signature */ + if (dpath->drive.signature_type != SIGNATURE_TYPE_GUID) + continue; + + if (part_uuid) + efi_guid_to_id128(dpath->drive.signature, &p_uuid); + continue; + } + + /* Sub-Type 4 – File Path */ + if (dpath->sub_type == MEDIA_FILEPATH_DP && !p && path) { + p = utf16_to_utf8(dpath->path, dpath->length-4); + if (!p) + return -ENOMEM; + + efi_tilt_backslashes(p); + continue; + } + } + } + + if (title) + *title = TAKE_PTR(s); + if (part_uuid) + *part_uuid = p_uuid; + if (path) + *path = TAKE_PTR(p); + if (active) + *active = header->attr & LOAD_OPTION_ACTIVE; + + return 0; +} + +static void to_utf16(uint16_t *dest, const char *src) { + int i; + + for (i = 0; src[i] != '\0'; i++) + dest[i] = src[i]; + dest[i] = '\0'; +} + +static void id128_to_efi_guid(sd_id128_t id, void *guid) { + struct guid uuid = { + .u1 = id.bytes[0] << 24 | id.bytes[1] << 16 | id.bytes[2] << 8 | id.bytes[3], + .u2 = id.bytes[4] << 8 | id.bytes[5], + .u3 = id.bytes[6] << 8 | id.bytes[7], + }; + memcpy(uuid.u4, id.bytes+8, sizeof(uuid.u4)); + memcpy(guid, &uuid, sizeof(uuid)); +} + +static uint16_t *tilt_slashes(uint16_t *s) { + uint16_t *p; + + for (p = s; *p; p++) + if (*p == '/') + *p = '\\'; + + return s; +} + +int efi_add_boot_option( + uint16_t id, + const char *title, + uint32_t part, + uint64_t pstart, + uint64_t psize, + sd_id128_t part_uuid, + const char *path) { + + size_t size, title_len, path_len; + _cleanup_free_ char *buf = NULL; + struct boot_option *option; + struct device_path *devicep; + char boot_id[9]; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + title_len = (strlen(title)+1) * 2; + path_len = (strlen(path)+1) * 2; + + buf = malloc0(offsetof(struct boot_option, title) + title_len + + sizeof(struct drive_path) + + sizeof(struct device_path) + path_len); + if (!buf) + return -ENOMEM; + + /* header */ + option = (struct boot_option *)buf; + option->attr = LOAD_OPTION_ACTIVE; + option->path_len = offsetof(struct device_path, drive) + sizeof(struct drive_path) + + offsetof(struct device_path, path) + path_len + + offsetof(struct device_path, path); + to_utf16(option->title, title); + size = offsetof(struct boot_option, title) + title_len; + + /* partition info */ + devicep = (struct device_path *)(buf + size); + devicep->type = MEDIA_DEVICE_PATH; + devicep->sub_type = MEDIA_HARDDRIVE_DP; + devicep->length = offsetof(struct device_path, drive) + sizeof(struct drive_path); + memcpy(&devicep->drive.part_nr, &part, sizeof(uint32_t)); + memcpy(&devicep->drive.part_start, &pstart, sizeof(uint64_t)); + memcpy(&devicep->drive.part_size, &psize, sizeof(uint64_t)); + id128_to_efi_guid(part_uuid, devicep->drive.signature); + devicep->drive.mbr_type = MBR_TYPE_EFI_PARTITION_TABLE_HEADER; + devicep->drive.signature_type = SIGNATURE_TYPE_GUID; + size += devicep->length; + + /* path to loader */ + devicep = (struct device_path *)(buf + size); + devicep->type = MEDIA_DEVICE_PATH; + devicep->sub_type = MEDIA_FILEPATH_DP; + devicep->length = offsetof(struct device_path, path) + path_len; + to_utf16(devicep->path, path); + tilt_slashes(devicep->path); + size += devicep->length; + + /* end of path */ + devicep = (struct device_path *)(buf + size); + devicep->type = END_DEVICE_PATH_TYPE; + devicep->sub_type = END_ENTIRE_DEVICE_PATH_SUBTYPE; + devicep->length = offsetof(struct device_path, path); + size += devicep->length; + + xsprintf(boot_id, "Boot%04X", id); + return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, buf, size); +} + +int efi_remove_boot_option(uint16_t id) { + char boot_id[9]; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + xsprintf(boot_id, "Boot%04X", id); + return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, 0); +} + +int efi_get_boot_order(uint16_t **order) { + _cleanup_free_ void *buf = NULL; + size_t l; + int r; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = efi_get_variable(EFI_VENDOR_GLOBAL, "BootOrder", NULL, &buf, &l); + if (r < 0) + return r; + + if (l <= 0) + return -ENOENT; + + if (l % sizeof(uint16_t) > 0 || + l / sizeof(uint16_t) > INT_MAX) + return -EINVAL; + + *order = TAKE_PTR(buf); + return (int) (l / sizeof(uint16_t)); +} + +int efi_set_boot_order(uint16_t *order, size_t n) { + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + return efi_set_variable(EFI_VENDOR_GLOBAL, "BootOrder", order, n * sizeof(uint16_t)); +} + +static int boot_id_hex(const char s[static 4]) { + int id = 0, i; + + assert(s); + + for (i = 0; i < 4; i++) + if (s[i] >= '0' && s[i] <= '9') + id |= (s[i] - '0') << (3 - i) * 4; + else if (s[i] >= 'A' && s[i] <= 'F') + id |= (s[i] - 'A' + 10) << (3 - i) * 4; + else + return -EINVAL; + + return id; +} + +static int cmp_uint16(const uint16_t *a, const uint16_t *b) { + return CMP(*a, *b); +} + +int efi_get_boot_options(uint16_t **options) { + _cleanup_closedir_ DIR *dir = NULL; + _cleanup_free_ uint16_t *list = NULL; + struct dirent *de; + size_t alloc = 0; + int count = 0; + + assert(options); + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + dir = opendir("/sys/firmware/efi/efivars/"); + if (!dir) + return -errno; + + FOREACH_DIRENT(de, dir, return -errno) { + int id; + + if (strncmp(de->d_name, "Boot", 4) != 0) + continue; + + if (strlen(de->d_name) != 45) + continue; + + if (strcmp(de->d_name + 8, "-8be4df61-93ca-11d2-aa0d-00e098032b8c") != 0) + continue; + + id = boot_id_hex(de->d_name + 4); + if (id < 0) + continue; + + if (!GREEDY_REALLOC(list, alloc, count + 1)) + return -ENOMEM; + + list[count++] = id; + } + + typesafe_qsort(list, count, cmp_uint16); + + *options = TAKE_PTR(list); + + return count; +} + +static int read_usec(sd_id128_t vendor, const char *name, usec_t *u) { + _cleanup_free_ char *j = NULL; + int r; + uint64_t x = 0; + + assert(name); + assert(u); + + r = efi_get_variable_string(EFI_VENDOR_LOADER, name, &j); + if (r < 0) + return r; + + r = safe_atou64(j, &x); + if (r < 0) + return r; + + *u = x; + return 0; +} + +int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) { + uint64_t x, y; + int r; + + assert(firmware); + assert(loader); + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeInitUSec", &x); + if (r < 0) + return log_debug_errno(r, "Failed to read LoaderTimeInitUSec: %m"); + + r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeExecUSec", &y); + if (r < 0) + return log_debug_errno(r, "Failed to read LoaderTimeExecUSec: %m"); + + if (y == 0 || y < x || y - x > USEC_PER_HOUR) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Bad LoaderTimeInitUSec=%"PRIu64", LoaderTimeExecUSec=%" PRIu64"; refusing.", + x, y); + + *firmware = x; + *loader = y; + + return 0; +} + +int efi_loader_get_device_part_uuid(sd_id128_t *u) { + _cleanup_free_ char *p = NULL; + int r, parsed[16]; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderDevicePartUUID", &p); + if (r < 0) + return r; + + if (sscanf(p, SD_ID128_UUID_FORMAT_STR, + &parsed[0], &parsed[1], &parsed[2], &parsed[3], + &parsed[4], &parsed[5], &parsed[6], &parsed[7], + &parsed[8], &parsed[9], &parsed[10], &parsed[11], + &parsed[12], &parsed[13], &parsed[14], &parsed[15]) != 16) + return -EIO; + + if (u) { + unsigned i; + + for (i = 0; i < ELEMENTSOF(parsed); i++) + u->bytes[i] = parsed[i]; + } + + return 0; +} + +int efi_loader_get_entries(char ***ret) { + _cleanup_free_ char16_t *entries = NULL; + _cleanup_strv_free_ char **l = NULL; + size_t size, i, start; + int r; + + assert(ret); + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntries", NULL, (void**) &entries, &size); + if (r < 0) + return r; + + /* The variable contains a series of individually NUL terminated UTF-16 strings. */ + + for (i = 0, start = 0;; i++) { + _cleanup_free_ char *decoded = NULL; + bool end; + + /* Is this the end of the variable's data? */ + end = i * sizeof(char16_t) >= size; + + /* Are we in the middle of a string? (i.e. not at the end of the variable, nor at a NUL terminator?) If + * so, let's go to the next entry. */ + if (!end && entries[i] != 0) + continue; + + /* We reached the end of a string, let's decode it into UTF-8 */ + decoded = utf16_to_utf8(entries + start, (i - start) * sizeof(char16_t)); + if (!decoded) + return -ENOMEM; + + if (efi_loader_entry_name_valid(decoded)) { + r = strv_consume(&l, TAKE_PTR(decoded)); + if (r < 0) + return r; + } else + log_debug("Ignoring invalid loader entry '%s'.", decoded); + + /* We reached the end of the variable */ + if (end) + break; + + /* Continue after the NUL byte */ + start = i + 1; + } + + *ret = TAKE_PTR(l); + return 0; +} + +int efi_loader_get_features(uint64_t *ret) { + _cleanup_free_ void *v = NULL; + size_t s; + int r; + + if (!is_efi_boot()) { + *ret = 0; + return 0; + } + + r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderFeatures", NULL, &v, &s); + if (r == -ENOENT) { + _cleanup_free_ char *info = NULL; + + /* The new (v240+) LoaderFeatures variable is not supported, let's see if it's systemd-boot at all */ + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderInfo", &info); + if (r < 0) { + if (r != -ENOENT) + return r; + + /* Variable not set, definitely means not systemd-boot */ + + } else if (first_word(info, "systemd-boot")) { + + /* An older systemd-boot version. Let's hardcode the feature set, since it was pretty + * static in all its versions. */ + + *ret = EFI_LOADER_FEATURE_CONFIG_TIMEOUT | + EFI_LOADER_FEATURE_ENTRY_DEFAULT | + EFI_LOADER_FEATURE_ENTRY_ONESHOT; + + return 0; + } + + /* No features supported */ + *ret = 0; + return 0; + } + if (r < 0) + return r; + + if (s != sizeof(uint64_t)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "LoaderFeatures EFI variable doesn't have the right size."); + + memcpy(ret, v, sizeof(uint64_t)); + return 0; +} + +int efi_loader_get_config_timeout_one_shot(usec_t *ret) { + _cleanup_free_ char *v = NULL, *fn = NULL; + static struct stat cache_stat = {}; + struct stat new_stat; + static usec_t cache; + uint64_t sec; + int r; + + assert(ret); + + fn = efi_variable_path(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot"); + if (!fn) + return -ENOMEM; + + /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */ + if (stat(fn, &new_stat) < 0) + return -errno; + + if (stat_inode_unmodified(&new_stat, &cache_stat)) { + *ret = cache; + return 0; + } + + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot", &v); + if (r < 0) + return r; + + r = safe_atou64(v, &sec); + if (r < 0) + return r; + if (sec > USEC_INFINITY / USEC_PER_SEC) + return -ERANGE; + + cache_stat = new_stat; + *ret = cache = sec * USEC_PER_SEC; /* return in µs */ + return 0; +} + +int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat) { + _cleanup_free_ char *fn = NULL, *v = NULL; + struct stat new_stat; + int r; + + assert(cache); + assert(cache_stat); + + fn = efi_variable_path(EFI_VENDOR_LOADER, "LoaderEntryOneShot"); + if (!fn) + return -ENOMEM; + + /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */ + if (stat(fn, &new_stat) < 0) + return -errno; + + if (stat_inode_unmodified(&new_stat, cache_stat)) + return 0; + + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &v); + if (r < 0) + return r; + + if (!efi_loader_entry_name_valid(v)) + return -EINVAL; + + *cache_stat = new_stat; + free_and_replace(*cache, v); + + return 0; +} + +#endif + +bool efi_loader_entry_name_valid(const char *s) { + if (isempty(s)) + return false; + + if (strlen(s) > FILENAME_MAX) /* Make sure entry names fit in filenames */ + return false; + + return in_charset(s, ALPHANUMERICAL "+-_."); +} + +char *efi_tilt_backslashes(char *s) { + char *p; + + for (p = s; *p; p++) + if (*p == '\\') + *p = '/'; + + return s; +} diff --git a/src/shared/efi-loader.h b/src/shared/efi-loader.h new file mode 100644 index 0000000..34476f4 --- /dev/null +++ b/src/shared/efi-loader.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "efivars.h" + +#include <sys/stat.h> + +#if ENABLE_EFI + +int efi_reboot_to_firmware_supported(void); +int efi_get_reboot_to_firmware(void); +int efi_set_reboot_to_firmware(bool value); + +int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active); +int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path); +int efi_remove_boot_option(uint16_t id); +int efi_get_boot_order(uint16_t **order); +int efi_set_boot_order(uint16_t *order, size_t n); +int efi_get_boot_options(uint16_t **options); + +int efi_loader_get_device_part_uuid(sd_id128_t *u); +int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader); + +int efi_loader_get_entries(char ***ret); + +int efi_loader_get_features(uint64_t *ret); + +int efi_loader_get_config_timeout_one_shot(usec_t *ret); +int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat); + +#else + +static inline int efi_reboot_to_firmware_supported(void) { + return -EOPNOTSUPP; +} + +static inline int efi_get_reboot_to_firmware(void) { + return -EOPNOTSUPP; +} + +static inline int efi_set_reboot_to_firmware(bool value) { + return -EOPNOTSUPP; +} + +static inline int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active) { + return -EOPNOTSUPP; +} + +static inline int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path) { + return -EOPNOTSUPP; +} + +static inline int efi_remove_boot_option(uint16_t id) { + return -EOPNOTSUPP; +} + +static inline int efi_get_boot_order(uint16_t **order) { + return -EOPNOTSUPP; +} + +static inline int efi_set_boot_order(uint16_t *order, size_t n) { + return -EOPNOTSUPP; +} + +static inline int efi_get_boot_options(uint16_t **options) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_device_part_uuid(sd_id128_t *u) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_entries(char ***ret) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_features(uint64_t *ret) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_config_timeout_one_shot(usec_t *ret) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat) { + return -EOPNOTSUPP; +} + +#endif + +bool efi_loader_entry_name_valid(const char *s); + +char *efi_tilt_backslashes(char *s); diff --git a/src/shared/enable-mempool.c b/src/shared/enable-mempool.c new file mode 100644 index 0000000..1abfccb --- /dev/null +++ b/src/shared/enable-mempool.c @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "mempool.h" + +const bool mempool_use_allowed = true; diff --git a/src/shared/env-file-label.c b/src/shared/env-file-label.c new file mode 100644 index 0000000..468afce --- /dev/null +++ b/src/shared/env-file-label.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/stat.h> + +#include "env-file-label.h" +#include "env-file.h" +#include "selinux-util.h" + +int write_env_file_label(const char *fname, char **l) { + int r; + + r = mac_selinux_create_file_prepare(fname, S_IFREG); + if (r < 0) + return r; + + r = write_env_file(fname, l); + + mac_selinux_create_file_clear(); + + return r; +} diff --git a/src/shared/env-file-label.h b/src/shared/env-file-label.h new file mode 100644 index 0000000..d68058a --- /dev/null +++ b/src/shared/env-file-label.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to + * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not + * for all */ + +int write_env_file_label(const char *fname, char **l); diff --git a/src/shared/ethtool-util.c b/src/shared/ethtool-util.c new file mode 100644 index 0000000..e6fab26 --- /dev/null +++ b/src/shared/ethtool-util.c @@ -0,0 +1,1149 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <net/if.h> +#include <sys/ioctl.h> +#include <linux/ethtool.h> +#include <linux/netdevice.h> +#include <linux/sockios.h> + +#include "conf-parser.h" +#include "ethtool-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "log.h" +#include "memory-util.h" +#include "socket-util.h" +#include "string-table.h" +#include "strxcpyx.h" + +static const char* const duplex_table[_DUP_MAX] = { + [DUP_FULL] = "full", + [DUP_HALF] = "half" +}; + +DEFINE_STRING_TABLE_LOOKUP(duplex, Duplex); +DEFINE_CONFIG_PARSE_ENUM(config_parse_duplex, duplex, Duplex, "Failed to parse duplex setting"); + +static const char* const wol_table[_WOL_MAX] = { + [WOL_PHY] = "phy", + [WOL_UCAST] = "unicast", + [WOL_MCAST] = "multicast", + [WOL_BCAST] = "broadcast", + [WOL_ARP] = "arp", + [WOL_MAGIC] = "magic", + [WOL_MAGICSECURE] = "secureon", + [WOL_OFF] = "off", +}; + +DEFINE_STRING_TABLE_LOOKUP(wol, WakeOnLan); +DEFINE_CONFIG_PARSE_ENUM(config_parse_wol, wol, WakeOnLan, "Failed to parse WakeOnLan setting"); + +static const char* const port_table[] = { + [NET_DEV_PORT_TP] = "tp", + [NET_DEV_PORT_AUI] = "aui", + [NET_DEV_PORT_MII] = "mii", + [NET_DEV_PORT_FIBRE] = "fibre", + [NET_DEV_PORT_BNC] = "bnc", +}; + +DEFINE_STRING_TABLE_LOOKUP(port, NetDevPort); +DEFINE_CONFIG_PARSE_ENUM(config_parse_port, port, NetDevPort, "Failed to parse Port setting"); + +static const char* const netdev_feature_table[_NET_DEV_FEAT_MAX] = { + [NET_DEV_FEAT_RX] = "rx-checksum", + [NET_DEV_FEAT_TX] = "tx-checksum-", /* The suffix "-" means any feature beginning with "tx-checksum-" */ + [NET_DEV_FEAT_GSO] = "tx-generic-segmentation", + [NET_DEV_FEAT_GRO] = "rx-gro", + [NET_DEV_FEAT_LRO] = "rx-lro", + [NET_DEV_FEAT_TSO] = "tx-tcp-segmentation", + [NET_DEV_FEAT_TSO6] = "tx-tcp6-segmentation", +}; + +static const char* const ethtool_link_mode_bit_table[] = { + [ETHTOOL_LINK_MODE_10baseT_Half_BIT] = "10baset-half", + [ETHTOOL_LINK_MODE_10baseT_Full_BIT] = "10baset-full", + [ETHTOOL_LINK_MODE_100baseT_Half_BIT] = "100baset-half", + [ETHTOOL_LINK_MODE_100baseT_Full_BIT] = "100baset-full", + [ETHTOOL_LINK_MODE_1000baseT_Half_BIT] = "1000baset-half", + [ETHTOOL_LINK_MODE_1000baseT_Full_BIT] = "1000baset-full", + [ETHTOOL_LINK_MODE_Autoneg_BIT] = "autonegotiation", + [ETHTOOL_LINK_MODE_TP_BIT] = "tp", + [ETHTOOL_LINK_MODE_AUI_BIT] = "aui", + [ETHTOOL_LINK_MODE_MII_BIT] = "mii", + [ETHTOOL_LINK_MODE_FIBRE_BIT] = "fibre", + [ETHTOOL_LINK_MODE_BNC_BIT] = "bnc", + [ETHTOOL_LINK_MODE_10000baseT_Full_BIT] = "10000baset-full", + [ETHTOOL_LINK_MODE_Pause_BIT] = "pause", + [ETHTOOL_LINK_MODE_Asym_Pause_BIT] = "asym-pause", + [ETHTOOL_LINK_MODE_2500baseX_Full_BIT] = "2500basex-full", + [ETHTOOL_LINK_MODE_Backplane_BIT] = "backplane", + [ETHTOOL_LINK_MODE_1000baseKX_Full_BIT] = "1000basekx-full", + [ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT] = "10000basekx4-full", + [ETHTOOL_LINK_MODE_10000baseKR_Full_BIT] = "10000basekr-full", + [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = "10000baser-fec", + [ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT] = "20000basemld2-full", + [ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT] = "20000basekr2-full", + [ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT] = "40000basekr4-full", + [ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT] = "40000basecr4-full", + [ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT] = "40000basesr4-full", + [ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT] = "40000baselr4-full", + [ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT] = "56000basekr4-full", + [ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT] = "56000basecr4-full", + [ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT] = "56000basesr4-full", + [ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT] = "56000baselr4-full", + [ETHTOOL_LINK_MODE_25000baseCR_Full_BIT] = "25000basecr-full", + [ETHTOOL_LINK_MODE_25000baseKR_Full_BIT] = "25000basekr-full", + [ETHTOOL_LINK_MODE_25000baseSR_Full_BIT] = "25000basesr-full", + [ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT] = "50000basecr2-full", + [ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT] = "50000basekr2-full", + [ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT] = "100000basekr4-full", + [ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT] = "100000basesr4-full", + [ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT] = "100000basecr4-full", + [ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT] = "100000baselr4-er4-full", + [ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT] = "50000basesr2-full", + [ETHTOOL_LINK_MODE_1000baseX_Full_BIT] = "1000basex-full", + [ETHTOOL_LINK_MODE_10000baseCR_Full_BIT] = "10000basecr-full", + [ETHTOOL_LINK_MODE_10000baseSR_Full_BIT] = "10000basesr-full", + [ETHTOOL_LINK_MODE_10000baseLR_Full_BIT] = "10000baselr-full", + [ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT] = "10000baselrm-full", + [ETHTOOL_LINK_MODE_10000baseER_Full_BIT] = "10000baseer-full", + [ETHTOOL_LINK_MODE_2500baseT_Full_BIT] = "2500baset-full", + [ETHTOOL_LINK_MODE_5000baseT_Full_BIT] = "5000baset-full", + [ETHTOOL_LINK_MODE_FEC_NONE_BIT] = "fec-none", + [ETHTOOL_LINK_MODE_FEC_RS_BIT] = "fec-rs", + [ETHTOOL_LINK_MODE_FEC_BASER_BIT] = "fec-baser", + [ETHTOOL_LINK_MODE_50000baseKR_Full_BIT] = "50000basekr-full", + [ETHTOOL_LINK_MODE_50000baseSR_Full_BIT] = "50000basesr-full", + [ETHTOOL_LINK_MODE_50000baseCR_Full_BIT] = "50000basecr-full", + [ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT] = "50000baselr-er-fr-full", + [ETHTOOL_LINK_MODE_50000baseDR_Full_BIT] = "50000basedr-full", + [ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT] = "100000basekr2-full", + [ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT] = "100000basesr2-full", + [ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT] = "100000basecr2-full", + [ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT] = "100000baselr2-er2-fr2-full", + [ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT] = "100000basedr2-full", + [ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT] = "200000basekr4-full", + [ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT] = "200000basesr4-full", + [ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT] = "200000baselr4-er4-fr4-full", + [ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT] = "200000basedr4-full", + [ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT] = "200000basecr4-full", + [ETHTOOL_LINK_MODE_100baseT1_Full_BIT] = "100baset1-full", + [ETHTOOL_LINK_MODE_1000baseT1_Full_BIT] = "1000baset1-full", + [ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT] = "400000basekr8-full", + [ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT] = "400000basesr8-full", + [ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT] = "400000baselr8-er8-fr8-full", + [ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT] = "400000basedr8-full", + [ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT] = "400000basecr8-full", + [ETHTOOL_LINK_MODE_FEC_LLRS_BIT] = "fec-llrs", + [ETHTOOL_LINK_MODE_100000baseKR_Full_BIT] = "100000basekr-full", + [ETHTOOL_LINK_MODE_100000baseSR_Full_BIT] = "100000basesr-full", + [ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT] = "100000baselr-er-fr-full", + [ETHTOOL_LINK_MODE_100000baseCR_Full_BIT] = "100000basecr-full", + [ETHTOOL_LINK_MODE_100000baseDR_Full_BIT] = "100000basedr-full", + [ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT] = "200000basekr2-full", + [ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT] = "200000basesr2-full", + [ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT] = "200000baselr2-er2-fr2-full", + [ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT] = "200000basedr2-full", + [ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT] = "200000basecr2-full", + [ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT] = "400000basekr4-full", + [ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT] = "400000basesr4-full", + [ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT] = "400000baselr4-er4-fr4-full", + [ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT] = "400000basedr4-full", + [ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT] = "400000basecr4-full", +}; +/* Make sure the array is large enough to fit all bits */ +assert_cc((ELEMENTSOF(ethtool_link_mode_bit_table)-1) / 32 < N_ADVERTISE); + +DEFINE_STRING_TABLE_LOOKUP(ethtool_link_mode_bit, enum ethtool_link_mode_bit_indices); + +static int ethtool_connect_or_warn(int *ret, bool warn) { + int fd; + + assert_return(ret, -EINVAL); + + fd = socket_ioctl_fd(); + if (fd < 0) + return log_full_errno(warn ? LOG_WARNING: LOG_DEBUG, fd, + "ethtool: could not create control socket: %m"); + + *ret = fd; + + return 0; +} + +int ethtool_get_driver(int *ethtool_fd, const char *ifname, char **ret) { + struct ethtool_drvinfo ecmd = { + .cmd = ETHTOOL_GDRVINFO, + }; + struct ifreq ifr = { + .ifr_data = (void*) &ecmd, + }; + char *d; + int r; + + assert(ethtool_fd); + assert(ifname); + assert(ret); + + if (*ethtool_fd < 0) { + r = ethtool_connect_or_warn(ethtool_fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + if (isempty(ecmd.driver)) + return -ENODATA; + + d = strdup(ecmd.driver); + if (!d) + return -ENOMEM; + + *ret = d; + return 0; +} + +int ethtool_get_link_info( + int *ethtool_fd, + const char *ifname, + int *ret_autonegotiation, + uint64_t *ret_speed, + Duplex *ret_duplex, + NetDevPort *ret_port) { + + struct ethtool_cmd ecmd = { + .cmd = ETHTOOL_GSET, + }; + struct ifreq ifr = { + .ifr_data = (void*) &ecmd, + }; + int r; + + assert(ethtool_fd); + assert(ifname); + + if (*ethtool_fd < 0) { + r = ethtool_connect_or_warn(ethtool_fd, false); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + if (ret_autonegotiation) + *ret_autonegotiation = ecmd.autoneg; + + if (ret_speed) { + uint32_t speed; + + speed = ethtool_cmd_speed(&ecmd); + *ret_speed = speed == (uint32_t) SPEED_UNKNOWN ? + UINT64_MAX : (uint64_t) speed * 1000 * 1000; + } + + if (ret_duplex) + *ret_duplex = ecmd.duplex; + + if (ret_port) + *ret_port = ecmd.port; + + return 0; +} + +int ethtool_get_permanent_macaddr(int *ethtool_fd, const char *ifname, struct ether_addr *ret) { + _cleanup_close_ int fd = -1; + struct { + struct ethtool_perm_addr addr; + uint8_t space[MAX_ADDR_LEN]; + } epaddr = { + .addr.cmd = ETHTOOL_GPERMADDR, + .addr.size = MAX_ADDR_LEN, + }; + struct ifreq ifr = { + .ifr_data = (caddr_t) &epaddr, + }; + int r; + + assert(ifname); + assert(ret); + + if (!ethtool_fd) + ethtool_fd = &fd; + + if (*ethtool_fd < 0) { + r = ethtool_connect_or_warn(ethtool_fd, false); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + if (epaddr.addr.size != 6) + return -EOPNOTSUPP; + +#pragma GCC diagnostic push +#if HAVE_ZERO_LENGTH_BOUNDS +# pragma GCC diagnostic ignored "-Wzero-length-bounds" +#endif + for (size_t i = 0; i < epaddr.addr.size; i++) + ret->ether_addr_octet[i] = epaddr.addr.data[i]; +#pragma GCC diagnostic pop + + return 0; +} + +int ethtool_set_speed(int *ethtool_fd, const char *ifname, unsigned speed, Duplex duplex) { + struct ethtool_cmd ecmd = { + .cmd = ETHTOOL_GSET, + }; + struct ifreq ifr = { + .ifr_data = (void*) &ecmd, + }; + bool need_update = false; + int r; + + assert(ethtool_fd); + assert(ifname); + + if (speed == 0 && duplex == _DUP_INVALID) + return 0; + + if (*ethtool_fd < 0) { + r = ethtool_connect_or_warn(ethtool_fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + if (ethtool_cmd_speed(&ecmd) != speed) { + ethtool_cmd_speed_set(&ecmd, speed); + need_update = true; + } + + switch (duplex) { + case DUP_HALF: + if (ecmd.duplex != DUPLEX_HALF) { + ecmd.duplex = DUPLEX_HALF; + need_update = true; + } + break; + case DUP_FULL: + if (ecmd.duplex != DUPLEX_FULL) { + ecmd.duplex = DUPLEX_FULL; + need_update = true; + } + break; + default: + break; + } + + if (need_update) { + ecmd.cmd = ETHTOOL_SSET; + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + } + + return 0; +} + +int ethtool_set_wol(int *ethtool_fd, const char *ifname, WakeOnLan wol) { + struct ethtool_wolinfo ecmd = { + .cmd = ETHTOOL_GWOL, + }; + struct ifreq ifr = { + .ifr_data = (void*) &ecmd, + }; + bool need_update = false; + int r; + + assert(ethtool_fd); + assert(ifname); + + if (wol == _WOL_INVALID) + return 0; + + if (*ethtool_fd < 0) { + r = ethtool_connect_or_warn(ethtool_fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + switch (wol) { + case WOL_PHY: + if (ecmd.wolopts != WAKE_PHY) { + ecmd.wolopts = WAKE_PHY; + need_update = true; + } + break; + case WOL_UCAST: + if (ecmd.wolopts != WAKE_UCAST) { + ecmd.wolopts = WAKE_UCAST; + need_update = true; + } + break; + case WOL_MCAST: + if (ecmd.wolopts != WAKE_MCAST) { + ecmd.wolopts = WAKE_MCAST; + need_update = true; + } + break; + case WOL_BCAST: + if (ecmd.wolopts != WAKE_BCAST) { + ecmd.wolopts = WAKE_BCAST; + need_update = true; + } + break; + case WOL_ARP: + if (ecmd.wolopts != WAKE_ARP) { + ecmd.wolopts = WAKE_ARP; + need_update = true; + } + break; + case WOL_MAGIC: + if (ecmd.wolopts != WAKE_MAGIC) { + ecmd.wolopts = WAKE_MAGIC; + need_update = true; + } + break; + case WOL_MAGICSECURE: + if (ecmd.wolopts != WAKE_MAGICSECURE) { + ecmd.wolopts = WAKE_MAGICSECURE; + need_update = true; + } + break; + case WOL_OFF: + if (ecmd.wolopts != 0) { + ecmd.wolopts = 0; + need_update = true; + } + break; + default: + break; + } + + if (need_update) { + ecmd.cmd = ETHTOOL_SWOL; + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + } + + return 0; +} + +int ethtool_set_nic_buffer_size(int *ethtool_fd, const char *ifname, const netdev_ring_param *ring) { + struct ethtool_ringparam ecmd = { + .cmd = ETHTOOL_GRINGPARAM, + }; + struct ifreq ifr = { + .ifr_data = (void*) &ecmd, + }; + bool need_update = false; + int r; + + assert(ethtool_fd); + assert(ifname); + assert(ring); + + if (*ethtool_fd < 0) { + r = ethtool_connect_or_warn(ethtool_fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + if (ring->rx_pending_set && ecmd.rx_pending != ring->rx_pending) { + ecmd.rx_pending = ring->rx_pending; + need_update = true; + } + + if (ring->rx_mini_pending_set && ecmd.rx_mini_pending != ring->rx_mini_pending) { + ecmd.rx_mini_pending = ring->rx_mini_pending; + need_update = true; + } + + if (ring->rx_jumbo_pending_set && ecmd.rx_jumbo_pending != ring->rx_jumbo_pending) { + ecmd.rx_jumbo_pending = ring->rx_jumbo_pending; + need_update = true; + } + + if (ring->tx_pending_set && ecmd.tx_pending != ring->tx_pending) { + ecmd.tx_pending = ring->tx_pending; + need_update = true; + } + + if (need_update) { + ecmd.cmd = ETHTOOL_SRINGPARAM; + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + } + + return 0; +} + +static int get_stringset(int ethtool_fd, struct ifreq *ifr, int stringset_id, struct ethtool_gstrings **ret) { + _cleanup_free_ struct ethtool_gstrings *strings = NULL; + struct { + struct ethtool_sset_info info; + uint32_t space; + } buffer = { + .info = { + .cmd = ETHTOOL_GSSET_INFO, + .sset_mask = UINT64_C(1) << stringset_id, + }, + }; + unsigned len; + int r; + + assert(ethtool_fd >= 0); + assert(ifr); + assert(ret); + + ifr->ifr_data = (void *) &buffer.info; + + r = ioctl(ethtool_fd, SIOCETHTOOL, ifr); + if (r < 0) + return -errno; + + if (!buffer.info.sset_mask) + return -EINVAL; + +#pragma GCC diagnostic push +#if HAVE_ZERO_LENGTH_BOUNDS +# pragma GCC diagnostic ignored "-Wzero-length-bounds" +#endif + len = buffer.info.data[0]; +#pragma GCC diagnostic pop + + strings = malloc0(sizeof(struct ethtool_gstrings) + len * ETH_GSTRING_LEN); + if (!strings) + return -ENOMEM; + + strings->cmd = ETHTOOL_GSTRINGS; + strings->string_set = stringset_id; + strings->len = len; + + ifr->ifr_data = (void *) strings; + + r = ioctl(ethtool_fd, SIOCETHTOOL, ifr); + if (r < 0) + return -errno; + + *ret = TAKE_PTR(strings); + + return 0; +} + +static int set_features_bit( + const struct ethtool_gstrings *strings, + const char *feature, + bool flag, + struct ethtool_sfeatures *sfeatures) { + bool found = false; + + assert(strings); + assert(feature); + assert(sfeatures); + + for (size_t i = 0; i < strings->len; i++) + if (streq((char *) &strings->data[i * ETH_GSTRING_LEN], feature) || + (endswith(feature, "-") && startswith((char *) &strings->data[i * ETH_GSTRING_LEN], feature))) { + size_t block, bit; + + block = i / 32; + bit = i % 32; + + sfeatures->features[block].valid |= 1 << bit; + SET_FLAG(sfeatures->features[block].requested, 1 << bit, flag); + found = true; + } + + return found ? 0 : -ENODATA; +} + +int ethtool_set_features(int *ethtool_fd, const char *ifname, const int *features) { + _cleanup_free_ struct ethtool_gstrings *strings = NULL; + struct ethtool_sfeatures *sfeatures; + struct ifreq ifr = {}; + int i, r; + + assert(ethtool_fd); + assert(ifname); + assert(features); + + if (*ethtool_fd < 0) { + r = ethtool_connect_or_warn(ethtool_fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = get_stringset(*ethtool_fd, &ifr, ETH_SS_FEATURES, &strings); + if (r < 0) + return log_warning_errno(r, "ethtool: could not get ethtool features for %s", ifname); + + sfeatures = alloca0(sizeof(struct ethtool_sfeatures) + DIV_ROUND_UP(strings->len, 32U) * sizeof(sfeatures->features[0])); + sfeatures->cmd = ETHTOOL_SFEATURES; + sfeatures->size = DIV_ROUND_UP(strings->len, 32U); + + for (i = 0; i < _NET_DEV_FEAT_MAX; i++) + if (features[i] != -1) { + r = set_features_bit(strings, netdev_feature_table[i], features[i], sfeatures); + if (r < 0) { + log_warning_errno(r, "ethtool: could not find feature, ignoring: %s", netdev_feature_table[i]); + continue; + } + } + + ifr.ifr_data = (void *) sfeatures; + + r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr); + if (r < 0) + return log_warning_errno(r, "ethtool: could not set ethtool features for %s", ifname); + + return 0; +} + +static int get_glinksettings(int fd, struct ifreq *ifr, struct ethtool_link_usettings **ret) { + struct ecmd { + struct ethtool_link_settings req; + __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; + } ecmd = { + .req.cmd = ETHTOOL_GLINKSETTINGS, + }; + struct ethtool_link_usettings *u; + unsigned offset; + int r; + + assert(fd >= 0); + assert(ifr); + assert(ret); + + /* The interaction user/kernel via the new API requires a small ETHTOOL_GLINKSETTINGS + handshake first to agree on the length of the link mode bitmaps. If kernel doesn't + agree with user, it returns the bitmap length it is expecting from user as a negative + length (and cmd field is 0). When kernel and user agree, kernel returns valid info in + all fields (ie. link mode length > 0 and cmd is ETHTOOL_GLINKSETTINGS). Based on + https://github.com/torvalds/linux/commit/3f1ac7a700d039c61d8d8b99f28d605d489a60cf + */ + + ifr->ifr_data = (void *) &ecmd; + + r = ioctl(fd, SIOCETHTOOL, ifr); + if (r < 0) + return -errno; + + if (ecmd.req.link_mode_masks_nwords >= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS) + return -EOPNOTSUPP; + + ecmd.req.link_mode_masks_nwords = -ecmd.req.link_mode_masks_nwords; + + ifr->ifr_data = (void *) &ecmd; + + r = ioctl(fd, SIOCETHTOOL, ifr); + if (r < 0) + return -errno; + + if (ecmd.req.link_mode_masks_nwords <= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS) + return -EOPNOTSUPP; + + u = new(struct ethtool_link_usettings, 1); + if (!u) + return -ENOMEM; + + *u = (struct ethtool_link_usettings) { + .base = ecmd.req, + }; + + offset = 0; + memcpy(u->link_modes.supported, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords); + + offset += ecmd.req.link_mode_masks_nwords; + memcpy(u->link_modes.advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords); + + offset += ecmd.req.link_mode_masks_nwords; + memcpy(u->link_modes.lp_advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords); + + *ret = u; + + return 0; +} + +static int get_gset(int fd, struct ifreq *ifr, struct ethtool_link_usettings **ret) { + struct ethtool_link_usettings *e; + struct ethtool_cmd ecmd = { + .cmd = ETHTOOL_GSET, + }; + int r; + + assert(fd >= 0); + assert(ifr); + assert(ret); + + ifr->ifr_data = (void *) &ecmd; + + r = ioctl(fd, SIOCETHTOOL, ifr); + if (r < 0) + return -errno; + + e = new(struct ethtool_link_usettings, 1); + if (!e) + return -ENOMEM; + + *e = (struct ethtool_link_usettings) { + .base.cmd = ETHTOOL_GSET, + .base.link_mode_masks_nwords = 1, + .base.speed = ethtool_cmd_speed(&ecmd), + .base.duplex = ecmd.duplex, + .base.port = ecmd.port, + .base.phy_address = ecmd.phy_address, + .base.autoneg = ecmd.autoneg, + .base.mdio_support = ecmd.mdio_support, + + .link_modes.supported[0] = ecmd.supported, + .link_modes.advertising[0] = ecmd.advertising, + .link_modes.lp_advertising[0] = ecmd.lp_advertising, + }; + + *ret = e; + + return 0; +} + +static int set_slinksettings(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) { + struct { + struct ethtool_link_settings req; + __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; + } ecmd = {}; + unsigned offset; + int r; + + assert(fd >= 0); + assert(ifr); + assert(u); + + if (u->base.cmd != ETHTOOL_GLINKSETTINGS || u->base.link_mode_masks_nwords <= 0) + return -EINVAL; + + ecmd.req = u->base; + ecmd.req.cmd = ETHTOOL_SLINKSETTINGS; + offset = 0; + memcpy(&ecmd.link_mode_data[offset], u->link_modes.supported, 4 * ecmd.req.link_mode_masks_nwords); + + offset += ecmd.req.link_mode_masks_nwords; + memcpy(&ecmd.link_mode_data[offset], u->link_modes.advertising, 4 * ecmd.req.link_mode_masks_nwords); + + offset += ecmd.req.link_mode_masks_nwords; + memcpy(&ecmd.link_mode_data[offset], u->link_modes.lp_advertising, 4 * ecmd.req.link_mode_masks_nwords); + + ifr->ifr_data = (void *) &ecmd; + + r = ioctl(fd, SIOCETHTOOL, ifr); + if (r < 0) + return -errno; + + return 0; +} + +static int set_sset(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) { + struct ethtool_cmd ecmd = { + .cmd = ETHTOOL_SSET, + }; + int r; + + assert(fd >= 0); + assert(ifr); + assert(u); + + if (u->base.cmd != ETHTOOL_GSET || u->base.link_mode_masks_nwords <= 0) + return -EINVAL; + + ecmd.supported = u->link_modes.supported[0]; + ecmd.advertising = u->link_modes.advertising[0]; + ecmd.lp_advertising = u->link_modes.lp_advertising[0]; + + ethtool_cmd_speed_set(&ecmd, u->base.speed); + + ecmd.duplex = u->base.duplex; + ecmd.port = u->base.port; + ecmd.phy_address = u->base.phy_address; + ecmd.autoneg = u->base.autoneg; + ecmd.mdio_support = u->base.mdio_support; + ecmd.eth_tp_mdix = u->base.eth_tp_mdix; + ecmd.eth_tp_mdix_ctrl = u->base.eth_tp_mdix_ctrl; + + ifr->ifr_data = (void *) &ecmd; + + r = ioctl(fd, SIOCETHTOOL, ifr); + if (r < 0) + return -errno; + + return 0; +} + +/* If autonegotiation is disabled, the speed and duplex represent the fixed link + * mode and are writable if the driver supports multiple link modes. If it is + * enabled then they are read-only. If the link is up they represent the negotiated + * link mode; if the link is down, the speed is 0, %SPEED_UNKNOWN or the highest + * enabled speed and @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. + */ +int ethtool_set_glinksettings( + int *fd, + const char *ifname, + int autonegotiation, + const uint32_t advertise[static N_ADVERTISE], + uint64_t speed, + Duplex duplex, + NetDevPort port) { + + _cleanup_free_ struct ethtool_link_usettings *u = NULL; + struct ifreq ifr = {}; + int r; + + assert(fd); + assert(ifname); + assert(advertise); + + if (autonegotiation != AUTONEG_DISABLE && memeqzero(advertise, sizeof(uint32_t) * N_ADVERTISE)) { + log_info("ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable."); + return 0; + } + + if (*fd < 0) { + r = ethtool_connect_or_warn(fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = get_glinksettings(*fd, &ifr, &u); + if (r < 0) { + r = get_gset(*fd, &ifr, &u); + if (r < 0) + return log_warning_errno(r, "ethtool: Cannot get device settings for %s : %m", ifname); + } + + if (speed > 0) + u->base.speed = DIV_ROUND_UP(speed, 1000000); + + if (duplex != _DUP_INVALID) + u->base.duplex = duplex; + + if (port != _NET_DEV_PORT_INVALID) + u->base.port = port; + + if (autonegotiation >= 0) + u->base.autoneg = autonegotiation; + + if (!memeqzero(advertise, sizeof(uint32_t) * N_ADVERTISE)) { + u->base.autoneg = AUTONEG_ENABLE; + memcpy(&u->link_modes.advertising, advertise, sizeof(uint32_t) * N_ADVERTISE); + memzero((uint8_t*) &u->link_modes.advertising + sizeof(uint32_t) * N_ADVERTISE, + ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES - sizeof(uint32_t) * N_ADVERTISE); + } + + if (u->base.cmd == ETHTOOL_GLINKSETTINGS) + r = set_slinksettings(*fd, &ifr, u); + else + r = set_sset(*fd, &ifr, u); + if (r < 0) + return log_warning_errno(r, "ethtool: Cannot set device settings for %s: %m", ifname); + + return r; +} + +int ethtool_set_channels(int *fd, const char *ifname, const netdev_channels *channels) { + struct ethtool_channels ecmd = { + .cmd = ETHTOOL_GCHANNELS, + }; + struct ifreq ifr = { + .ifr_data = (void*) &ecmd, + }; + bool need_update = false; + int r; + + assert(fd); + assert(ifname); + assert(channels); + + if (*fd < 0) { + r = ethtool_connect_or_warn(fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + if (channels->rx_count_set && ecmd.rx_count != channels->rx_count) { + ecmd.rx_count = channels->rx_count; + need_update = true; + } + + if (channels->tx_count_set && ecmd.tx_count != channels->tx_count) { + ecmd.tx_count = channels->tx_count; + need_update = true; + } + + if (channels->other_count_set && ecmd.other_count != channels->other_count) { + ecmd.other_count = channels->other_count; + need_update = true; + } + + if (channels->combined_count_set && ecmd.combined_count != channels->combined_count) { + ecmd.combined_count = channels->combined_count; + need_update = true; + } + + if (need_update) { + ecmd.cmd = ETHTOOL_SCHANNELS; + + r = ioctl(*fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + } + + return 0; +} + +int ethtool_set_flow_control(int *fd, const char *ifname, int rx, int tx, int autoneg) { + struct ethtool_pauseparam ecmd = { + .cmd = ETHTOOL_GPAUSEPARAM, + }; + struct ifreq ifr = { + .ifr_data = (void*) &ecmd, + }; + bool need_update = false; + int r; + + assert(fd); + assert(ifname); + + if (*fd < 0) { + r = ethtool_connect_or_warn(fd, true); + if (r < 0) + return r; + } + + strscpy(ifr.ifr_name, IFNAMSIZ, ifname); + + r = ioctl(*fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + + if (rx >= 0 && ecmd.rx_pause != (uint32_t) rx) { + ecmd.rx_pause = rx; + need_update = true; + } + + if (tx >= 0 && ecmd.tx_pause != (uint32_t) tx) { + ecmd.tx_pause = tx; + need_update = true; + } + + if (autoneg >= 0 && ecmd.autoneg != (uint32_t) autoneg) { + ecmd.autoneg = autoneg; + need_update = true; + } + + if (need_update) { + ecmd.cmd = ETHTOOL_SPAUSEPARAM; + + r = ioctl(*fd, SIOCETHTOOL, &ifr); + if (r < 0) + return -errno; + } + + return 0; +} + +int config_parse_channel(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + netdev_channels *channels = data; + uint32_t k; + int r; + + assert(filename); + assert(section); + assert(lvalue); + assert(rvalue); + assert(data); + + r = safe_atou32(rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse channel value for %s=, ignoring: %s", lvalue, rvalue); + return 0; + } + if (k < 1) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid %s= value, ignoring: %s", lvalue, rvalue); + return 0; + } + + if (streq(lvalue, "RxChannels")) { + channels->rx_count = k; + channels->rx_count_set = true; + } else if (streq(lvalue, "TxChannels")) { + channels->tx_count = k; + channels->tx_count_set = true; + } else if (streq(lvalue, "OtherChannels")) { + channels->other_count = k; + channels->other_count_set = true; + } else if (streq(lvalue, "CombinedChannels")) { + channels->combined_count = k; + channels->combined_count_set = true; + } + + return 0; +} + +int config_parse_advertise(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + uint32_t *advertise = data; + const char *p; + int r; + + assert(filename); + assert(section); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + /* Empty string resets the value. */ + memzero(advertise, sizeof(uint32_t) * N_ADVERTISE); + return 0; + } + + for (p = rvalue;;) { + _cleanup_free_ char *w = NULL; + enum ethtool_link_mode_bit_indices mode; + + r = extract_first_word(&p, &w, NULL, 0); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to split advertise modes '%s', ignoring assignment: %m", rvalue); + return 0; + } + if (r == 0) + return 0; + + mode = ethtool_link_mode_bit_from_string(w); + /* We reuse the kernel provided enum which does not contain negative value. So, the cast + * below is mandatory. Otherwise, the check below always passes and access an invalid address. */ + if ((int) mode < 0) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Failed to parse advertise mode, ignoring: %s", w); + continue; + } + + advertise[mode / 32] |= 1UL << (mode % 32); + } +} + +int config_parse_nic_buffer_size(const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + netdev_ring_param *ring = data; + uint32_t k; + int r; + + assert(filename); + assert(section); + assert(lvalue); + assert(rvalue); + assert(data); + + r = safe_atou32(rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse interface buffer value, ignoring: %s", rvalue); + return 0; + } + if (k < 1) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Invalid %s= value, ignoring: %s", lvalue, rvalue); + return 0; + } + + if (streq(lvalue, "RxBufferSize")) { + ring->rx_pending = k; + ring->rx_pending_set = true; + } else if (streq(lvalue, "RxMiniBufferSize")) { + ring->rx_mini_pending = k; + ring->rx_mini_pending_set = true; + } else if (streq(lvalue, "RxJumboBufferSize")) { + ring->rx_jumbo_pending = k; + ring->rx_jumbo_pending_set = true; + } else if (streq(lvalue, "TxBufferSize")) { + ring->tx_pending = k; + ring->tx_pending_set = true; + } + + return 0; +} diff --git a/src/shared/ethtool-util.h b/src/shared/ethtool-util.h new file mode 100644 index 0000000..f94b3e1 --- /dev/null +++ b/src/shared/ethtool-util.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <macro.h> +#include <net/ethernet.h> +#include <linux/ethtool.h> + +#include "conf-parser.h" + +#define N_ADVERTISE 3 + +/* we can't use DUPLEX_ prefix, as it + * clashes with <linux/ethtool.h> */ +typedef enum Duplex { + DUP_HALF = DUPLEX_HALF, + DUP_FULL = DUPLEX_FULL, + _DUP_MAX, + _DUP_INVALID = -1 +} Duplex; + +typedef enum WakeOnLan { + WOL_PHY, + WOL_UCAST, + WOL_MCAST, + WOL_BCAST, + WOL_ARP, + WOL_MAGIC, + WOL_MAGICSECURE, + WOL_OFF, + _WOL_MAX, + _WOL_INVALID = -1 +} WakeOnLan; + +typedef enum NetDevFeature { + NET_DEV_FEAT_RX, + NET_DEV_FEAT_TX, + NET_DEV_FEAT_GSO, + NET_DEV_FEAT_GRO, + NET_DEV_FEAT_LRO, + NET_DEV_FEAT_TSO, + NET_DEV_FEAT_TSO6, + _NET_DEV_FEAT_MAX, + _NET_DEV_FEAT_INVALID = -1 +} NetDevFeature; + +typedef enum NetDevPort { + NET_DEV_PORT_TP = PORT_TP, + NET_DEV_PORT_AUI = PORT_AUI, + NET_DEV_PORT_MII = PORT_MII, + NET_DEV_PORT_FIBRE = PORT_FIBRE, + NET_DEV_PORT_BNC = PORT_BNC, + NET_DEV_PORT_DA = PORT_DA, + NET_DEV_PORT_NONE = PORT_NONE, + NET_DEV_PORT_OTHER = PORT_OTHER, + _NET_DEV_PORT_MAX, + _NET_DEV_PORT_INVALID = -1 +} NetDevPort; + +#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX) +#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES (4 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32) + +/* layout of the struct passed from/to userland */ +struct ethtool_link_usettings { + struct ethtool_link_settings base; + + struct { + uint32_t supported[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; + uint32_t advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; + uint32_t lp_advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; + } link_modes; +}; + +typedef struct netdev_channels { + uint32_t rx_count; + uint32_t tx_count; + uint32_t other_count; + uint32_t combined_count; + + bool rx_count_set; + bool tx_count_set; + bool other_count_set; + bool combined_count_set; +} netdev_channels; + +typedef struct netdev_ring_param { + uint32_t rx_pending; + uint32_t rx_mini_pending; + uint32_t rx_jumbo_pending; + uint32_t tx_pending; + + bool rx_pending_set; + bool rx_mini_pending_set; + bool rx_jumbo_pending_set; + bool tx_pending_set; +} netdev_ring_param; + +int ethtool_get_driver(int *ethtool_fd, const char *ifname, char **ret); +int ethtool_get_link_info(int *ethtool_fd, const char *ifname, + int *ret_autonegotiation, uint64_t *ret_speed, + Duplex *ret_duplex, NetDevPort *ret_port); +int ethtool_get_permanent_macaddr(int *ethtool_fd, const char *ifname, struct ether_addr *ret); +int ethtool_set_speed(int *ethtool_fd, const char *ifname, unsigned speed, Duplex duplex); +int ethtool_set_wol(int *ethtool_fd, const char *ifname, WakeOnLan wol); +int ethtool_set_nic_buffer_size(int *ethtool_fd, const char *ifname, const netdev_ring_param *ring); +int ethtool_set_features(int *ethtool_fd, const char *ifname, const int *features); +int ethtool_set_glinksettings(int *ethtool_fd, const char *ifname, + int autonegotiation, const uint32_t advertise[static N_ADVERTISE], + uint64_t speed, Duplex duplex, NetDevPort port); +int ethtool_set_channels(int *ethtool_fd, const char *ifname, const netdev_channels *channels); +int ethtool_set_flow_control(int *fd, const char *ifname, int rx, int tx, int autoneg); + +const char *duplex_to_string(Duplex d) _const_; +Duplex duplex_from_string(const char *d) _pure_; + +const char *wol_to_string(WakeOnLan wol) _const_; +WakeOnLan wol_from_string(const char *wol) _pure_; + +const char *port_to_string(NetDevPort port) _const_; +NetDevPort port_from_string(const char *port) _pure_; + +const char *ethtool_link_mode_bit_to_string(enum ethtool_link_mode_bit_indices val) _const_; +enum ethtool_link_mode_bit_indices ethtool_link_mode_bit_from_string(const char *str) _pure_; + +CONFIG_PARSER_PROTOTYPE(config_parse_duplex); +CONFIG_PARSER_PROTOTYPE(config_parse_wol); +CONFIG_PARSER_PROTOTYPE(config_parse_port); +CONFIG_PARSER_PROTOTYPE(config_parse_channel); +CONFIG_PARSER_PROTOTYPE(config_parse_advertise); +CONFIG_PARSER_PROTOTYPE(config_parse_nic_buffer_size); diff --git a/src/shared/exec-util.c b/src/shared/exec-util.c new file mode 100644 index 0000000..61ee3b1 --- /dev/null +++ b/src/shared/exec-util.c @@ -0,0 +1,446 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <dirent.h> +#include <errno.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "env-file.h" +#include "env-util.h" +#include "exec-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "macro.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "serialize.h" +#include "set.h" +#include "signal-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "tmpfile-util.h" +#include "util.h" + +/* Put this test here for a lack of better place */ +assert_cc(EAGAIN == EWOULDBLOCK); + +static int do_spawn(const char *path, char *argv[], int stdout_fd, pid_t *pid) { + + pid_t _pid; + int r; + + if (null_or_empty_path(path)) { + log_debug("%s is empty (a mask).", path); + return 0; + } + + r = safe_fork("(direxec)", FORK_DEATHSIG|FORK_LOG, &_pid); + if (r < 0) + return r; + if (r == 0) { + char *_argv[2]; + + if (stdout_fd >= 0) { + r = rearrange_stdio(STDIN_FILENO, stdout_fd, STDERR_FILENO); + if (r < 0) + _exit(EXIT_FAILURE); + } + + (void) rlimit_nofile_safe(); + + if (!argv) { + _argv[0] = (char*) path; + _argv[1] = NULL; + argv = _argv; + } else + argv[0] = (char*) path; + + execv(path, argv); + log_error_errno(errno, "Failed to execute %s: %m", path); + _exit(EXIT_FAILURE); + } + + *pid = _pid; + return 1; +} + +static int do_execute( + char **directories, + usec_t timeout, + gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX], + void* const callback_args[_STDOUT_CONSUME_MAX], + int output_fd, + char *argv[], + char *envp[], + ExecDirFlags flags) { + + _cleanup_hashmap_free_free_ Hashmap *pids = NULL; + _cleanup_strv_free_ char **paths = NULL; + char **path, **e; + int r; + bool parallel_execution; + + /* We fork this all off from a child process so that we can somewhat cleanly make + * use of SIGALRM to set a time limit. + * + * We attempt to perform parallel execution if configured by the user, however + * if `callbacks` is nonnull, execution must be serial. + */ + parallel_execution = FLAGS_SET(flags, EXEC_DIR_PARALLEL) && !callbacks; + + r = conf_files_list_strv(&paths, NULL, NULL, CONF_FILES_EXECUTABLE|CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char* const*) directories); + if (r < 0) + return log_error_errno(r, "Failed to enumerate executables: %m"); + + if (parallel_execution) { + pids = hashmap_new(NULL); + if (!pids) + return log_oom(); + } + + /* Abort execution of this process after the timeout. We simply rely on SIGALRM as + * default action terminating the process, and turn on alarm(). */ + + if (timeout != USEC_INFINITY) + alarm(DIV_ROUND_UP(timeout, USEC_PER_SEC)); + + STRV_FOREACH(e, envp) + if (putenv(*e) != 0) + return log_error_errno(errno, "Failed to set environment variable: %m"); + + STRV_FOREACH(path, paths) { + _cleanup_free_ char *t = NULL; + _cleanup_close_ int fd = -1; + pid_t pid; + + t = strdup(*path); + if (!t) + return log_oom(); + + if (callbacks) { + fd = open_serialization_fd(basename(*path)); + if (fd < 0) + return log_error_errno(fd, "Failed to open serialization file: %m"); + } + + r = do_spawn(t, argv, fd, &pid); + if (r <= 0) + continue; + + if (parallel_execution) { + r = hashmap_put(pids, PID_TO_PTR(pid), t); + if (r < 0) + return log_oom(); + t = NULL; + } else { + r = wait_for_terminate_and_check(t, pid, WAIT_LOG); + if (FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS)) { + if (r < 0) + continue; + } else if (r > 0) + return r; + + if (callbacks) { + if (lseek(fd, 0, SEEK_SET) < 0) + return log_error_errno(errno, "Failed to seek on serialization fd: %m"); + + r = callbacks[STDOUT_GENERATE](fd, callback_args[STDOUT_GENERATE]); + fd = -1; + if (r < 0) + return log_error_errno(r, "Failed to process output from %s: %m", *path); + } + } + } + + if (callbacks) { + r = callbacks[STDOUT_COLLECT](output_fd, callback_args[STDOUT_COLLECT]); + if (r < 0) + return log_error_errno(r, "Callback two failed: %m"); + } + + while (!hashmap_isempty(pids)) { + _cleanup_free_ char *t = NULL; + pid_t pid; + + pid = PTR_TO_PID(hashmap_first_key(pids)); + assert(pid > 0); + + t = hashmap_remove(pids, PID_TO_PTR(pid)); + assert(t); + + r = wait_for_terminate_and_check(t, pid, WAIT_LOG); + if (!FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS) && r > 0) + return r; + } + + return 0; +} + +int execute_directories( + const char* const* directories, + usec_t timeout, + gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX], + void* const callback_args[_STDOUT_CONSUME_MAX], + char *argv[], + char *envp[], + ExecDirFlags flags) { + + char **dirs = (char**) directories; + _cleanup_close_ int fd = -1; + char *name; + int r; + pid_t executor_pid; + + assert(!strv_isempty(dirs)); + + name = basename(dirs[0]); + assert(!isempty(name)); + + if (callbacks) { + assert(callback_args); + assert(callbacks[STDOUT_GENERATE]); + assert(callbacks[STDOUT_COLLECT]); + assert(callbacks[STDOUT_CONSUME]); + + fd = open_serialization_fd(name); + if (fd < 0) + return log_error_errno(fd, "Failed to open serialization file: %m"); + } + + /* Executes all binaries in the directories serially or in parallel and waits for + * them to finish. Optionally a timeout is applied. If a file with the same name + * exists in more than one directory, the earliest one wins. */ + + r = safe_fork("(sd-executor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &executor_pid); + if (r < 0) + return r; + if (r == 0) { + r = do_execute(dirs, timeout, callbacks, callback_args, fd, argv, envp, flags); + _exit(r < 0 ? EXIT_FAILURE : r); + } + + r = wait_for_terminate_and_check("(sd-executor)", executor_pid, 0); + if (r < 0) + return r; + if (!FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS) && r > 0) + return r; + + if (!callbacks) + return 0; + + if (lseek(fd, 0, SEEK_SET) < 0) + return log_error_errno(errno, "Failed to rewind serialization fd: %m"); + + r = callbacks[STDOUT_CONSUME](fd, callback_args[STDOUT_CONSUME]); + fd = -1; + if (r < 0) + return log_error_errno(r, "Failed to parse returned data: %m"); + return 0; +} + +static int gather_environment_generate(int fd, void *arg) { + char ***env = arg, **x, **y; + _cleanup_fclose_ FILE *f = NULL; + _cleanup_strv_free_ char **new = NULL; + int r; + + /* Read a series of VAR=value assignments from fd, use them to update the list of + * variables in env. Also update the exported environment. + * + * fd is always consumed, even on error. + */ + + assert(env); + + f = fdopen(fd, "r"); + if (!f) { + safe_close(fd); + return -errno; + } + + r = load_env_file_pairs(f, NULL, &new); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(x, y, new) { + char *p; + + if (!env_name_is_valid(*x)) { + log_warning("Invalid variable assignment \"%s=...\", ignoring.", *x); + continue; + } + + p = strjoin(*x, "=", *y); + if (!p) + return -ENOMEM; + + r = strv_env_replace(env, p); + if (r < 0) + return r; + + if (setenv(*x, *y, true) < 0) + return -errno; + } + + return r; +} + +static int gather_environment_collect(int fd, void *arg) { + _cleanup_fclose_ FILE *f = NULL; + char ***env = arg; + int r; + + /* Write out a series of env=cescape(VAR=value) assignments to fd. */ + + assert(env); + + f = fdopen(fd, "w"); + if (!f) { + safe_close(fd); + return -errno; + } + + r = serialize_strv(f, "env", *env); + if (r < 0) + return r; + + r = fflush_and_check(f); + if (r < 0) + return r; + + return 0; +} + +static int gather_environment_consume(int fd, void *arg) { + _cleanup_fclose_ FILE *f = NULL; + char ***env = arg; + int r = 0; + + /* Read a series of env=cescape(VAR=value) assignments from fd into env. */ + + assert(env); + + f = fdopen(fd, "r"); + if (!f) { + safe_close(fd); + return -errno; + } + + for (;;) { + _cleanup_free_ char *line = NULL; + const char *v; + int k; + + k = read_line(f, LONG_LINE_MAX, &line); + if (k < 0) + return k; + if (k == 0) + break; + + v = startswith(line, "env="); + if (!v) { + log_debug("Serialization line \"%s\" unexpectedly didn't start with \"env=\".", line); + if (r == 0) + r = -EINVAL; + + continue; + } + + k = deserialize_environment(v, env); + if (k < 0) { + log_debug_errno(k, "Invalid serialization line \"%s\": %m", line); + + if (r == 0) + r = k; + } + } + + return r; +} + +int exec_command_flags_from_strv(char **ex_opts, ExecCommandFlags *flags) { + ExecCommandFlags ex_flag, ret_flags = 0; + char **opt; + + assert(flags); + + STRV_FOREACH(opt, ex_opts) { + ex_flag = exec_command_flags_from_string(*opt); + if (ex_flag >= 0) + ret_flags |= ex_flag; + else + return -EINVAL; + } + + *flags = ret_flags; + + return 0; +} + +int exec_command_flags_to_strv(ExecCommandFlags flags, char ***ex_opts) { + _cleanup_strv_free_ char **ret_opts = NULL; + ExecCommandFlags it = flags; + const char *str; + int i, r; + + assert(ex_opts); + + for (i = 0; it != 0; it &= ~(1 << i), i++) { + if (FLAGS_SET(flags, (1 << i))) { + str = exec_command_flags_to_string(1 << i); + if (!str) + return -EINVAL; + + r = strv_extend(&ret_opts, str); + if (r < 0) + return r; + } + } + + *ex_opts = TAKE_PTR(ret_opts); + + return 0; +} + +const gather_stdout_callback_t gather_environment[] = { + gather_environment_generate, + gather_environment_collect, + gather_environment_consume, +}; + +static const char* const exec_command_strings[] = { + "ignore-failure", /* EXEC_COMMAND_IGNORE_FAILURE */ + "privileged", /* EXEC_COMMAND_FULLY_PRIVILEGED */ + "no-setuid", /* EXEC_COMMAND_NO_SETUID */ + "ambient", /* EXEC_COMMAND_AMBIENT_MAGIC */ + "no-env-expand", /* EXEC_COMMAND_NO_ENV_EXPAND */ +}; + +const char* exec_command_flags_to_string(ExecCommandFlags i) { + size_t idx; + + for (idx = 0; idx < ELEMENTSOF(exec_command_strings); idx++) + if (i == (1 << idx)) + return exec_command_strings[idx]; + + return NULL; +} + +ExecCommandFlags exec_command_flags_from_string(const char *s) { + ssize_t idx; + + idx = string_table_lookup(exec_command_strings, ELEMENTSOF(exec_command_strings), s); + + if (idx < 0) + return _EXEC_COMMAND_FLAGS_INVALID; + else + return 1 << idx; +} diff --git a/src/shared/exec-util.h b/src/shared/exec-util.h new file mode 100644 index 0000000..a69d57c --- /dev/null +++ b/src/shared/exec-util.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "time-util.h" + +typedef int (*gather_stdout_callback_t) (int fd, void *arg); + +enum { + STDOUT_GENERATE, /* from generators to helper process */ + STDOUT_COLLECT, /* from helper process to main process */ + STDOUT_CONSUME, /* process data in main process */ + _STDOUT_CONSUME_MAX, +}; + +typedef enum { + EXEC_DIR_NONE = 0, /* No execdir flags */ + EXEC_DIR_PARALLEL = 1 << 0, /* Execute scripts in parallel, if possible */ + EXEC_DIR_IGNORE_ERRORS = 1 << 1, /* Ignore non-zero exit status of scripts */ +} ExecDirFlags; + +typedef enum ExecCommandFlags { + EXEC_COMMAND_IGNORE_FAILURE = 1 << 0, + EXEC_COMMAND_FULLY_PRIVILEGED = 1 << 1, + EXEC_COMMAND_NO_SETUID = 1 << 2, + EXEC_COMMAND_AMBIENT_MAGIC = 1 << 3, + EXEC_COMMAND_NO_ENV_EXPAND = 1 << 4, + _EXEC_COMMAND_FLAGS_INVALID = -1, +} ExecCommandFlags; + +int execute_directories( + const char* const* directories, + usec_t timeout, + gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX], + void* const callback_args[_STDOUT_CONSUME_MAX], + char *argv[], + char *envp[], + ExecDirFlags flags); + +int exec_command_flags_from_strv(char **ex_opts, ExecCommandFlags *flags); +int exec_command_flags_to_strv(ExecCommandFlags flags, char ***ex_opts); + +extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX]; + +const char* exec_command_flags_to_string(ExecCommandFlags i); +ExecCommandFlags exec_command_flags_from_string(const char *s); diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c new file mode 100644 index 0000000..b71dd7a --- /dev/null +++ b/src/shared/exit-status.c @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <signal.h> +#include <stdlib.h> +#include <sysexits.h> + +#include "exit-status.h" +#include "macro.h" +#include "parse-util.h" +#include "set.h" +#include "string-util.h" + +const ExitStatusMapping exit_status_mappings[256] = { + /* Exit status ranges: + * + * 0…1 │ ISO C, EXIT_SUCCESS + EXIT_FAILURE + * 2…7 │ LSB exit codes for init scripts + * 8…63 │ (Currently unmapped) + * 64…78 │ BSD defined exit codes + * 79…199 │ (Currently unmapped) + * 200…242 │ systemd's private error codes (might be extended to 254 in future development) + * 243…254 │ (Currently unmapped, but see above) + * + * 255 │ EXIT_EXCEPTION (We use this to propagate exit-by-signal events. It's frequently used by others apps (like bash) + * │ to indicate exit reason that cannot really be expressed in a single exit status value — such as a propagated + * │ signal or such, and we follow that logic here.) + */ + + [EXIT_SUCCESS] = { "SUCCESS", EXIT_STATUS_LIBC }, + [EXIT_FAILURE] = { "FAILURE", EXIT_STATUS_LIBC }, + + [EXIT_CHDIR] = { "CHDIR", EXIT_STATUS_SYSTEMD }, + [EXIT_NICE] = { "NICE", EXIT_STATUS_SYSTEMD }, + [EXIT_FDS] = { "FDS", EXIT_STATUS_SYSTEMD }, + [EXIT_EXEC] = { "EXEC", EXIT_STATUS_SYSTEMD }, + [EXIT_MEMORY] = { "MEMORY", EXIT_STATUS_SYSTEMD }, + [EXIT_LIMITS] = { "LIMITS", EXIT_STATUS_SYSTEMD }, + [EXIT_OOM_ADJUST] = { "OOM_ADJUST", EXIT_STATUS_SYSTEMD }, + [EXIT_SIGNAL_MASK] = { "SIGNAL_MASK", EXIT_STATUS_SYSTEMD }, + [EXIT_STDIN] = { "STDIN", EXIT_STATUS_SYSTEMD }, + [EXIT_STDOUT] = { "STDOUT", EXIT_STATUS_SYSTEMD }, + [EXIT_CHROOT] = { "CHROOT", EXIT_STATUS_SYSTEMD }, + [EXIT_IOPRIO] = { "IOPRIO", EXIT_STATUS_SYSTEMD }, + [EXIT_TIMERSLACK] = { "TIMERSLACK", EXIT_STATUS_SYSTEMD }, + [EXIT_SECUREBITS] = { "SECUREBITS", EXIT_STATUS_SYSTEMD }, + [EXIT_SETSCHEDULER] = { "SETSCHEDULER", EXIT_STATUS_SYSTEMD }, + [EXIT_CPUAFFINITY] = { "CPUAFFINITY", EXIT_STATUS_SYSTEMD }, + [EXIT_GROUP] = { "GROUP", EXIT_STATUS_SYSTEMD }, + [EXIT_USER] = { "USER", EXIT_STATUS_SYSTEMD }, + [EXIT_CAPABILITIES] = { "CAPABILITIES", EXIT_STATUS_SYSTEMD }, + [EXIT_CGROUP] = { "CGROUP", EXIT_STATUS_SYSTEMD }, + [EXIT_SETSID] = { "SETSID", EXIT_STATUS_SYSTEMD }, + [EXIT_CONFIRM] = { "CONFIRM", EXIT_STATUS_SYSTEMD }, + [EXIT_STDERR] = { "STDERR", EXIT_STATUS_SYSTEMD }, + [EXIT_PAM] = { "PAM", EXIT_STATUS_SYSTEMD }, + [EXIT_NETWORK] = { "NETWORK", EXIT_STATUS_SYSTEMD }, + [EXIT_NAMESPACE] = { "NAMESPACE", EXIT_STATUS_SYSTEMD }, + [EXIT_NO_NEW_PRIVILEGES] = { "NO_NEW_PRIVILEGES", EXIT_STATUS_SYSTEMD }, + [EXIT_SECCOMP] = { "SECCOMP", EXIT_STATUS_SYSTEMD }, + [EXIT_SELINUX_CONTEXT] = { "SELINUX_CONTEXT", EXIT_STATUS_SYSTEMD }, + [EXIT_PERSONALITY] = { "PERSONALITY", EXIT_STATUS_SYSTEMD }, + [EXIT_APPARMOR_PROFILE] = { "APPARMOR", EXIT_STATUS_SYSTEMD }, + [EXIT_ADDRESS_FAMILIES] = { "ADDRESS_FAMILIES", EXIT_STATUS_SYSTEMD }, + [EXIT_RUNTIME_DIRECTORY] = { "RUNTIME_DIRECTORY", EXIT_STATUS_SYSTEMD }, + [EXIT_CHOWN] = { "CHOWN", EXIT_STATUS_SYSTEMD }, + [EXIT_SMACK_PROCESS_LABEL] = { "SMACK_PROCESS_LABEL", EXIT_STATUS_SYSTEMD }, + [EXIT_KEYRING] = { "KEYRING", EXIT_STATUS_SYSTEMD }, + [EXIT_STATE_DIRECTORY] = { "STATE_DIRECTORY", EXIT_STATUS_SYSTEMD }, + [EXIT_CACHE_DIRECTORY] = { "CACHE_DIRECTORY", EXIT_STATUS_SYSTEMD }, + [EXIT_LOGS_DIRECTORY] = { "LOGS_DIRECTORY", EXIT_STATUS_SYSTEMD }, + [EXIT_CONFIGURATION_DIRECTORY] = { "CONFIGURATION_DIRECTORY", EXIT_STATUS_SYSTEMD }, + [EXIT_NUMA_POLICY] = { "NUMA_POLICY", EXIT_STATUS_SYSTEMD }, + [EXIT_CREDENTIALS] = { "CREDENTIALS", EXIT_STATUS_SYSTEMD }, + + [EXIT_EXCEPTION] = { "EXCEPTION", EXIT_STATUS_SYSTEMD }, + + [EXIT_INVALIDARGUMENT] = { "INVALIDARGUMENT", EXIT_STATUS_LSB }, + [EXIT_NOTIMPLEMENTED] = { "NOTIMPLEMENTED", EXIT_STATUS_LSB }, + [EXIT_NOPERMISSION] = { "NOPERMISSION", EXIT_STATUS_LSB }, + [EXIT_NOTINSTALLED] = { "NOTINSTALLED", EXIT_STATUS_LSB }, + [EXIT_NOTCONFIGURED] = { "NOTCONFIGURED", EXIT_STATUS_LSB }, + [EXIT_NOTRUNNING] = { "NOTRUNNING", EXIT_STATUS_LSB }, + + [EX_USAGE] = { "USAGE", EXIT_STATUS_BSD }, + [EX_DATAERR] = { "DATAERR", EXIT_STATUS_BSD }, + [EX_NOINPUT] = { "NOINPUT", EXIT_STATUS_BSD }, + [EX_NOUSER] = { "NOUSER", EXIT_STATUS_BSD }, + [EX_NOHOST] = { "NOHOST", EXIT_STATUS_BSD }, + [EX_UNAVAILABLE] = { "UNAVAILABLE", EXIT_STATUS_BSD }, + [EX_SOFTWARE] = { "SOFTWARE", EXIT_STATUS_BSD }, + [EX_OSERR] = { "OSERR", EXIT_STATUS_BSD }, + [EX_OSFILE] = { "OSFILE", EXIT_STATUS_BSD }, + [EX_CANTCREAT] = { "CANTCREAT", EXIT_STATUS_BSD }, + [EX_IOERR] = { "IOERR", EXIT_STATUS_BSD }, + [EX_TEMPFAIL] = { "TEMPFAIL", EXIT_STATUS_BSD }, + [EX_PROTOCOL] = { "PROTOCOL", EXIT_STATUS_BSD }, + [EX_NOPERM] = { "NOPERM", EXIT_STATUS_BSD }, + [EX_CONFIG] = { "CONFIG", EXIT_STATUS_BSD }, +}; + +const char* exit_status_to_string(int code, ExitStatusClass class) { + if (code < 0 || (size_t) code >= ELEMENTSOF(exit_status_mappings)) + return NULL; + return class & exit_status_mappings[code].class ? exit_status_mappings[code].name : NULL; +} + +const char* exit_status_class(int code) { + if (code < 0 || (size_t) code >= ELEMENTSOF(exit_status_mappings)) + return NULL; + + switch (exit_status_mappings[code].class) { + case EXIT_STATUS_LIBC: + return "libc"; + case EXIT_STATUS_SYSTEMD: + return "systemd"; + case EXIT_STATUS_LSB: + return "LSB"; + case EXIT_STATUS_BSD: + return "BSD"; + default: return NULL; + } +} + +int exit_status_from_string(const char *s) { + uint8_t val; + int r; + + for (size_t i = 0; i < ELEMENTSOF(exit_status_mappings); i++) + if (streq_ptr(s, exit_status_mappings[i].name)) + return i; + + r = safe_atou8(s, &val); + if (r < 0) + return r; + + return val; +} + +bool is_clean_exit(int code, int status, ExitClean clean, const ExitStatusSet *success_status) { + if (code == CLD_EXITED) + return status == 0 || + (success_status && + bitmap_isset(&success_status->status, status)); + + /* If a daemon does not implement handlers for some of the signals, we do not consider this an + unclean shutdown */ + if (code == CLD_KILLED) + return + (clean == EXIT_CLEAN_DAEMON && IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE)) || + (success_status && + bitmap_isset(&success_status->signal, status)); + + return false; +} + +void exit_status_set_free(ExitStatusSet *x) { + assert(x); + + bitmap_clear(&x->status); + bitmap_clear(&x->signal); +} + +bool exit_status_set_is_empty(const ExitStatusSet *x) { + if (!x) + return true; + + return bitmap_isclear(&x->status) && bitmap_isclear(&x->signal); +} + +bool exit_status_set_test(const ExitStatusSet *x, int code, int status) { + if (code == CLD_EXITED && bitmap_isset(&x->status, status)) + return true; + + if (IN_SET(code, CLD_KILLED, CLD_DUMPED) && bitmap_isset(&x->signal, status)) + return true; + + return false; +} diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h new file mode 100644 index 0000000..05707bf --- /dev/null +++ b/src/shared/exit-status.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "bitmap.h" +#include "hashmap.h" +#include "macro.h" + +/* This defines pretty names for the LSB 'start' verb exit codes. Note that they shouldn't be confused with + * the LSB 'status' verb exit codes which are defined very differently. For details see: + * + * https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html + */ + +enum { + /* EXIT_SUCCESS defined by libc */ + /* EXIT_FAILURE defined by libc */ + EXIT_INVALIDARGUMENT = 2, + EXIT_NOTIMPLEMENTED = 3, + EXIT_NOPERMISSION = 4, + EXIT_NOTINSTALLED = 5, + EXIT_NOTCONFIGURED = 6, + EXIT_NOTRUNNING = 7, + + /* BSD's sysexits.h defines a couple EX_xyz exit codes in the range 64 … 78 */ + + /* The LSB suggests that error codes >= 200 are "reserved". We use them here under the assumption + * that they hence are unused by init scripts. */ + EXIT_CHDIR = 200, + EXIT_NICE, + EXIT_FDS, + EXIT_EXEC, + EXIT_MEMORY, + EXIT_LIMITS, + EXIT_OOM_ADJUST, + EXIT_SIGNAL_MASK, + EXIT_STDIN, + EXIT_STDOUT, + EXIT_CHROOT, /* 210 */ + EXIT_IOPRIO, + EXIT_TIMERSLACK, + EXIT_SECUREBITS, + EXIT_SETSCHEDULER, + EXIT_CPUAFFINITY, + EXIT_GROUP, + EXIT_USER, + EXIT_CAPABILITIES, + EXIT_CGROUP, + EXIT_SETSID, /* 220 */ + EXIT_CONFIRM, + EXIT_STDERR, + _EXIT_RESERVED, /* used to be tcpwrap, don't reuse! */ + EXIT_PAM, + EXIT_NETWORK, + EXIT_NAMESPACE, + EXIT_NO_NEW_PRIVILEGES, + EXIT_SECCOMP, + EXIT_SELINUX_CONTEXT, + EXIT_PERSONALITY, /* 230 */ + EXIT_APPARMOR_PROFILE, + EXIT_ADDRESS_FAMILIES, + EXIT_RUNTIME_DIRECTORY, + _EXIT_RESERVED2, /* used to be used by kdbus, don't reuse */ + EXIT_CHOWN, + EXIT_SMACK_PROCESS_LABEL, + EXIT_KEYRING, + EXIT_STATE_DIRECTORY, + EXIT_CACHE_DIRECTORY, + EXIT_LOGS_DIRECTORY, /* 240 */ + EXIT_CONFIGURATION_DIRECTORY, + EXIT_NUMA_POLICY, + EXIT_CREDENTIALS, + + EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */ +}; + +typedef enum ExitStatusClass { + EXIT_STATUS_LIBC = 1 << 0, /* libc EXIT_STATUS/EXIT_FAILURE */ + EXIT_STATUS_SYSTEMD = 1 << 1, /* systemd's own exit codes */ + EXIT_STATUS_LSB = 1 << 2, /* LSB exit codes */ + EXIT_STATUS_BSD = 1 << 3, /* BSD (EX_xyz) exit codes */ + EXIT_STATUS_FULL = EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD | EXIT_STATUS_LSB | EXIT_STATUS_BSD, +} ExitStatusClass; + +typedef struct ExitStatusSet { + Bitmap status; + Bitmap signal; +} ExitStatusSet; + +const char* exit_status_to_string(int code, ExitStatusClass class) _const_; +const char* exit_status_class(int code) _const_; +int exit_status_from_string(const char *s) _pure_; + +typedef struct ExitStatusMapping { + const char *name; + ExitStatusClass class; +} ExitStatusMapping; + +extern const ExitStatusMapping exit_status_mappings[256]; + +typedef enum ExitClean { + EXIT_CLEAN_DAEMON, + EXIT_CLEAN_COMMAND, +} ExitClean; + +bool is_clean_exit(int code, int status, ExitClean clean, const ExitStatusSet *success_status); + +void exit_status_set_free(ExitStatusSet *x); +bool exit_status_set_is_empty(const ExitStatusSet *x); +bool exit_status_set_test(const ExitStatusSet *x, int code, int status); diff --git a/src/shared/fdset.c b/src/shared/fdset.c new file mode 100644 index 0000000..679e4aa --- /dev/null +++ b/src/shared/fdset.c @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> + +#include "sd-daemon.h" + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fdset.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "set.h" + +#define MAKE_SET(s) ((Set*) s) +#define MAKE_FDSET(s) ((FDSet*) s) + +FDSet *fdset_new(void) { + return MAKE_FDSET(set_new(NULL)); +} + +int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds) { + size_t i; + FDSet *s; + int r; + + assert(ret); + + s = fdset_new(); + if (!s) + return -ENOMEM; + + for (i = 0; i < n_fds; i++) { + + r = fdset_put(s, fds[i]); + if (r < 0) { + set_free(MAKE_SET(s)); + return r; + } + } + + *ret = s; + return 0; +} + +void fdset_close(FDSet *s) { + void *p; + + while ((p = set_steal_first(MAKE_SET(s)))) { + /* Valgrind's fd might have ended up in this set here, due to fdset_new_fill(). We'll ignore + * all failures here, so that the EBADFD that valgrind will return us on close() doesn't + * influence us */ + + /* When reloading duplicates of the private bus connection fds and suchlike are closed here, + * which has no effect at all, since they are only duplicates. So don't be surprised about + * these log messages. */ + + log_debug("Closing set fd %i", PTR_TO_FD(p)); + (void) close_nointr(PTR_TO_FD(p)); + } +} + +FDSet* fdset_free(FDSet *s) { + fdset_close(s); + set_free(MAKE_SET(s)); + return NULL; +} + +int fdset_put(FDSet *s, int fd) { + assert(s); + assert(fd >= 0); + + return set_put(MAKE_SET(s), FD_TO_PTR(fd)); +} + +int fdset_put_dup(FDSet *s, int fd) { + int copy, r; + + assert(s); + assert(fd >= 0); + + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + r = fdset_put(s, copy); + if (r < 0) { + safe_close(copy); + return r; + } + + return copy; +} + +bool fdset_contains(FDSet *s, int fd) { + assert(s); + assert(fd >= 0); + + return !!set_get(MAKE_SET(s), FD_TO_PTR(fd)); +} + +int fdset_remove(FDSet *s, int fd) { + assert(s); + assert(fd >= 0); + + return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT; +} + +int fdset_new_fill(FDSet **_s) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r = 0; + FDSet *s; + + assert(_s); + + /* Creates an fdset and fills in all currently open file + * descriptors. */ + + d = opendir("/proc/self/fd"); + if (!d) + return -errno; + + s = fdset_new(); + if (!s) { + r = -ENOMEM; + goto finish; + } + + FOREACH_DIRENT(de, d, return -errno) { + int fd = -1; + + r = safe_atoi(de->d_name, &fd); + if (r < 0) + goto finish; + + if (fd < 3) + continue; + + if (fd == dirfd(d)) + continue; + + r = fdset_put(s, fd); + if (r < 0) + goto finish; + } + + r = 0; + *_s = TAKE_PTR(s); + +finish: + /* We won't close the fds here! */ + if (s) + set_free(MAKE_SET(s)); + + return r; +} + +int fdset_cloexec(FDSet *fds, bool b) { + void *p; + int r; + + assert(fds); + + SET_FOREACH(p, MAKE_SET(fds)) { + r = fd_cloexec(PTR_TO_FD(p), b); + if (r < 0) + return r; + } + + return 0; +} + +int fdset_new_listen_fds(FDSet **_s, bool unset) { + int n, fd, r; + FDSet *s; + + assert(_s); + + /* Creates an fdset and fills in all passed file descriptors */ + + s = fdset_new(); + if (!s) { + r = -ENOMEM; + goto fail; + } + + n = sd_listen_fds(unset); + for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) { + r = fdset_put(s, fd); + if (r < 0) + goto fail; + } + + *_s = s; + return 0; + +fail: + if (s) + set_free(MAKE_SET(s)); + + return r; +} + +int fdset_close_others(FDSet *fds) { + void *e; + int *a = NULL; + size_t j = 0, m; + + m = fdset_size(fds); + + if (m > 0) { + a = newa(int, m); + SET_FOREACH(e, MAKE_SET(fds)) + a[j++] = PTR_TO_FD(e); + } + + assert(j == m); + + return close_all_fds(a, j); +} + +unsigned fdset_size(FDSet *fds) { + return set_size(MAKE_SET(fds)); +} + +bool fdset_isempty(FDSet *fds) { + return set_isempty(MAKE_SET(fds)); +} + +int fdset_iterate(FDSet *s, Iterator *i) { + void *p; + + if (!set_iterate(MAKE_SET(s), i, &p)) + return -ENOENT; + + return PTR_TO_FD(p); +} + +int fdset_steal_first(FDSet *fds) { + void *p; + + p = set_steal_first(MAKE_SET(fds)); + if (!p) + return -ENOENT; + + return PTR_TO_FD(p); +} diff --git a/src/shared/fdset.h b/src/shared/fdset.h new file mode 100644 index 0000000..39d15ee --- /dev/null +++ b/src/shared/fdset.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" +#include "set.h" + +typedef struct FDSet FDSet; + +FDSet* fdset_new(void); +FDSet* fdset_free(FDSet *s); + +int fdset_put(FDSet *s, int fd); +int fdset_put_dup(FDSet *s, int fd); + +bool fdset_contains(FDSet *s, int fd); +int fdset_remove(FDSet *s, int fd); + +int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds); +int fdset_new_fill(FDSet **ret); +int fdset_new_listen_fds(FDSet **ret, bool unset); + +int fdset_cloexec(FDSet *fds, bool b); + +int fdset_close_others(FDSet *fds); + +unsigned fdset_size(FDSet *fds); +bool fdset_isempty(FDSet *fds); + +int fdset_iterate(FDSet *s, Iterator *i); + +int fdset_steal_first(FDSet *fds); + +void fdset_close(FDSet *fds); + +#define _FDSET_FOREACH(fd, fds, i) \ + for (Iterator i = ITERATOR_FIRST; ((fd) = fdset_iterate((fds), &i)) >= 0; ) +#define FDSET_FOREACH(fd, fds) \ + _FDSET_FOREACH(fd, fds, UNIQ_T(i, UNIQ)) + +DEFINE_TRIVIAL_CLEANUP_FUNC(FDSet*, fdset_free); +#define _cleanup_fdset_free_ _cleanup_(fdset_freep) diff --git a/src/shared/fileio-label.c b/src/shared/fileio-label.c new file mode 100644 index 0000000..d03b054 --- /dev/null +++ b/src/shared/fileio-label.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/stat.h> + +#include "fileio-label.h" +#include "fileio.h" +#include "selinux-util.h" + +int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts) { + int r; + + r = mac_selinux_create_file_prepare(fn, S_IFREG); + if (r < 0) + return r; + + r = write_string_file_ts(fn, line, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC, ts); + + mac_selinux_create_file_clear(); + + return r; +} + +int create_shutdown_run_nologin_or_warn(void) { + int r; + + /* This is used twice: once in systemd-user-sessions.service, in order to block logins when we actually go + * down, and once in systemd-logind.service when shutdowns are scheduled, and logins are to be turned off a bit + * in advance. We use the same wording of the message in both cases. */ + + r = write_string_file_atomic_label("/run/nologin", + "System is going down. Unprivileged users are not permitted to log in anymore. " + "For technical details, see pam_nologin(8)."); + if (r < 0) + return log_error_errno(r, "Failed to create /run/nologin: %m"); + + return 0; +} diff --git a/src/shared/fileio-label.h b/src/shared/fileio-label.h new file mode 100644 index 0000000..03b4a16 --- /dev/null +++ b/src/shared/fileio-label.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to + * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not + * for all */ + +int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts); +static inline int write_string_file_atomic_label(const char *fn, const char *line) { + return write_string_file_atomic_label_ts(fn, line, NULL); +} + +int create_shutdown_run_nologin_or_warn(void); diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c new file mode 100644 index 0000000..007d2cb --- /dev/null +++ b/src/shared/firewall-util.c @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* Temporary work-around for broken glibc vs. linux kernel header definitions + * This is already fixed upstream, remove this when distributions have updated. + */ +#define _NET_IF_H 1 + +#include <arpa/inet.h> +#include <endian.h> +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <net/if.h> +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif +#include <linux/if.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/nf_nat.h> +#include <linux/netfilter/xt_addrtype.h> +#include <libiptc/libiptc.h> + +#include "alloc-util.h" +#include "firewall-util.h" +#include "in-addr-util.h" +#include "macro.h" +#include "socket-util.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free); + +static int entry_fill_basics( + struct ipt_entry *entry, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen) { + + assert(entry); + + if (out_interface && !ifname_valid(out_interface)) + return -EINVAL; + if (in_interface && !ifname_valid(in_interface)) + return -EINVAL; + + entry->ip.proto = protocol; + + if (in_interface) { + size_t l; + + l = strlen(in_interface); + assert(l < sizeof entry->ip.iniface); + assert(l < sizeof entry->ip.iniface_mask); + + strcpy(entry->ip.iniface, in_interface); + memset(entry->ip.iniface_mask, 0xFF, l + 1); + } + if (source) { + entry->ip.src = source->in; + in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen); + } + + if (out_interface) { + size_t l = strlen(out_interface); + assert(l < sizeof entry->ip.outiface); + assert(l < sizeof entry->ip.outiface_mask); + + strcpy(entry->ip.outiface, out_interface); + memset(entry->ip.outiface_mask, 0xFF, l + 1); + } + if (destination) { + entry->ip.dst = destination->in; + in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen); + } + + return 0; +} + +int fw_add_masquerade( + bool add, + int af, + int protocol, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen) { + + static const xt_chainlabel chain = "POSTROUTING"; + _cleanup_(iptc_freep) struct xtc_handle *h = NULL; + struct ipt_entry *entry, *mask; + struct ipt_entry_target *t; + size_t sz; + struct nf_nat_ipv4_multi_range_compat *mr; + int r; + + if (af != AF_INET) + return -EOPNOTSUPP; + + if (!IN_SET(protocol, 0, IPPROTO_TCP, IPPROTO_UDP)) + return -EOPNOTSUPP; + + h = iptc_init("nat"); + if (!h) + return -errno; + + sz = XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + + /* Put together the entry we want to add or remove */ + entry = alloca0(sz); + entry->next_offset = sz; + entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry)); + r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen); + if (r < 0) + return r; + + /* Fill in target part */ + t = ipt_get_target(entry); + t->u.target_size = + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name)); + mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; + mr->rangesize = 1; + + /* Create a search mask entry */ + mask = alloca(sz); + memset(mask, 0xFF, sz); + + if (add) { + if (iptc_check_entry(chain, entry, (unsigned char*) mask, h)) + return 0; + if (errno != ENOENT) /* if other error than not existing yet, fail */ + return -errno; + + if (!iptc_insert_entry(chain, entry, 0, h)) + return -errno; + } else { + if (!iptc_delete_entry(chain, entry, (unsigned char*) mask, h)) { + if (errno == ENOENT) /* if it's already gone, all is good! */ + return 0; + + return -errno; + } + } + + if (!iptc_commit(h)) + return -errno; + + return 0; +} + +int fw_add_local_dnat( + bool add, + int af, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const union in_addr_union *destination, + unsigned destination_prefixlen, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote) { + + static const xt_chainlabel chain_pre = "PREROUTING", chain_output = "OUTPUT"; + _cleanup_(iptc_freep) struct xtc_handle *h = NULL; + struct ipt_entry *entry, *mask; + struct ipt_entry_target *t; + struct ipt_entry_match *m; + struct xt_addrtype_info_v1 *at; + struct nf_nat_ipv4_multi_range_compat *mr; + size_t sz, msz; + int r; + + assert(add || !previous_remote); + + if (af != AF_INET) + return -EOPNOTSUPP; + + if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP)) + return -EOPNOTSUPP; + + if (local_port <= 0) + return -EINVAL; + + if (remote_port <= 0) + return -EINVAL; + + h = iptc_init("nat"); + if (!h) + return -errno; + + sz = XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + + if (protocol == IPPROTO_TCP) + msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_tcp)); + else + msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_udp)); + + sz += msz; + + /* Fill in basic part */ + entry = alloca0(sz); + entry->next_offset = sz; + entry->target_offset = + XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + + msz; + r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen); + if (r < 0) + return r; + + /* Fill in first match */ + m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry))); + m->u.match_size = msz; + if (protocol == IPPROTO_TCP) { + struct xt_tcp *tcp; + + strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name)); + tcp = (struct xt_tcp*) m->data; + tcp->dpts[0] = tcp->dpts[1] = local_port; + tcp->spts[0] = 0; + tcp->spts[1] = 0xFFFF; + + } else { + struct xt_udp *udp; + + strncpy(m->u.user.name, "udp", sizeof(m->u.user.name)); + udp = (struct xt_udp*) m->data; + udp->dpts[0] = udp->dpts[1] = local_port; + udp->spts[0] = 0; + udp->spts[1] = 0xFFFF; + } + + /* Fill in second match */ + m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz); + m->u.match_size = + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)); + strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name)); + m->u.user.revision = 1; + at = (struct xt_addrtype_info_v1*) m->data; + at->dest = XT_ADDRTYPE_LOCAL; + + /* Fill in target part */ + t = ipt_get_target(entry); + t->u.target_size = + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name)); + mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; + mr->rangesize = 1; + mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS; + mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; + if (protocol == IPPROTO_TCP) + mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htobe16(remote_port); + else + mr->range[0].min.udp.port = mr->range[0].max.udp.port = htobe16(remote_port); + + mask = alloca0(sz); + memset(mask, 0xFF, sz); + + if (add) { + /* Add the PREROUTING rule, if it is missing so far */ + if (!iptc_check_entry(chain_pre, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -EINVAL; + + if (!iptc_insert_entry(chain_pre, entry, 0, h)) + return -errno; + } + + /* If a previous remote is set, remove its entry */ + if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { + mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; + + if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + + mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; + } + + /* Add the OUTPUT rule, if it is missing so far */ + if (!in_interface) { + + /* Don't apply onto loopback addresses */ + if (!destination) { + entry->ip.dst.s_addr = htobe32(0x7F000000); + entry->ip.dmsk.s_addr = htobe32(0xFF000000); + entry->ip.invflags = IPT_INV_DSTIP; + } + + if (!iptc_check_entry(chain_output, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + + if (!iptc_insert_entry(chain_output, entry, 0, h)) + return -errno; + } + + /* If a previous remote is set, remove its entry */ + if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { + mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; + + if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + } + } + } else { + if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + + if (!in_interface) { + if (!destination) { + entry->ip.dst.s_addr = htobe32(0x7F000000); + entry->ip.dmsk.s_addr = htobe32(0xFF000000); + entry->ip.invflags = IPT_INV_DSTIP; + } + + if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + } + } + + if (!iptc_commit(h)) + return -errno; + + return 0; +} diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h new file mode 100644 index 0000000..0a51a3c --- /dev/null +++ b/src/shared/firewall-util.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include "in-addr-util.h" + +#if HAVE_LIBIPTC + +int fw_add_masquerade( + bool add, + int af, + int protocol, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen); + +int fw_add_local_dnat( + bool add, + int af, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const union in_addr_union *destination, + unsigned destination_prefixlen, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote); + +#else + +static inline int fw_add_masquerade( + bool add, + int af, + int protocol, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen) { + return -EOPNOTSUPP; +} + +static inline int fw_add_local_dnat( + bool add, + int af, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const union in_addr_union *destination, + unsigned destination_prefixlen, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote) { + return -EOPNOTSUPP; +} + +#endif diff --git a/src/shared/format-table.c b/src/shared/format-table.c new file mode 100644 index 0000000..a13a198 --- /dev/null +++ b/src/shared/format-table.c @@ -0,0 +1,2549 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <ctype.h> +#include <net/if.h> +#include <unistd.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-table.h" +#include "format-util.h" +#include "gunicode.h" +#include "id128-util.h" +#include "in-addr-util.h" +#include "locale-util.h" +#include "memory-util.h" +#include "pager.h" +#include "parse-util.h" +#include "path-util.h" +#include "pretty-print.h" +#include "sort-util.h" +#include "string-util.h" +#include "strxcpyx.h" +#include "terminal-util.h" +#include "time-util.h" +#include "utf8.h" +#include "util.h" + +#define DEFAULT_WEIGHT 100 + +/* + A few notes on implementation details: + + - TableCell is a 'fake' structure, it's just used as data type to pass references to specific cell positions in the + table. It can be easily converted to an index number and back. + + - TableData is where the actual data is stored: it encapsulates the data and formatting for a specific cell. It's + 'pseudo-immutable' and ref-counted. When a cell's data's formatting is to be changed, we duplicate the object if the + ref-counting is larger than 1. Note that TableData and its ref-counting is mostly not visible to the outside. The + outside only sees Table and TableCell. + + - The Table object stores a simple one-dimensional array of references to TableData objects, one row after the + previous one. + + - There's no special concept of a "row" or "column" in the table, and no special concept of the "header" row. It's all + derived from the cell index: we know how many cells are to be stored in a row, and can determine the rest from + that. The first row is always the header row. If header display is turned off we simply skip outputting the first + row. Also, when sorting rows we always leave the first row where it is, as the header shouldn't move. + + - Note because there's no row and no column object some properties that might be appropriate as row/column properties + are exposed as cell properties instead. For example, the "weight" of a column (which is used to determine where to + add/remove space preferable when expanding/compressing tables horizontally) is actually made the "weight" of a + cell. Given that we usually need it per-column though we will calculate the average across every cell of the column + instead. + + - To make things easy, when cells are added without any explicit configured formatting, then we'll copy the formatting + from the same cell in the previous cell. This is particularly useful for the "weight" of the cell (see above), as + this means setting the weight of the cells of the header row will nicely propagate to all cells in the other rows. +*/ + +typedef struct TableData { + unsigned n_ref; + TableDataType type; + + size_t minimum_width; /* minimum width for the column */ + size_t maximum_width; /* maximum width for the column */ + size_t formatted_for_width; /* the width we tried to format for */ + unsigned weight; /* the horizontal weight for this column, in case the table is expanded/compressed */ + unsigned ellipsize_percent; /* 0 … 100, where to place the ellipsis when compression is needed */ + unsigned align_percent; /* 0 … 100, where to pad with spaces when expanding is needed. 0: left-aligned, 100: right-aligned */ + + bool uppercase; /* Uppercase string on display */ + + const char *color; /* ANSI color string to use for this cell. When written to terminal should not move cursor. Will automatically be reset after the cell */ + const char *rgap_color; /* The ANSI color to use for the gap right of this cell. Usually used to underline entire rows in a gapless fashion */ + char *url; /* A URL to use for a clickable hyperlink */ + char *formatted; /* A cached textual representation of the cell data, before ellipsation/alignment */ + + union { + uint8_t data[0]; /* data is generic array */ + bool boolean; + usec_t timestamp; + usec_t timespan; + uint64_t size; + char string[0]; + char **strv; + int int_val; + int8_t int8; + int16_t int16; + int32_t int32; + int64_t int64; + unsigned uint_val; + uint8_t uint8; + uint16_t uint16; + uint32_t uint32; + uint64_t uint64; + int percent; /* we use 'int' as datatype for percent values in order to match the result of parse_percent() */ + int ifindex; + union in_addr_union address; + sd_id128_t id128; + /* … add more here as we start supporting more cell data types … */ + }; +} TableData; + +static size_t TABLE_CELL_TO_INDEX(TableCell *cell) { + size_t i; + + assert(cell); + + i = PTR_TO_SIZE(cell); + assert(i > 0); + + return i-1; +} + +static TableCell* TABLE_INDEX_TO_CELL(size_t index) { + assert(index != (size_t) -1); + return SIZE_TO_PTR(index + 1); +} + +struct Table { + size_t n_columns; + size_t n_cells; + + bool header; /* Whether to show the header row? */ + size_t width; /* If == 0 format this as wide as necessary. If (size_t) -1 format this to console + * width or less wide, but not wider. Otherwise the width to format this table in. */ + size_t cell_height_max; /* Maximum number of lines per cell. (If there are more, ellipsis is shown. If (size_t) -1 then no limit is set, the default. == 0 is not allowed.) */ + + TableData **data; + size_t n_allocated; + + size_t *display_map; /* List of columns to show (by their index). It's fine if columns are listed multiple times or not at all */ + size_t n_display_map; + + size_t *sort_map; /* The columns to order rows by, in order of preference. */ + size_t n_sort_map; + + bool *reverse_map; + + char *empty_string; +}; + +Table *table_new_raw(size_t n_columns) { + _cleanup_(table_unrefp) Table *t = NULL; + + assert(n_columns > 0); + + t = new(Table, 1); + if (!t) + return NULL; + + *t = (struct Table) { + .n_columns = n_columns, + .header = true, + .width = (size_t) -1, + .cell_height_max = (size_t) -1, + }; + + return TAKE_PTR(t); +} + +Table *table_new_internal(const char *first_header, ...) { + _cleanup_(table_unrefp) Table *t = NULL; + size_t n_columns = 1; + va_list ap; + int r; + + assert(first_header); + + va_start(ap, first_header); + for (;;) { + if (!va_arg(ap, const char*)) + break; + + n_columns++; + } + va_end(ap); + + t = table_new_raw(n_columns); + if (!t) + return NULL; + + va_start(ap, first_header); + for (const char *h = first_header; h; h = va_arg(ap, const char*)) { + TableCell *cell; + + r = table_add_cell(t, &cell, TABLE_STRING, h); + if (r < 0) { + va_end(ap); + return NULL; + } + + /* Make the table header uppercase */ + r = table_set_uppercase(t, cell, true); + if (r < 0) { + va_end(ap); + return NULL; + } + } + va_end(ap); + + assert(t->n_columns == t->n_cells); + return TAKE_PTR(t); +} + +static TableData *table_data_free(TableData *d) { + assert(d); + + free(d->formatted); + free(d->url); + + if (IN_SET(d->type, TABLE_STRV, TABLE_STRV_WRAPPED)) + strv_free(d->strv); + + return mfree(d); +} + +DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(TableData, table_data, table_data_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(TableData*, table_data_unref); + +Table *table_unref(Table *t) { + if (!t) + return NULL; + + for (size_t i = 0; i < t->n_cells; i++) + table_data_unref(t->data[i]); + + free(t->data); + free(t->display_map); + free(t->sort_map); + free(t->reverse_map); + free(t->empty_string); + + return mfree(t); +} + +static size_t table_data_size(TableDataType type, const void *data) { + + switch (type) { + + case TABLE_EMPTY: + return 0; + + case TABLE_STRING: + case TABLE_PATH: + return strlen(data) + 1; + + case TABLE_STRV: + case TABLE_STRV_WRAPPED: + return sizeof(char **); + + case TABLE_BOOLEAN: + return sizeof(bool); + + case TABLE_TIMESTAMP: + case TABLE_TIMESTAMP_UTC: + case TABLE_TIMESTAMP_RELATIVE: + case TABLE_TIMESPAN: + case TABLE_TIMESPAN_MSEC: + return sizeof(usec_t); + + case TABLE_SIZE: + case TABLE_INT64: + case TABLE_UINT64: + case TABLE_BPS: + return sizeof(uint64_t); + + case TABLE_INT32: + case TABLE_UINT32: + return sizeof(uint32_t); + + case TABLE_INT16: + case TABLE_UINT16: + return sizeof(uint16_t); + + case TABLE_INT8: + case TABLE_UINT8: + return sizeof(uint8_t); + + case TABLE_INT: + case TABLE_UINT: + case TABLE_PERCENT: + case TABLE_IFINDEX: + return sizeof(int); + + case TABLE_IN_ADDR: + return sizeof(struct in_addr); + + case TABLE_IN6_ADDR: + return sizeof(struct in6_addr); + + case TABLE_UUID: + case TABLE_ID128: + return sizeof(sd_id128_t); + + default: + assert_not_reached("Uh? Unexpected cell type"); + } +} + +static bool table_data_matches( + TableData *d, + TableDataType type, + const void *data, + size_t minimum_width, + size_t maximum_width, + unsigned weight, + unsigned align_percent, + unsigned ellipsize_percent) { + + size_t k, l; + assert(d); + + if (d->type != type) + return false; + + if (d->minimum_width != minimum_width) + return false; + + if (d->maximum_width != maximum_width) + return false; + + if (d->weight != weight) + return false; + + if (d->align_percent != align_percent) + return false; + + if (d->ellipsize_percent != ellipsize_percent) + return false; + + /* If a color/url/uppercase flag is set, refuse to merge */ + if (d->color || d->rgap_color) + return false; + if (d->url) + return false; + if (d->uppercase) + return false; + + k = table_data_size(type, data); + l = table_data_size(d->type, d->data); + if (k != l) + return false; + + return memcmp_safe(data, d->data, l) == 0; +} + +static TableData *table_data_new( + TableDataType type, + const void *data, + size_t minimum_width, + size_t maximum_width, + unsigned weight, + unsigned align_percent, + unsigned ellipsize_percent) { + + _cleanup_free_ TableData *d = NULL; + size_t data_size; + + data_size = table_data_size(type, data); + + d = malloc0(offsetof(TableData, data) + data_size); + if (!d) + return NULL; + + d->n_ref = 1; + d->type = type; + d->minimum_width = minimum_width; + d->maximum_width = maximum_width; + d->weight = weight; + d->align_percent = align_percent; + d->ellipsize_percent = ellipsize_percent; + + if (IN_SET(type, TABLE_STRV, TABLE_STRV_WRAPPED)) { + d->strv = strv_copy(data); + if (!d->strv) + return NULL; + } else + memcpy_safe(d->data, data, data_size); + + return TAKE_PTR(d); +} + +int table_add_cell_full( + Table *t, + TableCell **ret_cell, + TableDataType type, + const void *data, + size_t minimum_width, + size_t maximum_width, + unsigned weight, + unsigned align_percent, + unsigned ellipsize_percent) { + + _cleanup_(table_data_unrefp) TableData *d = NULL; + TableData *p; + + assert(t); + assert(type >= 0); + assert(type < _TABLE_DATA_TYPE_MAX); + + /* Special rule: patch NULL data fields to the empty field */ + if (!data) + type = TABLE_EMPTY; + + /* Determine the cell adjacent to the current one, but one row up */ + if (t->n_cells >= t->n_columns) + assert_se(p = t->data[t->n_cells - t->n_columns]); + else + p = NULL; + + /* If formatting parameters are left unspecified, copy from the previous row */ + if (minimum_width == (size_t) -1) + minimum_width = p ? p->minimum_width : 1; + + if (weight == (unsigned) -1) + weight = p ? p->weight : DEFAULT_WEIGHT; + + if (align_percent == (unsigned) -1) + align_percent = p ? p->align_percent : 0; + + if (ellipsize_percent == (unsigned) -1) + ellipsize_percent = p ? p->ellipsize_percent : 100; + + assert(align_percent <= 100); + assert(ellipsize_percent <= 100); + + /* Small optimization: Pretty often adjacent cells in two subsequent lines have the same data and + * formatting. Let's see if we can reuse the cell data and ref it once more. */ + + if (p && table_data_matches(p, type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent)) + d = table_data_ref(p); + else { + d = table_data_new(type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent); + if (!d) + return -ENOMEM; + } + + if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns))) + return -ENOMEM; + + if (ret_cell) + *ret_cell = TABLE_INDEX_TO_CELL(t->n_cells); + + t->data[t->n_cells++] = TAKE_PTR(d); + + return 0; +} + +int table_add_cell_stringf(Table *t, TableCell **ret_cell, const char *format, ...) { + _cleanup_free_ char *buffer = NULL; + va_list ap; + int r; + + va_start(ap, format); + r = vasprintf(&buffer, format, ap); + va_end(ap); + if (r < 0) + return -ENOMEM; + + return table_add_cell(t, ret_cell, TABLE_STRING, buffer); +} + +int table_fill_empty(Table *t, size_t until_column) { + int r; + + assert(t); + + /* Fill the rest of the current line with empty cells until we reach the specified column. Will add + * at least one cell. Pass 0 in order to fill a line to the end or insert an empty line. */ + + if (until_column >= t->n_columns) + return -EINVAL; + + do { + r = table_add_cell(t, NULL, TABLE_EMPTY, NULL); + if (r < 0) + return r; + + } while ((t->n_cells % t->n_columns) != until_column); + + return 0; +} + +int table_dup_cell(Table *t, TableCell *cell) { + size_t i; + + assert(t); + + /* Add the data of the specified cell a second time as a new cell to the end. */ + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return -ENXIO; + + if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns))) + return -ENOMEM; + + t->data[t->n_cells++] = table_data_ref(t->data[i]); + return 0; +} + +static int table_dedup_cell(Table *t, TableCell *cell) { + _cleanup_free_ char *curl = NULL; + TableData *nd, *od; + size_t i; + + assert(t); + + /* Helper call that ensures the specified cell's data object has a ref count of 1, which we can use before + * changing a cell's formatting without effecting every other cell's formatting that shares the same data */ + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return -ENXIO; + + assert_se(od = t->data[i]); + if (od->n_ref == 1) + return 0; + + assert(od->n_ref > 1); + + if (od->url) { + curl = strdup(od->url); + if (!curl) + return -ENOMEM; + } + + nd = table_data_new( + od->type, + od->data, + od->minimum_width, + od->maximum_width, + od->weight, + od->align_percent, + od->ellipsize_percent); + if (!nd) + return -ENOMEM; + + nd->color = od->color; + nd->rgap_color = od->rgap_color; + nd->url = TAKE_PTR(curl); + nd->uppercase = od->uppercase; + + table_data_unref(od); + t->data[i] = nd; + + assert(nd->n_ref == 1); + + return 1; +} + +static TableData *table_get_data(Table *t, TableCell *cell) { + size_t i; + + assert(t); + assert(cell); + + /* Get the data object of the specified cell, or NULL if it doesn't exist */ + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return NULL; + + assert(t->data[i]); + assert(t->data[i]->n_ref > 0); + + return t->data[i]; +} + +int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width) { + int r; + + assert(t); + assert(cell); + + if (minimum_width == (size_t) -1) + minimum_width = 1; + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->minimum_width = minimum_width; + return 0; +} + +int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width) { + int r; + + assert(t); + assert(cell); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->maximum_width = maximum_width; + return 0; +} + +int table_set_weight(Table *t, TableCell *cell, unsigned weight) { + int r; + + assert(t); + assert(cell); + + if (weight == (unsigned) -1) + weight = DEFAULT_WEIGHT; + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->weight = weight; + return 0; +} + +int table_set_align_percent(Table *t, TableCell *cell, unsigned percent) { + int r; + + assert(t); + assert(cell); + + if (percent == (unsigned) -1) + percent = 0; + + assert(percent <= 100); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->align_percent = percent; + return 0; +} + +int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent) { + int r; + + assert(t); + assert(cell); + + if (percent == (unsigned) -1) + percent = 100; + + assert(percent <= 100); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->ellipsize_percent = percent; + return 0; +} + +int table_set_color(Table *t, TableCell *cell, const char *color) { + int r; + + assert(t); + assert(cell); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->color = empty_to_null(color); + return 0; +} + +int table_set_rgap_color(Table *t, TableCell *cell, const char *color) { + int r; + + assert(t); + assert(cell); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->rgap_color = empty_to_null(color); + return 0; +} + +int table_set_url(Table *t, TableCell *cell, const char *url) { + _cleanup_free_ char *copy = NULL; + int r; + + assert(t); + assert(cell); + + if (url) { + copy = strdup(url); + if (!copy) + return -ENOMEM; + } + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + return free_and_replace(table_get_data(t, cell)->url, copy); +} + +int table_set_uppercase(Table *t, TableCell *cell, bool b) { + TableData *d; + int r; + + assert(t); + assert(cell); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + assert_se(d = table_get_data(t, cell)); + + if (d->uppercase == b) + return 0; + + d->formatted = mfree(d->formatted); + d->uppercase = b; + return 1; +} + +int table_update(Table *t, TableCell *cell, TableDataType type, const void *data) { + _cleanup_free_ char *curl = NULL; + TableData *nd, *od; + size_t i; + + assert(t); + assert(cell); + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return -ENXIO; + + assert_se(od = t->data[i]); + + if (od->url) { + curl = strdup(od->url); + if (!curl) + return -ENOMEM; + } + + nd = table_data_new( + type, + data, + od->minimum_width, + od->maximum_width, + od->weight, + od->align_percent, + od->ellipsize_percent); + if (!nd) + return -ENOMEM; + + nd->color = od->color; + nd->rgap_color = od->rgap_color; + nd->url = TAKE_PTR(curl); + nd->uppercase = od->uppercase; + + table_data_unref(od); + t->data[i] = nd; + + return 0; +} + +int table_add_many_internal(Table *t, TableDataType first_type, ...) { + TableDataType type; + va_list ap; + TableCell *last_cell = NULL; + int r; + + assert(t); + assert(first_type >= 0); + assert(first_type < _TABLE_DATA_TYPE_MAX); + + type = first_type; + + va_start(ap, first_type); + for (;;) { + const void *data; + union { + uint64_t size; + usec_t usec; + int int_val; + int8_t int8; + int16_t int16; + int32_t int32; + int64_t int64; + unsigned uint_val; + uint8_t uint8; + uint16_t uint16; + uint32_t uint32; + uint64_t uint64; + int percent; + int ifindex; + bool b; + union in_addr_union address; + sd_id128_t id128; + } buffer; + + switch (type) { + + case TABLE_EMPTY: + data = NULL; + break; + + case TABLE_STRING: + case TABLE_PATH: + data = va_arg(ap, const char *); + break; + + case TABLE_STRV: + case TABLE_STRV_WRAPPED: + data = va_arg(ap, char * const *); + break; + + case TABLE_BOOLEAN: + buffer.b = va_arg(ap, int); + data = &buffer.b; + break; + + case TABLE_TIMESTAMP: + case TABLE_TIMESTAMP_UTC: + case TABLE_TIMESTAMP_RELATIVE: + case TABLE_TIMESPAN: + case TABLE_TIMESPAN_MSEC: + buffer.usec = va_arg(ap, usec_t); + data = &buffer.usec; + break; + + case TABLE_SIZE: + case TABLE_BPS: + buffer.size = va_arg(ap, uint64_t); + data = &buffer.size; + break; + + case TABLE_INT: + buffer.int_val = va_arg(ap, int); + data = &buffer.int_val; + break; + + case TABLE_INT8: { + int x = va_arg(ap, int); + assert(x >= INT8_MIN && x <= INT8_MAX); + + buffer.int8 = x; + data = &buffer.int8; + break; + } + + case TABLE_INT16: { + int x = va_arg(ap, int); + assert(x >= INT16_MIN && x <= INT16_MAX); + + buffer.int16 = x; + data = &buffer.int16; + break; + } + + case TABLE_INT32: + buffer.int32 = va_arg(ap, int32_t); + data = &buffer.int32; + break; + + case TABLE_INT64: + buffer.int64 = va_arg(ap, int64_t); + data = &buffer.int64; + break; + + case TABLE_UINT: + buffer.uint_val = va_arg(ap, unsigned); + data = &buffer.uint_val; + break; + + case TABLE_UINT8: { + unsigned x = va_arg(ap, unsigned); + assert(x <= UINT8_MAX); + + buffer.uint8 = x; + data = &buffer.uint8; + break; + } + + case TABLE_UINT16: { + unsigned x = va_arg(ap, unsigned); + assert(x <= UINT16_MAX); + + buffer.uint16 = x; + data = &buffer.uint16; + break; + } + + case TABLE_UINT32: + buffer.uint32 = va_arg(ap, uint32_t); + data = &buffer.uint32; + break; + + case TABLE_UINT64: + buffer.uint64 = va_arg(ap, uint64_t); + data = &buffer.uint64; + break; + + case TABLE_PERCENT: + buffer.percent = va_arg(ap, int); + data = &buffer.percent; + break; + + case TABLE_IFINDEX: + buffer.ifindex = va_arg(ap, int); + data = &buffer.ifindex; + break; + + case TABLE_IN_ADDR: + buffer.address = *va_arg(ap, union in_addr_union *); + data = &buffer.address.in; + break; + + case TABLE_IN6_ADDR: + buffer.address = *va_arg(ap, union in_addr_union *); + data = &buffer.address.in6; + break; + + case TABLE_UUID: + case TABLE_ID128: + buffer.id128 = va_arg(ap, sd_id128_t); + data = &buffer.id128; + break; + + case TABLE_SET_MINIMUM_WIDTH: { + size_t w = va_arg(ap, size_t); + + r = table_set_minimum_width(t, last_cell, w); + break; + } + + case TABLE_SET_MAXIMUM_WIDTH: { + size_t w = va_arg(ap, size_t); + r = table_set_maximum_width(t, last_cell, w); + break; + } + + case TABLE_SET_WEIGHT: { + unsigned w = va_arg(ap, unsigned); + r = table_set_weight(t, last_cell, w); + break; + } + + case TABLE_SET_ALIGN_PERCENT: { + unsigned p = va_arg(ap, unsigned); + r = table_set_align_percent(t, last_cell, p); + break; + } + + case TABLE_SET_ELLIPSIZE_PERCENT: { + unsigned p = va_arg(ap, unsigned); + r = table_set_ellipsize_percent(t, last_cell, p); + break; + } + + case TABLE_SET_COLOR: { + const char *c = va_arg(ap, const char*); + r = table_set_color(t, last_cell, c); + break; + } + + case TABLE_SET_RGAP_COLOR: { + const char *c = va_arg(ap, const char*); + r = table_set_rgap_color(t, last_cell, c); + break; + } + + case TABLE_SET_BOTH_COLORS: { + const char *c = va_arg(ap, const char*); + + r = table_set_color(t, last_cell, c); + if (r < 0) { + va_end(ap); + return r; + } + + r = table_set_rgap_color(t, last_cell, c); + break; + } + + case TABLE_SET_URL: { + const char *u = va_arg(ap, const char*); + r = table_set_url(t, last_cell, u); + break; + } + + case TABLE_SET_UPPERCASE: { + int u = va_arg(ap, int); + r = table_set_uppercase(t, last_cell, u); + break; + } + + case _TABLE_DATA_TYPE_MAX: + /* Used as end marker */ + va_end(ap); + return 0; + + default: + assert_not_reached("Uh? Unexpected data type."); + } + + if (type < _TABLE_DATA_TYPE_MAX) + r = table_add_cell(t, &last_cell, type, data); + + if (r < 0) { + va_end(ap); + return r; + } + + type = va_arg(ap, TableDataType); + } +} + +void table_set_header(Table *t, bool b) { + assert(t); + + t->header = b; +} + +void table_set_width(Table *t, size_t width) { + assert(t); + + t->width = width; +} + +void table_set_cell_height_max(Table *t, size_t height) { + assert(t); + assert(height >= 1 || height == (size_t) -1); + + t->cell_height_max = height; +} + +int table_set_empty_string(Table *t, const char *empty) { + assert(t); + + return free_and_strdup(&t->empty_string, empty); +} + +int table_set_display_all(Table *t) { + assert(t); + + size_t allocated = t->n_display_map; + + if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, allocated))) + return -ENOMEM; + + for (size_t i = 0; i < t->n_columns; i++) + t->display_map[i] = i; + + t->n_display_map = t->n_columns; + + return 0; +} + +int table_set_display(Table *t, size_t first_column, ...) { + size_t allocated, column; + va_list ap; + + assert(t); + + allocated = t->n_display_map; + column = first_column; + + va_start(ap, first_column); + for (;;) { + assert(column < t->n_columns); + + if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, t->n_display_map+1))) { + va_end(ap); + return -ENOMEM; + } + + t->display_map[t->n_display_map++] = column; + + column = va_arg(ap, size_t); + if (column == (size_t) -1) + break; + + } + va_end(ap); + + return 0; +} + +int table_set_sort(Table *t, size_t first_column, ...) { + size_t allocated, column; + va_list ap; + + assert(t); + + allocated = t->n_sort_map; + column = first_column; + + va_start(ap, first_column); + for (;;) { + assert(column < t->n_columns); + + if (!GREEDY_REALLOC(t->sort_map, allocated, MAX(t->n_columns, t->n_sort_map+1))) { + va_end(ap); + return -ENOMEM; + } + + t->sort_map[t->n_sort_map++] = column; + + column = va_arg(ap, size_t); + if (column == (size_t) -1) + break; + } + va_end(ap); + + return 0; +} + +int table_hide_column_from_display(Table *t, size_t column) { + int r; + + assert(t); + assert(column < t->n_columns); + + /* If the display map is empty, initialize it with all available columns */ + if (!t->display_map) { + r = table_set_display_all(t); + if (r < 0) + return r; + } + + size_t allocated = t->n_display_map, cur = 0; + + for (size_t i = 0; i < allocated; i++) { + if (t->display_map[i] == column) + continue; + + t->display_map[cur++] = t->display_map[i]; + } + + t->n_display_map = cur; + + return 0; +} + +static int cell_data_compare(TableData *a, size_t index_a, TableData *b, size_t index_b) { + assert(a); + assert(b); + + if (a->type == b->type) { + + /* We only define ordering for cells of the same data type. If cells with different data types are + * compared we follow the order the cells were originally added in */ + + switch (a->type) { + + case TABLE_STRING: + return strcmp(a->string, b->string); + + case TABLE_PATH: + return path_compare(a->string, b->string); + + case TABLE_STRV: + case TABLE_STRV_WRAPPED: + return strv_compare(a->strv, b->strv); + + case TABLE_BOOLEAN: + if (!a->boolean && b->boolean) + return -1; + if (a->boolean && !b->boolean) + return 1; + return 0; + + case TABLE_TIMESTAMP: + case TABLE_TIMESTAMP_UTC: + case TABLE_TIMESTAMP_RELATIVE: + return CMP(a->timestamp, b->timestamp); + + case TABLE_TIMESPAN: + case TABLE_TIMESPAN_MSEC: + return CMP(a->timespan, b->timespan); + + case TABLE_SIZE: + case TABLE_BPS: + return CMP(a->size, b->size); + + case TABLE_INT: + return CMP(a->int_val, b->int_val); + + case TABLE_INT8: + return CMP(a->int8, b->int8); + + case TABLE_INT16: + return CMP(a->int16, b->int16); + + case TABLE_INT32: + return CMP(a->int32, b->int32); + + case TABLE_INT64: + return CMP(a->int64, b->int64); + + case TABLE_UINT: + return CMP(a->uint_val, b->uint_val); + + case TABLE_UINT8: + return CMP(a->uint8, b->uint8); + + case TABLE_UINT16: + return CMP(a->uint16, b->uint16); + + case TABLE_UINT32: + return CMP(a->uint32, b->uint32); + + case TABLE_UINT64: + return CMP(a->uint64, b->uint64); + + case TABLE_PERCENT: + return CMP(a->percent, b->percent); + + case TABLE_IFINDEX: + return CMP(a->ifindex, b->ifindex); + + case TABLE_IN_ADDR: + return CMP(a->address.in.s_addr, b->address.in.s_addr); + + case TABLE_IN6_ADDR: + return memcmp(&a->address.in6, &b->address.in6, FAMILY_ADDRESS_SIZE(AF_INET6)); + + case TABLE_UUID: + case TABLE_ID128: + return memcmp(&a->id128, &b->id128, sizeof(sd_id128_t)); + + default: + ; + } + } + + /* Generic fallback using the original order in which the cells where added. */ + return CMP(index_a, index_b); +} + +static int table_data_compare(const size_t *a, const size_t *b, Table *t) { + int r; + + assert(t); + assert(t->sort_map); + + /* Make sure the header stays at the beginning */ + if (*a < t->n_columns && *b < t->n_columns) + return 0; + if (*a < t->n_columns) + return -1; + if (*b < t->n_columns) + return 1; + + /* Order other lines by the sorting map */ + for (size_t i = 0; i < t->n_sort_map; i++) { + TableData *d, *dd; + + d = t->data[*a + t->sort_map[i]]; + dd = t->data[*b + t->sort_map[i]]; + + r = cell_data_compare(d, *a, dd, *b); + if (r != 0) + return t->reverse_map && t->reverse_map[t->sort_map[i]] ? -r : r; + } + + /* Order identical lines by the order there were originally added in */ + return CMP(*a, *b); +} + +static char* format_strv_width(char **strv, size_t column_width) { + _cleanup_fclose_ FILE *f = NULL; + size_t sz = 0; + _cleanup_free_ char *buf = NULL; + + f = open_memstream_unlocked(&buf, &sz); + if (!f) + return NULL; + + size_t position = 0; + char **p; + STRV_FOREACH(p, strv) { + size_t our_len = utf8_console_width(*p); /* This returns -1 on invalid utf-8 (which shouldn't happen). + * If that happens, we'll just print one item per line. */ + + if (position == 0) { + fputs(*p, f); + position = our_len; + } else if (size_add(size_add(position, 1), our_len) <= column_width) { + fprintf(f, " %s", *p); + position = size_add(size_add(position, 1), our_len); + } else { + fprintf(f, "\n%s", *p); + position = our_len; + } + } + + if (fflush_and_check(f) < 0) + return NULL; + + f = safe_fclose(f); + return TAKE_PTR(buf); +} + +static const char *table_data_format(Table *t, TableData *d, bool avoid_uppercasing, size_t column_width, bool *have_soft) { + assert(d); + + if (d->formatted && + /* Only TABLE_STRV_WRAPPED adjust based on column_width so far… */ + (d->type != TABLE_STRV_WRAPPED || d->formatted_for_width == column_width)) + return d->formatted; + + switch (d->type) { + case TABLE_EMPTY: + return strempty(t->empty_string); + + case TABLE_STRING: + case TABLE_PATH: + if (d->uppercase && !avoid_uppercasing) { + d->formatted = new(char, strlen(d->string) + 1); + if (!d->formatted) + return NULL; + + char *q = d->formatted; + for (char *p = d->string; *p; p++, q++) + *q = (char) toupper((unsigned char) *p); + *q = 0; + + return d->formatted; + } + + return d->string; + + case TABLE_STRV: + if (strv_isempty(d->strv)) + return strempty(t->empty_string); + + d->formatted = strv_join(d->strv, "\n"); + if (!d->formatted) + return NULL; + break; + + case TABLE_STRV_WRAPPED: { + if (strv_isempty(d->strv)) + return strempty(t->empty_string); + + char *buf = format_strv_width(d->strv, column_width); + if (!buf) + return NULL; + + free_and_replace(d->formatted, buf); + d->formatted_for_width = column_width; + if (have_soft) + *have_soft = true; + + break; + } + + case TABLE_BOOLEAN: + return yes_no(d->boolean); + + case TABLE_TIMESTAMP: + case TABLE_TIMESTAMP_UTC: + case TABLE_TIMESTAMP_RELATIVE: { + _cleanup_free_ char *p; + char *ret; + + p = new(char, FORMAT_TIMESTAMP_MAX); + if (!p) + return NULL; + + if (d->type == TABLE_TIMESTAMP) + ret = format_timestamp(p, FORMAT_TIMESTAMP_MAX, d->timestamp); + else if (d->type == TABLE_TIMESTAMP_UTC) + ret = format_timestamp_style(p, FORMAT_TIMESTAMP_MAX, d->timestamp, TIMESTAMP_UTC); + else + ret = format_timestamp_relative(p, FORMAT_TIMESTAMP_MAX, d->timestamp); + if (!ret) + return "n/a"; + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_TIMESPAN: + case TABLE_TIMESPAN_MSEC: { + _cleanup_free_ char *p; + + p = new(char, FORMAT_TIMESPAN_MAX); + if (!p) + return NULL; + + if (!format_timespan(p, FORMAT_TIMESPAN_MAX, d->timespan, + d->type == TABLE_TIMESPAN ? 0 : USEC_PER_MSEC)) + return "n/a"; + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_SIZE: { + _cleanup_free_ char *p; + + p = new(char, FORMAT_BYTES_MAX); + if (!p) + return NULL; + + if (!format_bytes(p, FORMAT_BYTES_MAX, d->size)) + return "n/a"; + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_BPS: { + _cleanup_free_ char *p; + size_t n; + + p = new(char, FORMAT_BYTES_MAX+2); + if (!p) + return NULL; + + if (!format_bytes_full(p, FORMAT_BYTES_MAX, d->size, 0)) + return "n/a"; + + n = strlen(p); + strscpy(p + n, FORMAT_BYTES_MAX + 2 - n, "bps"); + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_INT: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->int_val) + 1); + if (!p) + return NULL; + + sprintf(p, "%i", d->int_val); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_INT8: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->int8) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIi8, d->int8); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_INT16: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->int16) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIi16, d->int16); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_INT32: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->int32) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIi32, d->int32); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_INT64: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->int64) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIi64, d->int64); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_UINT: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->uint_val) + 1); + if (!p) + return NULL; + + sprintf(p, "%u", d->uint_val); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_UINT8: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->uint8) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIu8, d->uint8); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_UINT16: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->uint16) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIu16, d->uint16); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_UINT32: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->uint32) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIu32, d->uint32); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_UINT64: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->uint64) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIu64, d->uint64); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_PERCENT: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->percent) + 2); + if (!p) + return NULL; + + sprintf(p, "%i%%" , d->percent); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_IFINDEX: { + _cleanup_free_ char *p = NULL; + char name[IF_NAMESIZE + 1]; + + if (format_ifname(d->ifindex, name)) { + p = strdup(name); + if (!p) + return NULL; + } else { + if (asprintf(&p, "%i" , d->ifindex) < 0) + return NULL; + } + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_IN_ADDR: + case TABLE_IN6_ADDR: { + _cleanup_free_ char *p = NULL; + + if (in_addr_to_string(d->type == TABLE_IN_ADDR ? AF_INET : AF_INET6, + &d->address, &p) < 0) + return NULL; + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_ID128: { + char *p; + + p = new(char, SD_ID128_STRING_MAX); + if (!p) + return NULL; + + d->formatted = sd_id128_to_string(d->id128, p); + break; + } + + case TABLE_UUID: { + char *p; + + p = new(char, ID128_UUID_STRING_MAX); + if (!p) + return NULL; + + d->formatted = id128_to_uuid_string(d->id128, p); + break; + } + + default: + assert_not_reached("Unexpected type?"); + } + + return d->formatted; +} + +static int console_width_height( + const char *s, + size_t *ret_width, + size_t *ret_height) { + + size_t max_width = 0, height = 0; + const char *p; + + assert(s); + + /* Determine the width and height in console character cells the specified string needs. */ + + do { + size_t k; + + p = strchr(s, '\n'); + if (p) { + _cleanup_free_ char *c = NULL; + + c = strndup(s, p - s); + if (!c) + return -ENOMEM; + + k = utf8_console_width(c); + s = p + 1; + } else { + k = utf8_console_width(s); + s = NULL; + } + if (k == (size_t) -1) + return -EINVAL; + if (k > max_width) + max_width = k; + + height++; + } while (!isempty(s)); + + if (ret_width) + *ret_width = max_width; + + if (ret_height) + *ret_height = height; + + return 0; +} + +static int table_data_requested_width_height( + Table *table, + TableData *d, + size_t available_width, + size_t *ret_width, + size_t *ret_height, + bool *have_soft) { + + _cleanup_free_ char *truncated = NULL; + bool truncation_applied = false; + size_t width, height; + const char *t; + int r; + bool soft = false; + + t = table_data_format(table, d, false, available_width, &soft); + if (!t) + return -ENOMEM; + + if (table->cell_height_max != (size_t) -1) { + r = string_truncate_lines(t, table->cell_height_max, &truncated); + if (r < 0) + return r; + if (r > 0) + truncation_applied = true; + + t = truncated; + } + + r = console_width_height(t, &width, &height); + if (r < 0) + return r; + + if (d->maximum_width != (size_t) -1 && width > d->maximum_width) + width = d->maximum_width; + + if (width < d->minimum_width) + width = d->minimum_width; + + if (ret_width) + *ret_width = width; + if (ret_height) + *ret_height = height; + if (have_soft && soft) + *have_soft = true; + + return truncation_applied; +} + +static char *align_string_mem(const char *str, const char *url, size_t new_length, unsigned percent) { + size_t w = 0, space, lspace, old_length, clickable_length; + _cleanup_free_ char *clickable = NULL; + const char *p; + char *ret; + int r; + + /* As with ellipsize_mem(), 'old_length' is a byte size while 'new_length' is a width in character cells */ + + assert(str); + assert(percent <= 100); + + old_length = strlen(str); + + if (url) { + r = terminal_urlify(url, str, &clickable); + if (r < 0) + return NULL; + + clickable_length = strlen(clickable); + } else + clickable_length = old_length; + + /* Determine current width on screen */ + p = str; + while (p < str + old_length) { + char32_t c; + + if (utf8_encoded_to_unichar(p, &c) < 0) { + p++, w++; /* count invalid chars as 1 */ + continue; + } + + p = utf8_next_char(p); + w += unichar_iswide(c) ? 2 : 1; + } + + /* Already wider than the target, if so, don't do anything */ + if (w >= new_length) + return clickable ? TAKE_PTR(clickable) : strdup(str); + + /* How much spaces shall we add? An how much on the left side? */ + space = new_length - w; + lspace = space * percent / 100U; + + ret = new(char, space + clickable_length + 1); + if (!ret) + return NULL; + + for (size_t i = 0; i < lspace; i++) + ret[i] = ' '; + memcpy(ret + lspace, clickable ?: str, clickable_length); + for (size_t i = lspace + clickable_length; i < space + clickable_length; i++) + ret[i] = ' '; + + ret[space + clickable_length] = 0; + return ret; +} + +static bool table_data_isempty(TableData *d) { + assert(d); + + if (d->type == TABLE_EMPTY) + return true; + + /* Let's also consider an empty strv as truly empty. */ + if (IN_SET(d->type, TABLE_STRV, TABLE_STRV_WRAPPED)) + return strv_isempty(d->strv); + + /* Note that an empty string we do not consider empty here! */ + return false; +} + +static const char* table_data_color(TableData *d) { + assert(d); + + if (d->color) + return d->color; + + /* Let's implicitly color all "empty" cells in grey, in case an "empty_string" is set that is not empty */ + if (table_data_isempty(d)) + return ansi_grey(); + + return NULL; +} + +static const char* table_data_rgap_color(TableData *d) { + assert(d); + + if (d->rgap_color) + return d->rgap_color; + + return NULL; +} + +int table_print(Table *t, FILE *f) { + size_t n_rows, *minimum_width, *maximum_width, display_columns, *requested_width, + table_minimum_width, table_maximum_width, table_requested_width, table_effective_width, + *width = NULL; + _cleanup_free_ size_t *sorted = NULL; + uint64_t *column_weight, weight_sum; + int r; + + assert(t); + + if (!f) + f = stdout; + + /* Ensure we have no incomplete rows */ + assert(t->n_cells % t->n_columns == 0); + + n_rows = t->n_cells / t->n_columns; + assert(n_rows > 0); /* at least the header row must be complete */ + + if (t->sort_map) { + /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */ + + sorted = new(size_t, n_rows); + if (!sorted) + return -ENOMEM; + + for (size_t i = 0; i < n_rows; i++) + sorted[i] = i * t->n_columns; + + typesafe_qsort_r(sorted, n_rows, table_data_compare, t); + } + + if (t->display_map) + display_columns = t->n_display_map; + else + display_columns = t->n_columns; + + assert(display_columns > 0); + + minimum_width = newa(size_t, display_columns); + maximum_width = newa(size_t, display_columns); + requested_width = newa(size_t, display_columns); + column_weight = newa0(uint64_t, display_columns); + + for (size_t j = 0; j < display_columns; j++) { + minimum_width[j] = 1; + maximum_width[j] = (size_t) -1; + } + + for (unsigned pass = 0; pass < 2; pass++) { + /* First pass: determine column sizes */ + + for (size_t j = 0; j < display_columns; j++) + requested_width[j] = (size_t) -1; + + bool any_soft = false; + + for (size_t i = t->header ? 0 : 1; i < n_rows; i++) { + TableData **row; + + /* Note that we don't care about ordering at this time, as we just want to determine column sizes, + * hence we don't care for sorted[] during the first pass. */ + row = t->data + i * t->n_columns; + + for (size_t j = 0; j < display_columns; j++) { + TableData *d; + size_t req_width, req_height; + + assert_se(d = row[t->display_map ? t->display_map[j] : j]); + + r = table_data_requested_width_height(t, d, + width ? width[j] : SIZE_MAX, + &req_width, &req_height, &any_soft); + if (r < 0) + return r; + if (r > 0) { /* Truncated because too many lines? */ + _cleanup_free_ char *last = NULL; + const char *field; + + /* If we are going to show only the first few lines of a cell that has + * multiple make sure that we have enough space horizontally to show an + * ellipsis. Hence, let's figure out the last line, and account for its + * length plus ellipsis. */ + + field = table_data_format(t, d, false, + width ? width[j] : SIZE_MAX, + &any_soft); + if (!field) + return -ENOMEM; + + assert_se(t->cell_height_max > 0); + r = string_extract_line(field, t->cell_height_max-1, &last); + if (r < 0) + return r; + + req_width = MAX(req_width, + utf8_console_width(last) + + utf8_console_width(special_glyph(SPECIAL_GLYPH_ELLIPSIS))); + } + + /* Determine the biggest width that any cell in this column would like to have */ + if (requested_width[j] == (size_t) -1 || + requested_width[j] < req_width) + requested_width[j] = req_width; + + /* Determine the minimum width any cell in this column needs */ + if (minimum_width[j] < d->minimum_width) + minimum_width[j] = d->minimum_width; + + /* Determine the maximum width any cell in this column needs */ + if (d->maximum_width != (size_t) -1 && + (maximum_width[j] == (size_t) -1 || + maximum_width[j] > d->maximum_width)) + maximum_width[j] = d->maximum_width; + + /* Determine the full columns weight */ + column_weight[j] += d->weight; + } + } + + /* One space between each column */ + table_requested_width = table_minimum_width = table_maximum_width = display_columns - 1; + + /* Calculate the total weight for all columns, plus the minimum, maximum and requested width for the table. */ + weight_sum = 0; + for (size_t j = 0; j < display_columns; j++) { + weight_sum += column_weight[j]; + + table_minimum_width += minimum_width[j]; + + if (maximum_width[j] == (size_t) -1) + table_maximum_width = (size_t) -1; + else + table_maximum_width += maximum_width[j]; + + table_requested_width += requested_width[j]; + } + + /* Calculate effective table width */ + if (t->width != 0 && t->width != (size_t) -1) + table_effective_width = t->width; + else if (t->width == 0 || + ((pass > 0 || !any_soft) && (pager_have() || !isatty(STDOUT_FILENO)))) + table_effective_width = table_requested_width; + else + table_effective_width = MIN(table_requested_width, columns()); + + if (table_maximum_width != (size_t) -1 && table_effective_width > table_maximum_width) + table_effective_width = table_maximum_width; + + if (table_effective_width < table_minimum_width) + table_effective_width = table_minimum_width; + + if (!width) + width = newa(size_t, display_columns); + + if (table_effective_width >= table_requested_width) { + size_t extra; + + /* We have extra room, let's distribute it among columns according to their weights. We first provide + * each column with what it asked for and the distribute the rest. */ + + extra = table_effective_width - table_requested_width; + + for (size_t j = 0; j < display_columns; j++) { + size_t delta; + + if (weight_sum == 0) + width[j] = requested_width[j] + extra / (display_columns - j); /* Avoid division by zero */ + else + width[j] = requested_width[j] + (extra * column_weight[j]) / weight_sum; + + if (maximum_width[j] != (size_t) -1 && width[j] > maximum_width[j]) + width[j] = maximum_width[j]; + + if (width[j] < minimum_width[j]) + width[j] = minimum_width[j]; + + assert(width[j] >= requested_width[j]); + delta = width[j] - requested_width[j]; + + /* Subtract what we just added from the rest */ + if (extra > delta) + extra -= delta; + else + extra = 0; + + assert(weight_sum >= column_weight[j]); + weight_sum -= column_weight[j]; + } + + break; /* Every column should be happy, no need to repeat calculations. */ + } else { + /* We need to compress the table, columns can't get what they asked for. We first provide each column + * with the minimum they need, and then distribute anything left. */ + bool finalize = false; + size_t extra; + + extra = table_effective_width - table_minimum_width; + + for (size_t j = 0; j < display_columns; j++) + width[j] = (size_t) -1; + + for (;;) { + bool restart = false; + + for (size_t j = 0; j < display_columns; j++) { + size_t delta, w; + + /* Did this column already get something assigned? If so, let's skip to the next */ + if (width[j] != (size_t) -1) + continue; + + if (weight_sum == 0) + w = minimum_width[j] + extra / (display_columns - j); /* avoid division by zero */ + else + w = minimum_width[j] + (extra * column_weight[j]) / weight_sum; + + if (w >= requested_width[j]) { + /* Never give more than requested. If we hit a column like this, there's more + * space to allocate to other columns which means we need to restart the + * iteration. However, if we hit a column like this, let's assign it the space + * it wanted for good early.*/ + + w = requested_width[j]; + restart = true; + + } else if (!finalize) + continue; + + width[j] = w; + + assert(w >= minimum_width[j]); + delta = w - minimum_width[j]; + + assert(delta <= extra); + extra -= delta; + + assert(weight_sum >= column_weight[j]); + weight_sum -= column_weight[j]; + + if (restart && !finalize) + break; + } + + if (finalize) + break; + + if (!restart) + finalize = true; + } + + if (!any_soft) /* Some columns got less than requested. If some cells were "soft", + * let's try to reformat them with the new widths. Otherwise, let's + * move on. */ + break; + } + } + + /* Second pass: show output */ + for (size_t i = t->header ? 0 : 1; i < n_rows; i++) { + size_t n_subline = 0; + bool more_sublines; + TableData **row; + + if (sorted) + row = t->data + sorted[i]; + else + row = t->data + i * t->n_columns; + + do { + const char *gap_color = NULL; + more_sublines = false; + + for (size_t j = 0; j < display_columns; j++) { + _cleanup_free_ char *buffer = NULL, *extracted = NULL; + bool lines_truncated = false; + const char *field, *color = NULL; + TableData *d; + size_t l; + + assert_se(d = row[t->display_map ? t->display_map[j] : j]); + + field = table_data_format(t, d, false, width[j], NULL); + if (!field) + return -ENOMEM; + + r = string_extract_line(field, n_subline, &extracted); + if (r < 0) + return r; + if (r > 0) { + /* There are more lines to come */ + if ((t->cell_height_max == (size_t) -1 || n_subline + 1 < t->cell_height_max)) + more_sublines = true; /* There are more lines to come */ + else + lines_truncated = true; + } + if (extracted) + field = extracted; + + l = utf8_console_width(field); + if (l > width[j]) { + /* Field is wider than allocated space. Let's ellipsize */ + + buffer = ellipsize(field, width[j], /* ellipsize at the end if we truncated coming lines, otherwise honour configuration */ + lines_truncated ? 100 : d->ellipsize_percent); + if (!buffer) + return -ENOMEM; + + field = buffer; + } else { + if (lines_truncated) { + _cleanup_free_ char *padded = NULL; + + /* We truncated more lines of this cell, let's add an + * ellipsis. We first append it, but that might make our + * string grow above what we have space for, hence ellipsize + * right after. This will truncate the ellipsis and add a new + * one. */ + + padded = strjoin(field, special_glyph(SPECIAL_GLYPH_ELLIPSIS)); + if (!padded) + return -ENOMEM; + + buffer = ellipsize(padded, width[j], 100); + if (!buffer) + return -ENOMEM; + + field = buffer; + l = utf8_console_width(field); + } + + if (l < width[j]) { + _cleanup_free_ char *aligned = NULL; + /* Field is shorter than allocated space. Let's align with spaces */ + + aligned = align_string_mem(field, d->url, width[j], d->align_percent); + if (!aligned) + return -ENOMEM; + + free_and_replace(buffer, aligned); + field = buffer; + } + } + + if (l >= width[j] && d->url) { + _cleanup_free_ char *clickable = NULL; + + r = terminal_urlify(d->url, field, &clickable); + if (r < 0) + return r; + + free_and_replace(buffer, clickable); + field = buffer; + } + + if (colors_enabled()) { + if (gap_color) + fputs(gap_color, f); + else if (row == t->data) /* underline header line fully, including the column separator */ + fputs(ansi_underline(), f); + } + + if (j > 0) + fputc(' ', f); /* column separator left of cell */ + + if (colors_enabled()) { + color = table_data_color(d); + + /* Undo gap color */ + if (gap_color || (color && row == t->data)) + fputs(ANSI_NORMAL, f); + + if (color) + fputs(color, f); + else if (gap_color && row == t->data) /* underline header line cell */ + fputs(ansi_underline(), f); + } + + fputs(field, f); + + if (colors_enabled() && (color || row == t->data)) + fputs(ANSI_NORMAL, f); + + gap_color = table_data_rgap_color(d); + } + + fputc('\n', f); + n_subline ++; + } while (more_sublines); + } + + return fflush_and_check(f); +} + +int table_format(Table *t, char **ret) { + _cleanup_fclose_ FILE *f = NULL; + char *buf = NULL; + size_t sz = 0; + int r; + + f = open_memstream_unlocked(&buf, &sz); + if (!f) + return -ENOMEM; + + r = table_print(t, f); + if (r < 0) + return r; + + f = safe_fclose(f); + + *ret = buf; + + return 0; +} + +size_t table_get_rows(Table *t) { + if (!t) + return 0; + + assert(t->n_columns > 0); + return t->n_cells / t->n_columns; +} + +size_t table_get_columns(Table *t) { + if (!t) + return 0; + + assert(t->n_columns > 0); + return t->n_columns; +} + +int table_set_reverse(Table *t, size_t column, bool b) { + assert(t); + assert(column < t->n_columns); + + if (!t->reverse_map) { + if (!b) + return 0; + + t->reverse_map = new0(bool, t->n_columns); + if (!t->reverse_map) + return -ENOMEM; + } + + t->reverse_map[column] = b; + return 0; +} + +TableCell *table_get_cell(Table *t, size_t row, size_t column) { + size_t i; + + assert(t); + + if (column >= t->n_columns) + return NULL; + + i = row * t->n_columns + column; + if (i >= t->n_cells) + return NULL; + + return TABLE_INDEX_TO_CELL(i); +} + +const void *table_get(Table *t, TableCell *cell) { + TableData *d; + + assert(t); + + d = table_get_data(t, cell); + if (!d) + return NULL; + + return d->data; +} + +const void* table_get_at(Table *t, size_t row, size_t column) { + TableCell *cell; + + cell = table_get_cell(t, row, column); + if (!cell) + return NULL; + + return table_get(t, cell); +} + +static int table_data_to_json(TableData *d, JsonVariant **ret) { + + switch (d->type) { + + case TABLE_EMPTY: + return json_variant_new_null(ret); + + case TABLE_STRING: + case TABLE_PATH: + return json_variant_new_string(ret, d->string); + + case TABLE_STRV: + case TABLE_STRV_WRAPPED: + return json_variant_new_array_strv(ret, d->strv); + + case TABLE_BOOLEAN: + return json_variant_new_boolean(ret, d->boolean); + + case TABLE_TIMESTAMP: + case TABLE_TIMESTAMP_UTC: + case TABLE_TIMESTAMP_RELATIVE: + if (d->timestamp == USEC_INFINITY) + return json_variant_new_null(ret); + + return json_variant_new_unsigned(ret, d->timestamp); + + case TABLE_TIMESPAN: + case TABLE_TIMESPAN_MSEC: + if (d->timespan == USEC_INFINITY) + return json_variant_new_null(ret); + + return json_variant_new_unsigned(ret, d->timespan); + + case TABLE_SIZE: + case TABLE_BPS: + if (d->size == (uint64_t) -1) + return json_variant_new_null(ret); + + return json_variant_new_unsigned(ret, d->size); + + case TABLE_INT: + return json_variant_new_integer(ret, d->int_val); + + case TABLE_INT8: + return json_variant_new_integer(ret, d->int8); + + case TABLE_INT16: + return json_variant_new_integer(ret, d->int16); + + case TABLE_INT32: + return json_variant_new_integer(ret, d->int32); + + case TABLE_INT64: + return json_variant_new_integer(ret, d->int64); + + case TABLE_UINT: + return json_variant_new_unsigned(ret, d->uint_val); + + case TABLE_UINT8: + return json_variant_new_unsigned(ret, d->uint8); + + case TABLE_UINT16: + return json_variant_new_unsigned(ret, d->uint16); + + case TABLE_UINT32: + return json_variant_new_unsigned(ret, d->uint32); + + case TABLE_UINT64: + return json_variant_new_unsigned(ret, d->uint64); + + case TABLE_PERCENT: + return json_variant_new_integer(ret, d->percent); + + case TABLE_IFINDEX: + return json_variant_new_integer(ret, d->ifindex); + + case TABLE_IN_ADDR: + return json_variant_new_array_bytes(ret, &d->address, FAMILY_ADDRESS_SIZE(AF_INET)); + + case TABLE_IN6_ADDR: + return json_variant_new_array_bytes(ret, &d->address, FAMILY_ADDRESS_SIZE(AF_INET6)); + + case TABLE_ID128: { + char buf[SD_ID128_STRING_MAX]; + return json_variant_new_string(ret, sd_id128_to_string(d->id128, buf)); + } + + case TABLE_UUID: { + char buf[ID128_UUID_STRING_MAX]; + return json_variant_new_string(ret, id128_to_uuid_string(d->id128, buf)); + } + + default: + return -EINVAL; + } +} + +static char* string_to_json_field_name(const char *f) { + /* Tries to make a string more suitable as JSON field name. There are no strict rules defined what a + * field name can be hence this is a bit vague and black magic. Right now we only convert spaces to + * underscores and leave everything as is. */ + + char *c = strdup(f); + if (!c) + return NULL; + + for (char *x = c; *x; x++) + if (isspace(*x)) + *x = '_'; + + return c; +} + +int table_to_json(Table *t, JsonVariant **ret) { + JsonVariant **rows = NULL, **elements = NULL; + _cleanup_free_ size_t *sorted = NULL; + size_t n_rows, display_columns; + int r; + + assert(t); + + /* Ensure we have no incomplete rows */ + assert(t->n_cells % t->n_columns == 0); + + n_rows = t->n_cells / t->n_columns; + assert(n_rows > 0); /* at least the header row must be complete */ + + if (t->sort_map) { + /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */ + + sorted = new(size_t, n_rows); + if (!sorted) { + r = -ENOMEM; + goto finish; + } + + for (size_t i = 0; i < n_rows; i++) + sorted[i] = i * t->n_columns; + + typesafe_qsort_r(sorted, n_rows, table_data_compare, t); + } + + if (t->display_map) + display_columns = t->n_display_map; + else + display_columns = t->n_columns; + assert(display_columns > 0); + + elements = new0(JsonVariant*, display_columns * 2); + if (!elements) { + r = -ENOMEM; + goto finish; + } + + for (size_t j = 0; j < display_columns; j++) { + _cleanup_free_ char *mangled = NULL; + const char *formatted; + TableData *d; + + assert_se(d = t->data[t->display_map ? t->display_map[j] : j]); + + /* Field names must be strings, hence format whatever we got here as a string first */ + formatted = table_data_format(t, d, true, SIZE_MAX, NULL); + if (!formatted) { + r = -ENOMEM; + goto finish; + } + + /* Arbitrary strings suck as field names, try to mangle them into something more suitable hence */ + mangled = string_to_json_field_name(formatted); + if (!mangled) { + r = -ENOMEM; + goto finish; + } + + r = json_variant_new_string(elements + j*2, mangled); + if (r < 0) + goto finish; + } + + rows = new0(JsonVariant*, n_rows-1); + if (!rows) { + r = -ENOMEM; + goto finish; + } + + for (size_t i = 1; i < n_rows; i++) { + TableData **row; + + if (sorted) + row = t->data + sorted[i]; + else + row = t->data + i * t->n_columns; + + for (size_t j = 0; j < display_columns; j++) { + TableData *d; + size_t k; + + assert_se(d = row[t->display_map ? t->display_map[j] : j]); + + k = j*2+1; + elements[k] = json_variant_unref(elements[k]); + + r = table_data_to_json(d, elements + k); + if (r < 0) + goto finish; + } + + r = json_variant_new_object(rows + i - 1, elements, display_columns * 2); + if (r < 0) + goto finish; + } + + r = json_variant_new_array(ret, rows, n_rows - 1); + +finish: + if (rows) { + json_variant_unref_many(rows, n_rows-1); + free(rows); + } + + if (elements) { + json_variant_unref_many(elements, display_columns*2); + free(elements); + } + + return r; +} + +int table_print_json(Table *t, FILE *f, JsonFormatFlags flags) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + int r; + + assert(t); + + if (!f) + f = stdout; + + r = table_to_json(t, &v); + if (r < 0) + return r; + + json_variant_dump(v, flags, f, NULL); + + return fflush_and_check(f); +} diff --git a/src/shared/format-table.h b/src/shared/format-table.h new file mode 100644 index 0000000..965549b --- /dev/null +++ b/src/shared/format-table.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> +#include <sys/types.h> + +#include "json.h" +#include "macro.h" + +typedef enum TableDataType { + TABLE_EMPTY, + TABLE_STRING, + TABLE_STRV, + TABLE_STRV_WRAPPED, + TABLE_PATH, + TABLE_BOOLEAN, + TABLE_TIMESTAMP, + TABLE_TIMESTAMP_UTC, + TABLE_TIMESTAMP_RELATIVE, + TABLE_TIMESPAN, + TABLE_TIMESPAN_MSEC, + TABLE_SIZE, + TABLE_BPS, + TABLE_INT, + TABLE_INT8, + TABLE_INT16, + TABLE_INT32, + TABLE_INT64, + TABLE_UINT, + TABLE_UINT8, + TABLE_UINT16, + TABLE_UINT32, + TABLE_UINT64, + TABLE_PERCENT, + TABLE_IFINDEX, + TABLE_IN_ADDR, /* Takes a union in_addr_union (or a struct in_addr) */ + TABLE_IN6_ADDR, /* Takes a union in_addr_union (or a struct in6_addr) */ + TABLE_ID128, + TABLE_UUID, + _TABLE_DATA_TYPE_MAX, + + /* The following are not really data types, but commands for table_add_cell_many() to make changes to + * a cell just added. */ + TABLE_SET_MINIMUM_WIDTH, + TABLE_SET_MAXIMUM_WIDTH, + TABLE_SET_WEIGHT, + TABLE_SET_ALIGN_PERCENT, + TABLE_SET_ELLIPSIZE_PERCENT, + TABLE_SET_COLOR, + TABLE_SET_RGAP_COLOR, + TABLE_SET_BOTH_COLORS, + TABLE_SET_URL, + TABLE_SET_UPPERCASE, + + _TABLE_DATA_TYPE_INVALID = -1, +} TableDataType; + +/* PIDs are just 32bit signed integers on Linux */ +#define TABLE_PID TABLE_INT32 +assert_cc(sizeof(pid_t) == sizeof(int32_t)); + +/* UIDs/GIDs are just 32bit unsigned integers on Linux */ +#define TABLE_UID TABLE_UINT32 +#define TABLE_GID TABLE_UINT32 +assert_cc(sizeof(uid_t) == sizeof(uint32_t)); +assert_cc(sizeof(gid_t) == sizeof(uint32_t)); + +typedef struct Table Table; +typedef struct TableCell TableCell; + +Table *table_new_internal(const char *first_header, ...) _sentinel_; +#define table_new(...) table_new_internal(__VA_ARGS__, NULL) +Table *table_new_raw(size_t n_columns); +Table *table_unref(Table *t); + +DEFINE_TRIVIAL_CLEANUP_FUNC(Table*, table_unref); + +int table_add_cell_full(Table *t, TableCell **ret_cell, TableDataType type, const void *data, size_t minimum_width, size_t maximum_width, unsigned weight, unsigned align_percent, unsigned ellipsize_percent); +static inline int table_add_cell(Table *t, TableCell **ret_cell, TableDataType type, const void *data) { + return table_add_cell_full(t, ret_cell, type, data, (size_t) -1, (size_t) -1, (unsigned) -1, (unsigned) -1, (unsigned) -1); +} +int table_add_cell_stringf(Table *t, TableCell **ret_cell, const char *format, ...) _printf_(3, 4); + +int table_fill_empty(Table *t, size_t until_column); + +int table_dup_cell(Table *t, TableCell *cell); + +int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width); +int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width); +int table_set_weight(Table *t, TableCell *cell, unsigned weight); +int table_set_align_percent(Table *t, TableCell *cell, unsigned percent); +int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent); +int table_set_color(Table *t, TableCell *cell, const char *color); +int table_set_rgap_color(Table *t, TableCell *cell, const char *color); +int table_set_url(Table *t, TableCell *cell, const char *url); +int table_set_uppercase(Table *t, TableCell *cell, bool b); + +int table_update(Table *t, TableCell *cell, TableDataType type, const void *data); + +int table_add_many_internal(Table *t, TableDataType first_type, ...); +#define table_add_many(t, ...) table_add_many_internal(t, __VA_ARGS__, _TABLE_DATA_TYPE_MAX) + +void table_set_header(Table *table, bool b); +void table_set_width(Table *t, size_t width); +void table_set_cell_height_max(Table *t, size_t height); +int table_set_empty_string(Table *t, const char *empty); +int table_set_display_all(Table *t); +int table_set_display(Table *t, size_t first_column, ...); +int table_set_sort(Table *t, size_t first_column, ...); +int table_set_reverse(Table *t, size_t column, bool b); +int table_hide_column_from_display(Table *t, size_t column); + +int table_print(Table *t, FILE *f); +int table_format(Table *t, char **ret); + +static inline TableCell* TABLE_HEADER_CELL(size_t i) { + return SIZE_TO_PTR(i + 1); +} + +size_t table_get_rows(Table *t); +size_t table_get_columns(Table *t); + +TableCell *table_get_cell(Table *t, size_t row, size_t column); + +const void *table_get(Table *t, TableCell *cell); +const void *table_get_at(Table *t, size_t row, size_t column); + +int table_to_json(Table *t, JsonVariant **ret); +int table_print_json(Table *t, FILE *f, JsonFormatFlags json_flags); + +#define table_log_add_error(r) \ + log_error_errno(r, "Failed to add cell(s) to table: %m") + +#define table_log_print_error(r) \ + log_error_errno(r, "Failed to print table: %m") + +#define table_log_sort_error(r) \ + log_error_errno(r, "Failed to sort table: %m") diff --git a/src/shared/fsck-util.h b/src/shared/fsck-util.h new file mode 100644 index 0000000..855137c --- /dev/null +++ b/src/shared/fsck-util.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* exit codes as defined in fsck(8) */ +enum { + FSCK_SUCCESS = 0, + FSCK_ERROR_CORRECTED = 1 << 0, + FSCK_SYSTEM_SHOULD_REBOOT = 1 << 1, + FSCK_ERRORS_LEFT_UNCORRECTED = 1 << 2, + FSCK_OPERATIONAL_ERROR = 1 << 3, + FSCK_USAGE_OR_SYNTAX_ERROR = 1 << 4, + FSCK_USER_CANCELLED = 1 << 5, + FSCK_SHARED_LIB_ERROR = 1 << 7, +}; diff --git a/src/shared/fstab-util.c b/src/shared/fstab-util.c new file mode 100644 index 0000000..292b97c --- /dev/null +++ b/src/shared/fstab-util.c @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "device-nodes.h" +#include "fstab-util.h" +#include "macro.h" +#include "mount-util.h" +#include "nulstr-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "string-util.h" +#include "strv.h" + +int fstab_has_fstype(const char *fstype) { + _cleanup_endmntent_ FILE *f = NULL; + struct mntent *m; + + f = setmntent(fstab_path(), "re"); + if (!f) + return errno == ENOENT ? false : -errno; + + for (;;) { + errno = 0; + m = getmntent(f); + if (!m) + return errno != 0 ? -errno : false; + + if (streq(m->mnt_type, fstype)) + return true; + } + return false; +} + +bool fstab_is_extrinsic(const char *mount, const char *opts) { + + /* Don't bother with the OS data itself */ + if (PATH_IN_SET(mount, + "/", + "/usr", + "/etc")) + return true; + + if (PATH_STARTSWITH_SET(mount, + "/run/initramfs", /* This should stay around from before we boot until after we shutdown */ + "/proc", /* All of this is API VFS */ + "/sys", /* … dito … */ + "/dev")) /* … dito … */ + return true; + + /* If this is an initrd mount, and we are not in the initrd, then leave + * this around forever, too. */ + if (opts && fstab_test_option(opts, "x-initrd.mount\0") && !in_initrd()) + return true; + + return false; +} + +int fstab_is_mount_point(const char *mount) { + _cleanup_endmntent_ FILE *f = NULL; + struct mntent *m; + + f = setmntent(fstab_path(), "re"); + if (!f) + return errno == ENOENT ? false : -errno; + + for (;;) { + errno = 0; + m = getmntent(f); + if (!m) + return errno != 0 ? -errno : false; + + if (path_equal(m->mnt_dir, mount)) + return true; + } + return false; +} + +int fstab_filter_options(const char *opts, const char *names, + const char **ret_namefound, char **ret_value, char **ret_filtered) { + const char *name, *namefound = NULL, *x; + _cleanup_strv_free_ char **stor = NULL; + _cleanup_free_ char *v = NULL, **strv = NULL; + int r; + + assert(names && *names); + + if (!opts) + goto answer; + + /* If !ret_value and !ret_filtered, this function is not allowed to fail. */ + + if (!ret_filtered) { + for (const char *word = opts;;) { + const char *end = word; + + /* Look for an *non-escaped* comma separator. Only commas can be escaped, so "\," is + * the only valid escape sequence, so we can do a very simple test here. */ + for (;;) { + size_t n = strcspn(end, ","); + + end += n; + if (n > 0 && end[-1] == '\\') + end++; + else + break; + } + + NULSTR_FOREACH(name, names) { + if (end < word + strlen(name)) + continue; + if (!strneq(word, name, strlen(name))) + continue; + + /* We know that the string is NUL terminated, so *x is valid */ + x = word + strlen(name); + if (IN_SET(*x, '\0', '=', ',')) { + namefound = name; + if (ret_value) { + bool eq = *x == '='; + assert(eq || IN_SET(*x, ',', '\0')); + + r = free_and_strndup(&v, + eq ? x + 1 : NULL, + eq ? end - x - 1 : 0); + if (r < 0) + return r; + } + + break; + } + } + + if (*end) + word = end + 1; + else + break; + } + } else { + r = strv_split_full(&stor, opts, ",", EXTRACT_DONT_COALESCE_SEPARATORS | EXTRACT_UNESCAPE_SEPARATORS); + if (r < 0) + return r; + + strv = memdup(stor, sizeof(char*) * (strv_length(stor) + 1)); + if (!strv) + return -ENOMEM; + + char **t = strv; + for (char **s = strv; *s; s++) { + NULSTR_FOREACH(name, names) { + x = startswith(*s, name); + if (x && IN_SET(*x, '\0', '=')) + goto found; + } + + *t = *s; + t++; + continue; + found: + /* Keep the last occurrence found */ + namefound = name; + if (ret_value) { + assert(IN_SET(*x, '=', '\0')); + r = free_and_strdup(&v, *x == '=' ? x + 1 : NULL); + if (r < 0) + return r; + } + } + *t = NULL; + } + +answer: + if (ret_namefound) + *ret_namefound = namefound; + if (ret_filtered) { + char *f; + + f = strv_join_full(strv, ",", NULL, true); + if (!f) + return -ENOMEM; + + *ret_filtered = f; + } + if (ret_value) + *ret_value = TAKE_PTR(v); + + return !!namefound; +} + +int fstab_extract_values(const char *opts, const char *name, char ***values) { + _cleanup_strv_free_ char **optsv = NULL, **res = NULL; + char **s; + + assert(opts); + assert(name); + assert(values); + + optsv = strv_split(opts, ","); + if (!optsv) + return -ENOMEM; + + STRV_FOREACH(s, optsv) { + char *arg; + int r; + + arg = startswith(*s, name); + if (!arg || *arg != '=') + continue; + r = strv_extend(&res, arg + 1); + if (r < 0) + return r; + } + + *values = TAKE_PTR(res); + + return !!*values; +} + +int fstab_find_pri(const char *options, int *ret) { + _cleanup_free_ char *opt = NULL; + int r, pri; + + assert(ret); + + r = fstab_filter_options(options, "pri\0", NULL, &opt, NULL); + if (r < 0) + return r; + if (r == 0 || !opt) + return 0; + + r = safe_atoi(opt, &pri); + if (r < 0) + return r; + + *ret = pri; + return 1; +} + +static char *unquote(const char *s, const char* quotes) { + size_t l; + assert(s); + + /* This is rather stupid, simply removes the heading and + * trailing quotes if there is one. Doesn't care about + * escaping or anything. + * + * DON'T USE THIS FOR NEW CODE ANYMORE! */ + + l = strlen(s); + if (l < 2) + return strdup(s); + + if (strchr(quotes, s[0]) && s[l-1] == s[0]) + return strndup(s+1, l-2); + + return strdup(s); +} + +static char *tag_to_udev_node(const char *tagvalue, const char *by) { + _cleanup_free_ char *t = NULL, *u = NULL; + size_t enc_len; + + u = unquote(tagvalue, QUOTES); + if (!u) + return NULL; + + enc_len = strlen(u) * 4 + 1; + t = new(char, enc_len); + if (!t) + return NULL; + + if (encode_devnode_name(u, t, enc_len) < 0) + return NULL; + + return strjoin("/dev/disk/by-", by, "/", t); +} + +char *fstab_node_to_udev_node(const char *p) { + assert(p); + + if (startswith(p, "LABEL=")) + return tag_to_udev_node(p+6, "label"); + + if (startswith(p, "UUID=")) + return tag_to_udev_node(p+5, "uuid"); + + if (startswith(p, "PARTUUID=")) + return tag_to_udev_node(p+9, "partuuid"); + + if (startswith(p, "PARTLABEL=")) + return tag_to_udev_node(p+10, "partlabel"); + + return strdup(p); +} diff --git a/src/shared/fstab-util.h b/src/shared/fstab-util.h new file mode 100644 index 0000000..1a602cb --- /dev/null +++ b/src/shared/fstab-util.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> + +#include "macro.h" + +bool fstab_is_extrinsic(const char *mount, const char *opts); +int fstab_is_mount_point(const char *mount); +int fstab_has_fstype(const char *fstype); + +int fstab_filter_options(const char *opts, const char *names, const char **namefound, char **value, char **filtered); + +int fstab_extract_values(const char *opts, const char *name, char ***values); + +static inline bool fstab_test_option(const char *opts, const char *names) { + return !!fstab_filter_options(opts, names, NULL, NULL, NULL); +} + +int fstab_find_pri(const char *options, int *ret); + +static inline bool fstab_test_yes_no_option(const char *opts, const char *yes_no) { + const char *opt; + + /* If first name given is last, return 1. + * If second name given is last or neither is found, return 0. */ + + assert_se(fstab_filter_options(opts, yes_no, &opt, NULL, NULL) >= 0); + + return opt == yes_no; +} + +char *fstab_node_to_udev_node(const char *p); + +static inline const char* fstab_path(void) { + return secure_getenv("SYSTEMD_FSTAB") ?: "/etc/fstab"; +} diff --git a/src/shared/generate-ip-protocol-list.sh b/src/shared/generate-ip-protocol-list.sh new file mode 100755 index 0000000..3f91979 --- /dev/null +++ b/src/shared/generate-ip-protocol-list.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -eu + +$1 -dM -include netinet/in.h - </dev/null | \ + awk '/^#define[ \t]+IPPROTO_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \ + sed -e 's/IPPROTO_//' diff --git a/src/shared/generate-syscall-list.py b/src/shared/generate-syscall-list.py new file mode 100755 index 0000000..030c3fe --- /dev/null +++ b/src/shared/generate-syscall-list.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +import sys +import os + +s390 = 's390' in os.uname().machine +arm = 'arm' in os.uname().machine + +for line in open(sys.argv[1]): + if line.startswith('s390_') and not s390: + continue + if line.startswith('arm_') and not arm: + continue + + print('"{}\\0"'.format(line.strip())) diff --git a/src/shared/generator.c b/src/shared/generator.c new file mode 100644 index 0000000..1eccc5a --- /dev/null +++ b/src/shared/generator.c @@ -0,0 +1,631 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "cgroup-util.h" +#include "dropin.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "fstab-util.h" +#include "generator.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "path-util.h" +#include "special.h" +#include "specifier.h" +#include "string-util.h" +#include "time-util.h" +#include "unit-name.h" +#include "util.h" + +int generator_open_unit_file( + const char *dest, + const char *source, + const char *name, + FILE **file) { + + const char *unit; + FILE *f; + int r; + + unit = prefix_roota(dest, name); + + r = fopen_unlocked(unit, "wxe", &f); + if (r < 0) { + if (source && r == -EEXIST) + return log_error_errno(r, + "Failed to create unit file %s, as it already exists. Duplicate entry in %s?", + unit, source); + else + return log_error_errno(r, + "Failed to create unit file %s: %m", + unit); + } + + fprintf(f, + "# Automatically generated by %s\n\n", + program_invocation_short_name); + + *file = f; + return 0; +} + +int generator_add_symlink(const char *dir, const char *dst, const char *dep_type, const char *src) { + /* Adds a symlink from <dst>.<dep_type>/ to <src> (if src is absolute) + * or ../<src> (otherwise). */ + + const char *from, *to; + + from = path_is_absolute(src) ? src : strjoina("../", src); + to = strjoina(dir, "/", dst, ".", dep_type, "/", basename(src)); + + mkdir_parents_label(to, 0755); + if (symlink(from, to) < 0) + if (errno != EEXIST) + return log_error_errno(errno, "Failed to create symlink \"%s\": %m", to); + + return 0; +} + +static int write_fsck_sysroot_service(const char *dir, const char *what) { + _cleanup_free_ char *device = NULL, *escaped = NULL, *escaped2 = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit; + int r; + + escaped = specifier_escape(what); + if (!escaped) + return log_oom(); + + escaped2 = cescape(escaped); + if (!escaped2) + return log_oom(); + + unit = strjoina(dir, "/"SPECIAL_FSCK_ROOT_SERVICE); + log_debug("Creating %s", unit); + + r = unit_name_from_path(what, ".device", &device); + if (r < 0) + return log_error_errno(r, "Failed to convert device \"%s\" to unit name: %m", what); + + f = fopen(unit, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", unit); + + fprintf(f, + "# Automatically generated by %1$s\n\n" + "[Unit]\n" + "Description=File System Check on %2$s\n" + "Documentation=man:systemd-fsck-root.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%3$s\n" + "Conflicts=shutdown.target\n" + "After=initrd-root-device.target local-fs-pre.target %3$s\n" + "Before=shutdown.target\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart=" SYSTEMD_FSCK_PATH " %4$s\n" + "TimeoutSec=0\n", + program_invocation_short_name, + escaped, + device, + escaped2); + + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write unit file %s: %m", unit); + + return 0; +} + +int generator_write_fsck_deps( + FILE *f, + const char *dir, + const char *what, + const char *where, + const char *fstype) { + + int r; + + assert(f); + assert(dir); + assert(what); + assert(where); + + if (!is_device_path(what)) { + log_warning("Checking was requested for \"%s\", but it is not a device.", what); + return 0; + } + + if (!isempty(fstype) && !streq(fstype, "auto")) { + r = fsck_exists(fstype); + if (r < 0) + log_warning_errno(r, "Checking was requested for %s, but couldn't detect if fsck.%s may be used, proceeding: %m", what, fstype); + else if (r == 0) { + /* treat missing check as essentially OK */ + log_debug("Checking was requested for %s, but fsck.%s does not exist.", what, fstype); + return 0; + } + } + + if (path_equal(where, "/")) { + const char *lnk; + + lnk = strjoina(dir, "/" SPECIAL_LOCAL_FS_TARGET ".wants/" SPECIAL_FSCK_ROOT_SERVICE); + + (void) mkdir_parents(lnk, 0755); + if (symlink(SYSTEM_DATA_UNIT_PATH "/" SPECIAL_FSCK_ROOT_SERVICE, lnk) < 0) + return log_error_errno(errno, "Failed to create symlink %s: %m", lnk); + + } else { + _cleanup_free_ char *_fsck = NULL; + const char *fsck, *dep; + + if (in_initrd() && path_equal(where, "/sysroot")) { + r = write_fsck_sysroot_service(dir, what); + if (r < 0) + return r; + + fsck = SPECIAL_FSCK_ROOT_SERVICE; + dep = "Requires"; + } else { + /* When this is /usr, then let's add a Wants= dependency, otherwise a Requires= + * dependency. Why? We can't possibly unmount /usr during shutdown, but if we have a + * Requires= from /usr onto a fsck@.service unit and that unit is shut down, then + * we'd have to unmount /usr too. */ + + dep = !in_initrd() && path_equal(where, "/usr") ? "Wants" : "Requires"; + + r = unit_name_from_path_instance("systemd-fsck", what, ".service", &_fsck); + if (r < 0) + return log_error_errno(r, "Failed to create fsck service name: %m"); + + fsck = _fsck; + } + + fprintf(f, + "%1$s=%2$s\n" + "After=%2$s\n", + dep, fsck); + } + + return 0; +} + +int generator_write_timeouts( + const char *dir, + const char *what, + const char *where, + const char *opts, + char **filtered) { + + /* Configure how long we wait for a device that backs a mount point or a + * swap partition to show up. This is useful to support endless device timeouts + * for devices that show up only after user input, like crypto devices. */ + + _cleanup_free_ char *node = NULL, *unit = NULL, *timeout = NULL; + usec_t u; + int r; + + r = fstab_filter_options(opts, "comment=systemd.device-timeout\0" + "x-systemd.device-timeout\0", + NULL, &timeout, filtered); + if (r <= 0) + return r; + + r = parse_sec_fix_0(timeout, &u); + if (r < 0) { + log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout); + return 0; + } + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + if (!is_device_path(node)) { + log_warning("x-systemd.device-timeout ignored for %s", what); + return 0; + } + + r = unit_name_from_path(node, ".device", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path: %m"); + + return write_drop_in_format(dir, unit, 50, "device-timeout", + "# Automatically generated by %s\n" + "# from supplied options \"%s\"\n\n" + "[Unit]\n" + "JobRunningTimeoutSec=%s", + program_invocation_short_name, + opts, + timeout); +} + +int generator_write_device_deps( + const char *dir, + const char *what, + const char *where, + const char *opts) { + + /* fstab records that specify _netdev option should apply the network + * ordering on the actual device depending on network connection. If we + * are not mounting real device (NFS, CIFS), we rely on _netdev effect + * on the mount unit itself. */ + + _cleanup_free_ char *node = NULL, *unit = NULL; + int r; + + if (fstab_is_extrinsic(where, opts)) + return 0; + + if (!fstab_test_option(opts, "_netdev\0")) + return 0; + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + + /* Nothing to apply dependencies to. */ + if (!is_device_path(node)) + return 0; + + r = unit_name_from_path(node, ".device", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + node); + + /* See mount_add_default_dependencies for explanation why we create such + * dependencies. */ + return write_drop_in_format(dir, unit, 50, "netdev-dependencies", + "# Automatically generated by %s\n\n" + "[Unit]\n" + "After=" SPECIAL_NETWORK_ONLINE_TARGET " " SPECIAL_NETWORK_TARGET "\n" + "Wants=" SPECIAL_NETWORK_ONLINE_TARGET "\n", + program_invocation_short_name); +} + +int generator_write_initrd_root_device_deps(const char *dir, const char *what) { + _cleanup_free_ char *unit = NULL; + int r; + + r = unit_name_from_path(what, ".device", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + what); + + return write_drop_in_format(dir, SPECIAL_INITRD_ROOT_DEVICE_TARGET, 50, "root-device", + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Requires=%s\n" + "After=%s", + program_invocation_short_name, + unit, + unit); +} + +int generator_hook_up_mkswap( + const char *dir, + const char *what) { + + _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit_file; + int r; + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + + /* Nothing to work on. */ + if (!is_device_path(node)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Cannot format something that is not a device node: %s", + node); + + r = unit_name_from_path_instance("systemd-mkswap", node, ".service", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m", + node); + + unit_file = prefix_roota(dir, unit); + log_debug("Creating %s", unit_file); + + escaped = cescape(node); + if (!escaped) + return log_oom(); + + r = unit_name_from_path(what, ".swap", &where_unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + what); + + f = fopen(unit_file, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", + unit_file); + + fprintf(f, + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Description=Make Swap on %%f\n" + "Documentation=man:systemd-mkswap@.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%%i.device\n" + "Conflicts=shutdown.target\n" + "After=%%i.device\n" + "Before=shutdown.target %s\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart="SYSTEMD_MAKEFS_PATH " swap %s\n" + "TimeoutSec=0\n", + program_invocation_short_name, + where_unit, + escaped); + + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write unit file %s: %m", unit_file); + + return generator_add_symlink(dir, where_unit, "requires", unit); +} + +int generator_hook_up_mkfs( + const char *dir, + const char *what, + const char *where, + const char *type) { + + _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit_file; + int r; + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + + /* Nothing to work on. */ + if (!is_device_path(node)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Cannot format something that is not a device node: %s", + node); + + if (!type || streq(type, "auto")) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Cannot format partition %s, filesystem type is not specified", + node); + + r = unit_name_from_path_instance("systemd-makefs", node, ".service", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m", + node); + + unit_file = prefix_roota(dir, unit); + log_debug("Creating %s", unit_file); + + escaped = cescape(node); + if (!escaped) + return log_oom(); + + r = unit_name_from_path(where, ".mount", &where_unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + where); + + f = fopen(unit_file, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", + unit_file); + + fprintf(f, + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Description=Make File System on %%f\n" + "Documentation=man:systemd-makefs@.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%%i.device\n" + "Conflicts=shutdown.target\n" + "After=%%i.device\n" + /* fsck might or might not be used, so let's be safe and order + * ourselves before both systemd-fsck@.service and the mount unit. */ + "Before=shutdown.target systemd-fsck@%%i.service %s\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart="SYSTEMD_MAKEFS_PATH " %s %s\n" + "TimeoutSec=0\n", + program_invocation_short_name, + where_unit, + type, + escaped); + // XXX: what about local-fs-pre.target? + + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write unit file %s: %m", unit_file); + + return generator_add_symlink(dir, where_unit, "requires", unit); +} + +int generator_hook_up_growfs( + const char *dir, + const char *where, + const char *target) { + + _cleanup_free_ char *unit = NULL, *escaped = NULL, *where_unit = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit_file; + int r; + + escaped = cescape(where); + if (!escaped) + return log_oom(); + + r = unit_name_from_path_instance("systemd-growfs", where, ".service", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m", + where); + + r = unit_name_from_path(where, ".mount", &where_unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + where); + + unit_file = prefix_roota(dir, unit); + log_debug("Creating %s", unit_file); + + f = fopen(unit_file, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", + unit_file); + + fprintf(f, + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Description=Grow File System on %%f\n" + "Documentation=man:systemd-growfs@.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%%i.mount\n" + "Conflicts=shutdown.target\n" + "After=%%i.mount\n" + "Before=shutdown.target %s\n", + program_invocation_short_name, + target); + + if (empty_or_root(where)) /* Make sure the root fs is actually writable before we resize it */ + fprintf(f, + "After=systemd-remount-fs.service\n"); + + fprintf(f, + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart="SYSTEMD_GROWFS_PATH " %s\n" + "TimeoutSec=0\n", + escaped); + + return generator_add_symlink(dir, where_unit, "wants", unit); +} + +int generator_enable_remount_fs_service(const char *dir) { + /* Pull in systemd-remount-fs.service */ + return generator_add_symlink(dir, SPECIAL_LOCAL_FS_TARGET, "wants", + SYSTEM_DATA_UNIT_PATH "/" SPECIAL_REMOUNT_FS_SERVICE); +} + +int generator_write_blockdev_dependency( + FILE *f, + const char *what) { + + _cleanup_free_ char *escaped = NULL; + int r; + + assert(f); + assert(what); + + if (!path_startswith(what, "/dev/")) + return 0; + + r = unit_name_path_escape(what, &escaped); + if (r < 0) + return log_error_errno(r, "Failed to escape device node path %s: %m", what); + + fprintf(f, + "After=blockdev@%s.target\n", + escaped); + + return 0; +} + +int generator_write_cryptsetup_unit_section( + FILE *f, + const char *source) { + + assert(f); + + fprintf(f, + "[Unit]\n" + "Description=Cryptography Setup for %%I\n" + "Documentation=man:crypttab(5) man:systemd-cryptsetup-generator(8) man:systemd-cryptsetup@.service(8)\n"); + + if (source) + fprintf(f, "SourcePath=%s\n", source); + + fprintf(f, + "DefaultDependencies=no\n" + "IgnoreOnIsolate=true\n" + "After=cryptsetup-pre.target systemd-udevd-kernel.socket\n" + "Before=blockdev@dev-mapper-%%i.target\n" + "Wants=blockdev@dev-mapper-%%i.target\n"); + + return 0; +} + +int generator_write_cryptsetup_service_section( + FILE *f, + const char *name, + const char *what, + const char *password, + const char *options) { + + _cleanup_free_ char *name_escaped = NULL, *what_escaped = NULL, *password_escaped = NULL, *options_escaped = NULL; + + assert(f); + assert(name); + assert(what); + + name_escaped = specifier_escape(name); + if (!name_escaped) + return log_oom(); + + what_escaped = specifier_escape(what); + if (!what_escaped) + return log_oom(); + + if (password) { + password_escaped = specifier_escape(password); + if (!password_escaped) + return log_oom(); + } + + if (options) { + options_escaped = specifier_escape(options); + if (!options_escaped) + return log_oom(); + } + + fprintf(f, + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "TimeoutSec=0\n" /* The binary handles timeouts on its own */ + "KeyringMode=shared\n" /* Make sure we can share cached keys among instances */ + "OOMScoreAdjust=500\n" /* Unlocking can allocate a lot of memory if Argon2 is used */ + "ExecStart=" SYSTEMD_CRYPTSETUP_PATH " attach '%s' '%s' '%s' '%s'\n" + "ExecStop=" SYSTEMD_CRYPTSETUP_PATH " detach '%s'\n", + name_escaped, what_escaped, strempty(password_escaped), strempty(options_escaped), + name_escaped); + + return 0; +} + +void log_setup_generator(void) { + /* Disable talking to syslog/journal (i.e. the two IPC-based loggers) if we run in system context. */ + if (cg_pid_get_owner_uid(0, NULL) == -ENXIO /* not running in a per-user slice */) + log_set_prohibit_ipc(true); + + log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); /* This effectively means: journal for per-user generators, kmsg otherwise */ + log_parse_environment(); + (void) log_open(); +} diff --git a/src/shared/generator.h b/src/shared/generator.h new file mode 100644 index 0000000..ff6072f --- /dev/null +++ b/src/shared/generator.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +#include "main-func.h" + +int generator_open_unit_file( + const char *dest, + const char *source, + const char *name, + FILE **file); + +int generator_add_symlink(const char *dir, const char *dst, const char *dep_type, const char *src); + +int generator_write_fsck_deps( + FILE *f, + const char *dir, + const char *what, + const char *where, + const char *type); + +int generator_write_timeouts( + const char *dir, + const char *what, + const char *where, + const char *opts, + char **filtered); + +int generator_write_blockdev_dependency( + FILE *f, + const char *what); + +int generator_write_cryptsetup_unit_section( + FILE *f, + const char *source); + +int generator_write_cryptsetup_service_section( + FILE *f, + const char *name, + const char *what, + const char *password, + const char *options); + +int generator_write_device_deps( + const char *dir, + const char *what, + const char *where, + const char *opts); + +int generator_write_initrd_root_device_deps( + const char *dir, + const char *what); + +int generator_hook_up_mkswap( + const char *dir, + const char *what); +int generator_hook_up_mkfs( + const char *dir, + const char *what, + const char *where, + const char *type); +int generator_hook_up_growfs( + const char *dir, + const char *where, + const char *target); + +int generator_enable_remount_fs_service(const char *dir); + +void log_setup_generator(void); + +/* Similar to DEFINE_MAIN_FUNCTION, but initializes logging and assigns positional arguments. */ +#define DEFINE_MAIN_GENERATOR_FUNCTION(impl) \ + _DEFINE_MAIN_FUNCTION( \ + ({ \ + log_setup_generator(); \ + if (argc > 1 && argc != 4) \ + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), \ + "This program takes zero or three arguments."); \ + }), \ + impl(argc > 1 ? argv[1] : "/tmp", \ + argc > 1 ? argv[2] : "/tmp", \ + argc > 1 ? argv[3] : "/tmp"), \ + r < 0 ? EXIT_FAILURE : EXIT_SUCCESS) diff --git a/src/shared/geneve-util.c b/src/shared/geneve-util.c new file mode 100644 index 0000000..36ef9c8 --- /dev/null +++ b/src/shared/geneve-util.c @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "geneve-util.h" +#include "string-table.h" + +static const char* const geneve_df_table[_NETDEV_GENEVE_DF_MAX] = { + [NETDEV_GENEVE_DF_UNSET] = "unset", + [NETDEV_GENEVE_DF_SET] = "set", + [NETDEV_GENEVE_DF_INHERIT] = "inherit", +}; + +DEFINE_STRING_TABLE_LOOKUP(geneve_df, GeneveDF); diff --git a/src/shared/geneve-util.h b/src/shared/geneve-util.h new file mode 100644 index 0000000..3865f80 --- /dev/null +++ b/src/shared/geneve-util.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <linux/if_link.h> + +#include "conf-parser.h" + +typedef enum GeneveDF { + NETDEV_GENEVE_DF_UNSET = GENEVE_DF_UNSET, + NETDEV_GENEVE_DF_SET = GENEVE_DF_SET, + NETDEV_GENEVE_DF_INHERIT = GENEVE_DF_INHERIT, + _NETDEV_GENEVE_DF_MAX, + _NETDEV_GENEVE_DF_INVALID = -1, +} GeneveDF; + +const char *geneve_df_to_string(GeneveDF d) _const_; +GeneveDF geneve_df_from_string(const char *d) _pure_; diff --git a/src/shared/gpt.c b/src/shared/gpt.c new file mode 100644 index 0000000..15ea2f0 --- /dev/null +++ b/src/shared/gpt.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "gpt.h" +#include "string-util.h" + +const GptPartitionType gpt_partition_type_table[] = { + { GPT_ROOT_X86, "root-x86" }, + { GPT_ROOT_X86_VERITY, "root-x86-verity" }, + { GPT_ROOT_X86_64, "root-x86-64" }, + { GPT_ROOT_X86_64_VERITY, "root-x86-64-verity" }, + { GPT_ROOT_ARM, "root-arm" }, + { GPT_ROOT_ARM_VERITY, "root-arm-verity" }, + { GPT_ROOT_ARM_64, "root-arm64" }, + { GPT_ROOT_ARM_64_VERITY, "root-arm64-verity" }, + { GPT_ROOT_IA64, "root-ia64" }, + { GPT_ROOT_IA64_VERITY, "root-ia64-verity" }, + { GPT_ROOT_RISCV32, "root-riscv32" }, + { GPT_ROOT_RISCV32_VERITY, "root-riscv32-verity" }, + { GPT_ROOT_RISCV64, "root-riscv64" }, + { GPT_ROOT_RISCV64_VERITY, "root-riscv64-verity" }, +#ifdef GPT_ROOT_NATIVE + { GPT_ROOT_NATIVE, "root" }, + { GPT_ROOT_NATIVE_VERITY, "root-verity" }, +#endif +#ifdef GPT_ROOT_SECONDARY + { GPT_ROOT_SECONDARY, "root-secondary" }, + { GPT_ROOT_SECONDARY_VERITY, "root-secondary-verity" }, +#endif + { GPT_USR_X86, "usr-x86" }, + { GPT_USR_X86_VERITY, "usr-x86-verity" }, + { GPT_USR_X86_64, "usr-x86-64" }, + { GPT_USR_X86_64_VERITY, "usr-x86-64-verity" }, + { GPT_USR_ARM, "usr-arm" }, + { GPT_USR_ARM_VERITY, "usr-arm-verity" }, + { GPT_USR_ARM_64, "usr-arm64" }, + { GPT_USR_ARM_64_VERITY, "usr-arm64-verity" }, + { GPT_USR_IA64, "usr-ia64" }, + { GPT_USR_IA64_VERITY, "usr-ia64-verity" }, + { GPT_USR_RISCV32, "usr-riscv32" }, + { GPT_USR_RISCV32_VERITY, "usr-riscv32-verity" }, + { GPT_USR_RISCV64, "usr-riscv64" }, + { GPT_USR_RISCV64_VERITY, "usr-riscv64-verity" }, +#ifdef GPT_USR_NATIVE + { GPT_USR_NATIVE, "usr" }, + { GPT_USR_NATIVE_VERITY, "usr-verity" }, +#endif +#ifdef GPT_USR_SECONDARY + { GPT_USR_SECONDARY, "usr-secondary" }, + { GPT_USR_SECONDARY_VERITY, "usr-secondary-verity" }, +#endif + { GPT_ESP, "esp" }, + { GPT_XBOOTLDR, "xbootldr" }, + { GPT_SWAP, "swap" }, + { GPT_HOME, "home" }, + { GPT_SRV, "srv" }, + { GPT_VAR, "var" }, + { GPT_TMP, "tmp" }, + { GPT_USER_HOME, "user-home" }, + { GPT_LINUX_GENERIC, "linux-generic" }, + {} +}; + +const char *gpt_partition_type_uuid_to_string(sd_id128_t id) { + for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table) - 1; i++) + if (sd_id128_equal(id, gpt_partition_type_table[i].uuid)) + return gpt_partition_type_table[i].name; + + return NULL; +} + +const char *gpt_partition_type_uuid_to_string_harder( + sd_id128_t id, + char buffer[static ID128_UUID_STRING_MAX]) { + + const char *s; + + assert(buffer); + + s = gpt_partition_type_uuid_to_string(id); + if (s) + return s; + + return id128_to_uuid_string(id, buffer); +} + +int gpt_partition_type_uuid_from_string(const char *s, sd_id128_t *ret) { + assert(s); + assert(ret); + + for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table) - 1; i++) + if (streq(s, gpt_partition_type_table[i].name)) { + *ret = gpt_partition_type_table[i].uuid; + return 0; + } + + return sd_id128_from_string(s, ret); +} diff --git a/src/shared/gpt.h b/src/shared/gpt.h new file mode 100644 index 0000000..241ff03 --- /dev/null +++ b/src/shared/gpt.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <endian.h> + +#include "sd-id128.h" + +#include "id128-util.h" + +/* We only support root disk discovery for x86, x86-64, Itanium and ARM for now, since EFI for anything else + * doesn't really exist, and we only care for root partitions on the same disk as the EFI ESP. */ + +#define GPT_ROOT_X86 SD_ID128_MAKE(44,47,95,40,f2,97,41,b2,9a,f7,d1,31,d5,f0,45,8a) +#define GPT_ROOT_X86_64 SD_ID128_MAKE(4f,68,bc,e3,e8,cd,4d,b1,96,e7,fb,ca,f9,84,b7,09) +#define GPT_ROOT_ARM SD_ID128_MAKE(69,da,d7,10,2c,e4,4e,3c,b1,6c,21,a1,d4,9a,be,d3) +#define GPT_ROOT_ARM_64 SD_ID128_MAKE(b9,21,b0,45,1d,f0,41,c3,af,44,4c,6f,28,0d,3f,ae) +#define GPT_ROOT_IA64 SD_ID128_MAKE(99,3d,8d,3d,f8,0e,42,25,85,5a,9d,af,8e,d7,ea,97) +#define GPT_ROOT_RISCV32 SD_ID128_MAKE(60,d5,a7,fe,8e,7d,43,5c,b7,14,3d,d8,16,21,44,e1) +#define GPT_ROOT_RISCV64 SD_ID128_MAKE(72,ec,70,a6,cf,74,40,e6,bd,49,4b,da,08,e8,f2,24) +#define GPT_USR_X86 SD_ID128_MAKE(75,25,0d,76,8c,c6,45,8e,bd,66,bd,47,cc,81,a8,12) +#define GPT_USR_X86_64 SD_ID128_MAKE(84,84,68,0c,95,21,48,c6,9c,11,b0,72,06,56,f6,9e) +#define GPT_USR_ARM SD_ID128_MAKE(7d,03,59,a3,02,b3,4f,0a,86,5c,65,44,03,e7,06,25) +#define GPT_USR_ARM_64 SD_ID128_MAKE(b0,e0,10,50,ee,5f,43,90,94,9a,91,01,b1,71,04,e9) +#define GPT_USR_IA64 SD_ID128_MAKE(43,01,d2,a6,4e,3b,4b,2a,bb,94,9e,0b,2c,42,25,ea) +#define GPT_USR_RISCV32 SD_ID128_MAKE(b9,33,fb,22,5c,3f,4f,91,af,90,e2,bb,0f,a5,07,02) +#define GPT_USR_RISCV64 SD_ID128_MAKE(be,ae,c3,4b,84,42,43,9b,a4,0b,98,43,81,ed,09,7d) +#define GPT_ESP SD_ID128_MAKE(c1,2a,73,28,f8,1f,11,d2,ba,4b,00,a0,c9,3e,c9,3b) +#define GPT_XBOOTLDR SD_ID128_MAKE(bc,13,c2,ff,59,e6,42,62,a3,52,b2,75,fd,6f,71,72) +#define GPT_SWAP SD_ID128_MAKE(06,57,fd,6d,a4,ab,43,c4,84,e5,09,33,c8,4b,4f,4f) +#define GPT_HOME SD_ID128_MAKE(93,3a,c7,e1,2e,b4,4f,13,b8,44,0e,14,e2,ae,f9,15) +#define GPT_SRV SD_ID128_MAKE(3b,8f,84,25,20,e0,4f,3b,90,7f,1a,25,a7,6f,98,e8) +#define GPT_VAR SD_ID128_MAKE(4d,21,b0,16,b5,34,45,c2,a9,fb,5c,16,e0,91,fd,2d) +#define GPT_TMP SD_ID128_MAKE(7e,c6,f5,57,3b,c5,4a,ca,b2,93,16,ef,5d,f6,39,d1) +#define GPT_USER_HOME SD_ID128_MAKE(77,3f,91,ef,66,d4,49,b5,bd,83,d6,83,bf,40,ad,16) +#define GPT_LINUX_GENERIC SD_ID128_MAKE(0f,c6,3d,af,84,83,47,72,8e,79,3d,69,d8,47,7d,e4) + +/* Verity partitions for the root partitions above (we only define them for the root and /usr partitions, + * because only they are are commonly read-only and hence suitable for verity). */ +#define GPT_ROOT_X86_VERITY SD_ID128_MAKE(d1,3c,5d,3b,b5,d1,42,2a,b2,9f,94,54,fd,c8,9d,76) +#define GPT_ROOT_X86_64_VERITY SD_ID128_MAKE(2c,73,57,ed,eb,d2,46,d9,ae,c1,23,d4,37,ec,2b,f5) +#define GPT_ROOT_ARM_VERITY SD_ID128_MAKE(73,86,cd,f2,20,3c,47,a9,a4,98,f2,ec,ce,45,a2,d6) +#define GPT_ROOT_ARM_64_VERITY SD_ID128_MAKE(df,33,00,ce,d6,9f,4c,92,97,8c,9b,fb,0f,38,d8,20) +#define GPT_ROOT_IA64_VERITY SD_ID128_MAKE(86,ed,10,d5,b6,07,45,bb,89,57,d3,50,f2,3d,05,71) +#define GPT_ROOT_RISCV32_VERITY SD_ID128_MAKE(ae,02,53,be,11,67,40,07,ac,68,43,92,6c,14,c5,de) +#define GPT_ROOT_RISCV64_VERITY SD_ID128_MAKE(b6,ed,55,82,44,0b,42,09,b8,da,5f,f7,c4,19,ea,3d) +#define GPT_USR_X86_VERITY SD_ID128_MAKE(8f,46,1b,0d,14,ee,4e,81,9a,a9,04,9b,6f,b9,7a,bd) +#define GPT_USR_X86_64_VERITY SD_ID128_MAKE(77,ff,5f,63,e7,b6,46,33,ac,f4,15,65,b8,64,c0,e6) +#define GPT_USR_ARM_VERITY SD_ID128_MAKE(c2,15,d7,51,7b,cd,46,49,be,90,66,27,49,0a,4c,05) +#define GPT_USR_ARM_64_VERITY SD_ID128_MAKE(6e,11,a4,e7,fb,ca,4d,ed,b9,e9,e1,a5,12,bb,66,4e) +#define GPT_USR_IA64_VERITY SD_ID128_MAKE(6a,49,1e,03,3b,e7,45,45,8e,38,83,32,0e,0e,a8,80) +#define GPT_USR_RISCV32_VERITY SD_ID128_MAKE(cb,1e,e4,e3,8c,d0,41,36,a0,a4,aa,61,a3,2e,87,30) +#define GPT_USR_RISCV64_VERITY SD_ID128_MAKE(8f,10,56,be,9b,05,47,c4,81,d6,be,53,12,8e,5b,54) + +#if defined(__x86_64__) +# define GPT_ROOT_NATIVE GPT_ROOT_X86_64 +# define GPT_ROOT_SECONDARY GPT_ROOT_X86 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_64_VERITY +# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_X86_VERITY +# define GPT_USR_NATIVE GPT_USR_X86_64 +# define GPT_USR_SECONDARY GPT_USR_X86 +# define GPT_USR_NATIVE_VERITY GPT_USR_X86_64_VERITY +# define GPT_USR_SECONDARY_VERITY GPT_USR_X86_VERITY +#elif defined(__i386__) +# define GPT_ROOT_NATIVE GPT_ROOT_X86 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_VERITY +# define GPT_USR_NATIVE GPT_USR_X86 +# define GPT_USR_NATIVE_VERITY GPT_USR_X86_VERITY +#endif + +#if defined(__ia64__) +# define GPT_ROOT_NATIVE GPT_ROOT_IA64 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_IA64_VERITY +# define GPT_USR_NATIVE GPT_USR_IA64 +# define GPT_USR_NATIVE_VERITY GPT_USR_IA64_VERITY +#endif + +#if defined(__aarch64__) && (__BYTE_ORDER != __BIG_ENDIAN) +# define GPT_ROOT_NATIVE GPT_ROOT_ARM_64 +# define GPT_ROOT_SECONDARY GPT_ROOT_ARM +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_64_VERITY +# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_ARM_VERITY +# define GPT_USR_NATIVE GPT_USR_ARM_64 +# define GPT_USR_SECONDARY GPT_USR_ARM +# define GPT_USR_NATIVE_VERITY GPT_USR_ARM_64_VERITY +# define GPT_USR_SECONDARY_VERITY GPT_USR_ARM_VERITY +#elif defined(__arm__) && (__BYTE_ORDER != __BIG_ENDIAN) +# define GPT_ROOT_NATIVE GPT_ROOT_ARM +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY +# define GPT_USR_NATIVE GPT_USR_ARM +# define GPT_USR_NATIVE_VERITY GPT_USR_ARM_VERITY +#endif + +#if defined(__riscv) +#if (__riscv_xlen == 32) +# define GPT_ROOT_NATIVE GPT_ROOT_RISCV32 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_RISCV32_VERITY +# define GPT_USR_NATIVE GPT_USR_RISCV32 +# define GPT_USR_NATIVE_VERITY GPT_USR_RISCV32_VERITY +#elif (__riscv_xlen == 64) +# define GPT_ROOT_NATIVE GPT_ROOT_RISCV64 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_RISCV64_VERITY +# define GPT_USR_NATIVE GPT_USR_RISCV64 +# define GPT_USR_NATIVE_VERITY GPT_USR_RISCV64_VERITY +#endif +#endif + +#define GPT_FLAG_REQUIRED_PARTITION (1ULL << 0) +#define GPT_FLAG_NO_BLOCK_IO_PROTOCOL (1ULL << 1) +#define GPT_FLAG_LEGACY_BIOS_BOOTABLE (1ULL << 2) + +/* Flags we recognize on the root, usr, xbootldr, swap, home, srv, var, tmp partitions when doing + * auto-discovery. These happen to be identical to what Microsoft defines for its own Basic Data Partitions, + * but that's just because we saw no point in defining any other values here. */ +#define GPT_FLAG_READ_ONLY (1ULL << 60) +#define GPT_FLAG_NO_AUTO (1ULL << 63) + +const char *gpt_partition_type_uuid_to_string(sd_id128_t id); +const char *gpt_partition_type_uuid_to_string_harder( + sd_id128_t id, + char buffer[static ID128_UUID_STRING_MAX]); +int gpt_partition_type_uuid_from_string(const char *s, sd_id128_t *ret); + +typedef struct GptPartitionType { + sd_id128_t uuid; + const char *name; +} GptPartitionType; + +extern const GptPartitionType gpt_partition_type_table[]; diff --git a/src/shared/group-record.c b/src/shared/group-record.c new file mode 100644 index 0000000..da3ed0a --- /dev/null +++ b/src/shared/group-record.c @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "group-record.h" +#include "strv.h" +#include "user-util.h" + +GroupRecord* group_record_new(void) { + GroupRecord *h; + + h = new(GroupRecord, 1); + if (!h) + return NULL; + + *h = (GroupRecord) { + .n_ref = 1, + .disposition = _USER_DISPOSITION_INVALID, + .last_change_usec = UINT64_MAX, + .gid = GID_INVALID, + }; + + return h; +} + +static GroupRecord *group_record_free(GroupRecord *g) { + if (!g) + return NULL; + + free(g->group_name); + free(g->realm); + free(g->group_name_and_realm_auto); + free(g->description); + + strv_free(g->members); + free(g->service); + strv_free(g->administrators); + strv_free_erase(g->hashed_password); + + json_variant_unref(g->json); + + return mfree(g); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(GroupRecord, group_record, group_record_free); + +static int dispatch_privileged(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch privileged_dispatch_table[] = { + { "hashedPassword", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(GroupRecord, hashed_password), JSON_SAFE }, + {}, + }; + + return json_dispatch(variant, privileged_dispatch_table, NULL, flags, userdata); +} + +static int dispatch_binding(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch binding_dispatch_table[] = { + { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 }, + {}, + }; + + char smid[SD_ID128_STRING_MAX]; + JsonVariant *m; + sd_id128_t mid; + int r; + + if (!variant) + return 0; + + if (!json_variant_is_object(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name)); + + r = sd_id128_get_machine(&mid); + if (r < 0) + return json_log(variant, flags, r, "Failed to determine machine ID: %m"); + + m = json_variant_by_key(variant, sd_id128_to_string(mid, smid)); + if (!m) + return 0; + + return json_dispatch(m, binding_dispatch_table, NULL, flags, userdata); +} + +static int dispatch_per_machine(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch per_machine_dispatch_table[] = { + { "matchMachineId", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 }, + { "matchHostname", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 }, + { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 }, + { "members", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, members), JSON_RELAX}, + { "administrators", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, administrators), JSON_RELAX}, + {}, + }; + + JsonVariant *e; + int r; + + if (!variant) + return 0; + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + bool matching = false; + JsonVariant *m; + + if (!json_variant_is_object(e)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name)); + + m = json_variant_by_key(e, "matchMachineId"); + if (m) { + r = per_machine_id_match(m, flags); + if (r < 0) + return r; + + matching = r > 0; + } + + if (!matching) { + m = json_variant_by_key(e, "matchHostname"); + if (m) { + r = per_machine_hostname_match(m, flags); + if (r < 0) + return r; + + matching = r > 0; + } + } + + if (!matching) + continue; + + r = json_dispatch(e, per_machine_dispatch_table, NULL, flags, userdata); + if (r < 0) + return r; + } + + return 0; +} + +static int dispatch_status(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch status_dispatch_table[] = { + { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(GroupRecord, service), JSON_SAFE }, + {}, + }; + + char smid[SD_ID128_STRING_MAX]; + JsonVariant *m; + sd_id128_t mid; + int r; + + if (!variant) + return 0; + + if (!json_variant_is_object(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name)); + + r = sd_id128_get_machine(&mid); + if (r < 0) + return json_log(variant, flags, r, "Failed to determine machine ID: %m"); + + m = json_variant_by_key(variant, sd_id128_to_string(mid, smid)); + if (!m) + return 0; + + return json_dispatch(m, status_dispatch_table, NULL, flags, userdata); +} + +static int group_record_augment(GroupRecord *h, JsonDispatchFlags json_flags) { + assert(h); + + if (!FLAGS_SET(h->mask, USER_RECORD_REGULAR)) + return 0; + + assert(h->group_name); + + if (!h->group_name_and_realm_auto && h->realm) { + h->group_name_and_realm_auto = strjoin(h->group_name, "@", h->realm); + if (!h->group_name_and_realm_auto) + return json_log_oom(h->json, json_flags); + } + + return 0; +} + +int group_record_load( + GroupRecord *h, + JsonVariant *v, + UserRecordLoadFlags load_flags) { + + static const JsonDispatch group_dispatch_table[] = { + { "groupName", JSON_VARIANT_STRING, json_dispatch_user_group_name, offsetof(GroupRecord, group_name), JSON_RELAX}, + { "realm", JSON_VARIANT_STRING, json_dispatch_realm, offsetof(GroupRecord, realm), 0 }, + { "description", JSON_VARIANT_STRING, json_dispatch_gecos, offsetof(GroupRecord, description), 0 }, + { "disposition", JSON_VARIANT_STRING, json_dispatch_user_disposition, offsetof(GroupRecord, disposition), 0 }, + { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(GroupRecord, service), JSON_SAFE }, + { "lastChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(GroupRecord, last_change_usec), 0 }, + { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 }, + { "members", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, members), JSON_RELAX}, + { "administrators", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, administrators), JSON_RELAX}, + + { "privileged", JSON_VARIANT_OBJECT, dispatch_privileged, 0, 0 }, + + /* Not defined for now, for groups, but let's at least generate sensible errors about it */ + { "secret", JSON_VARIANT_OBJECT, json_dispatch_unsupported, 0, 0 }, + + /* Ignore the perMachine, binding and status stuff here, and process it later, so that it overrides whatever is set above */ + { "perMachine", JSON_VARIANT_ARRAY, NULL, 0, 0 }, + { "binding", JSON_VARIANT_OBJECT, NULL, 0, 0 }, + { "status", JSON_VARIANT_OBJECT, NULL, 0, 0 }, + + /* Ignore 'signature', we check it with explicit accessors instead */ + { "signature", JSON_VARIANT_ARRAY, NULL, 0, 0 }, + {}, + }; + + JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags); + int r; + + assert(h); + assert(!h->json); + + /* Note that this call will leave a half-initialized record around on failure! */ + + if ((USER_RECORD_REQUIRE_MASK(load_flags) & (USER_RECORD_SECRET|USER_RECORD_PRIVILEGED))) + return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Secret and privileged section currently not available for groups, refusing."); + + r = user_group_record_mangle(v, load_flags, &h->json, &h->mask); + if (r < 0) + return r; + + r = json_dispatch(h->json, group_dispatch_table, NULL, json_flags, h); + if (r < 0) + return r; + + /* During the parsing operation above we ignored the 'perMachine', 'binding' and 'status' fields, since we want + * them to override the global options. Let's process them now. */ + + r = dispatch_per_machine("perMachine", json_variant_by_key(h->json, "perMachine"), json_flags, h); + if (r < 0) + return r; + + r = dispatch_binding("binding", json_variant_by_key(h->json, "binding"), json_flags, h); + if (r < 0) + return r; + + r = dispatch_status("status", json_variant_by_key(h->json, "status"), json_flags, h); + if (r < 0) + return r; + + if (FLAGS_SET(h->mask, USER_RECORD_REGULAR) && !h->group_name) + return json_log(h->json, json_flags, SYNTHETIC_ERRNO(EINVAL), "Group name field missing, refusing."); + + r = group_record_augment(h, json_flags); + if (r < 0) + return r; + + return 0; +} + +int group_record_build(GroupRecord **ret, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + _cleanup_(group_record_unrefp) GroupRecord *g = NULL; + va_list ap; + int r; + + assert(ret); + + va_start(ap, ret); + r = json_buildv(&v, ap); + va_end(ap); + + if (r < 0) + return r; + + g = group_record_new(); + if (!g) + return -ENOMEM; + + r = group_record_load(g, v, USER_RECORD_LOAD_FULL); + if (r < 0) + return r; + + *ret = TAKE_PTR(g); + return 0; +} + +const char *group_record_group_name_and_realm(GroupRecord *h) { + assert(h); + + /* Return the pre-initialized joined string if it is defined */ + if (h->group_name_and_realm_auto) + return h->group_name_and_realm_auto; + + /* If it's not defined then we cannot have a realm */ + assert(!h->realm); + return h->group_name; +} + +UserDisposition group_record_disposition(GroupRecord *h) { + assert(h); + + if (h->disposition >= 0) + return h->disposition; + + /* If not declared, derive from GID */ + + if (!gid_is_valid(h->gid)) + return _USER_DISPOSITION_INVALID; + + if (h->gid == 0 || h->gid == GID_NOBODY) + return USER_INTRINSIC; + + if (gid_is_system(h->gid)) + return USER_SYSTEM; + + if (gid_is_dynamic(h->gid)) + return USER_DYNAMIC; + + if (gid_is_container(h->gid)) + return USER_CONTAINER; + + if (h->gid > INT32_MAX) + return USER_RESERVED; + + return USER_REGULAR; +} + +int group_record_clone(GroupRecord *h, UserRecordLoadFlags flags, GroupRecord **ret) { + _cleanup_(group_record_unrefp) GroupRecord *c = NULL; + int r; + + assert(h); + assert(ret); + + c = group_record_new(); + if (!c) + return -ENOMEM; + + r = group_record_load(c, h->json, flags); + if (r < 0) + return r; + + *ret = TAKE_PTR(c); + return 0; +} diff --git a/src/shared/group-record.h b/src/shared/group-record.h new file mode 100644 index 0000000..f810204 --- /dev/null +++ b/src/shared/group-record.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "json.h" +#include "user-record.h" + +typedef struct GroupRecord { + unsigned n_ref; + UserRecordMask mask; + bool incomplete; + + char *group_name; + char *realm; + char *group_name_and_realm_auto; + + char *description; + + UserDisposition disposition; + uint64_t last_change_usec; + + gid_t gid; + + char **members; + + char *service; + + /* The following exist mostly so that we can cover the full /etc/gshadow set of fields, we currently + * do not actually make use of these */ + char **administrators; /* maps to 'struct sgrp' .sg_adm field */ + char **hashed_password; /* maps to 'struct sgrp' .sg_passwd field */ + + JsonVariant *json; +} GroupRecord; + +GroupRecord* group_record_new(void); +GroupRecord* group_record_ref(GroupRecord *g); +GroupRecord* group_record_unref(GroupRecord *g); + +DEFINE_TRIVIAL_CLEANUP_FUNC(GroupRecord*, group_record_unref); + +int group_record_load(GroupRecord *h, JsonVariant *v, UserRecordLoadFlags flags); +int group_record_build(GroupRecord **ret, ...); +int group_record_clone(GroupRecord *g, UserRecordLoadFlags flags, GroupRecord **ret); + +const char *group_record_group_name_and_realm(GroupRecord *h); +UserDisposition group_record_disposition(GroupRecord *h); diff --git a/src/shared/id128-print.c b/src/shared/id128-print.c new file mode 100644 index 0000000..f232767 --- /dev/null +++ b/src/shared/id128-print.c @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "id128-print.h" +#include "log.h" +#include "pretty-print.h" +#include "terminal-util.h" + +int id128_pretty_print_sample(const char *name, sd_id128_t id) { + _cleanup_free_ char *man_link = NULL, *mod_link = NULL; + const char *on, *off; + unsigned i; + + on = ansi_highlight(); + off = ansi_normal(); + + if (terminal_urlify("man:systemd-id128(1)", "systemd-id128(1)", &man_link) < 0) + return log_oom(); + + if (terminal_urlify("https://docs.python.org/3/library/uuid.html", "uuid", &mod_link) < 0) + return log_oom(); + + printf("As string:\n" + "%s" SD_ID128_FORMAT_STR "%s\n\n" + "As UUID:\n" + "%s" SD_ID128_UUID_FORMAT_STR "%s\n\n" + "As %s macro:\n" + "%s#define %s SD_ID128_MAKE(", + on, SD_ID128_FORMAT_VAL(id), off, + on, SD_ID128_FORMAT_VAL(id), off, + man_link, + on, name); + for (i = 0; i < 16; i++) + printf("%02x%s", id.bytes[i], i != 15 ? "," : ""); + printf(")%s\n\n", off); + + printf("As Python constant:\n" + ">>> import %s\n" + ">>> %s%s = uuid.UUID('" SD_ID128_FORMAT_STR "')%s\n", + mod_link, + on, name, SD_ID128_FORMAT_VAL(id), off); + + return 0; +} + + +int id128_pretty_print(sd_id128_t id, Id128PrettyPrintMode mode) { + assert(mode >= 0); + assert(mode < _ID128_PRETTY_PRINT_MODE_MAX); + + if (mode == ID128_PRINT_ID128) { + printf(SD_ID128_FORMAT_STR "\n", + SD_ID128_FORMAT_VAL(id)); + return 0; + } else if (mode == ID128_PRINT_UUID) { + printf(SD_ID128_UUID_FORMAT_STR "\n", + SD_ID128_FORMAT_VAL(id)); + return 0; + } else + return id128_pretty_print_sample("XYZ", id); +} + +int id128_print_new(Id128PrettyPrintMode mode) { + sd_id128_t id; + int r; + + r = sd_id128_randomize(&id); + if (r < 0) + return log_error_errno(r, "Failed to generate ID: %m"); + + return id128_pretty_print(id, mode); +} diff --git a/src/shared/id128-print.h b/src/shared/id128-print.h new file mode 100644 index 0000000..d69cb9b --- /dev/null +++ b/src/shared/id128-print.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include <stdbool.h> + +#include "sd-id128.h" + +typedef enum Id128PrettyPrintMode { + ID128_PRINT_ID128, + ID128_PRINT_UUID, + ID128_PRINT_PRETTY, + _ID128_PRETTY_PRINT_MODE_MAX, + _ID128_PRETTY_PRINT_MODE_INVALID = -1 +} Id128PrettyPrintMode; + +int id128_pretty_print_sample(const char *name, sd_id128_t id); +int id128_pretty_print(sd_id128_t id, Id128PrettyPrintMode mode); +int id128_print_new(Id128PrettyPrintMode mode); diff --git a/src/shared/idn-util.c b/src/shared/idn-util.c new file mode 100644 index 0000000..83c4b3c --- /dev/null +++ b/src/shared/idn-util.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_LIBIDN2 +# include <idn2.h> +#elif HAVE_LIBIDN +# include <idna.h> +# include <stringprep.h> +#endif + +#include "alloc-util.h" +#include "dlfcn-util.h" +#include "idn-util.h" + +#if HAVE_LIBIDN || HAVE_LIBIDN2 +static void* idn_dl = NULL; +#endif + +#if HAVE_LIBIDN2 +int (*sym_idn2_lookup_u8)(const uint8_t* src, uint8_t** lookupname, int flags) = NULL; +const char *(*sym_idn2_strerror)(int rc) = NULL; +int (*sym_idn2_to_unicode_8z8z)(const char * input, char ** output, int flags) = NULL; + +int dlopen_idn(void) { + _cleanup_(dlclosep) void *dl = NULL; + int r; + + if (idn_dl) + return 0; /* Already loaded */ + + dl = dlopen("libidn2.so.0", RTLD_LAZY); + if (!dl) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "libidn2 support is not installed: %s", dlerror()); + + r = dlsym_many_and_warn( + dl, + LOG_DEBUG, + &sym_idn2_lookup_u8, "idn2_lookup_u8", + &sym_idn2_strerror, "idn2_strerror", + &sym_idn2_to_unicode_8z8z, "idn2_to_unicode_8z8z", + NULL); + if (r < 0) + return r; + + /* Note that we never release the reference here, because there's no real reason to, after all this + * was traditionally a regular shared library dependency which lives forever too. */ + idn_dl = TAKE_PTR(dl); + + return 1; +} +#endif + +#if HAVE_LIBIDN +int (*sym_idna_to_ascii_4i)(const uint32_t * in, size_t inlen, char *out, int flags); +int (*sym_idna_to_unicode_44i)(const uint32_t * in, size_t inlen,uint32_t * out, size_t * outlen, int flags); +char* (*sym_stringprep_ucs4_to_utf8)(const uint32_t * str, ssize_t len, size_t * items_read, size_t * items_written); +uint32_t* (*sym_stringprep_utf8_to_ucs4)(const char *str, ssize_t len, size_t *items_written); + +int dlopen_idn(void) { + _cleanup_(dlclosep) void *dl = NULL; + int r; + + if (idn_dl) + return 0; /* Already loaded */ + + dl = dlopen("libidn.so.12", RTLD_LAZY); + if (!dl) { + /* libidn broke ABI in 1.34, but not in a way we care about (a new field got added to an + * open-coded struct we do not use), hence support both versions. */ + dl = dlopen("libidn.so.11", RTLD_LAZY); + if (!dl) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "libidn support is not installed: %s", dlerror()); + } + + r = dlsym_many_and_warn( + dl, + LOG_DEBUG, + &sym_idna_to_ascii_4i, "idna_to_ascii_4i", + &sym_idna_to_unicode_44i, "idna_to_unicode_44i", + &sym_stringprep_ucs4_to_utf8, "stringprep_ucs4_to_utf8", + &sym_stringprep_utf8_to_ucs4, "stringprep_utf8_to_ucs4", + NULL); + if (r < 0) + return r; + + idn_dl = TAKE_PTR(dl); + + return 1; +} +#endif diff --git a/src/shared/idn-util.h b/src/shared/idn-util.h new file mode 100644 index 0000000..4698eed --- /dev/null +++ b/src/shared/idn-util.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#if HAVE_LIBIDN2 +# include <idn2.h> +#elif HAVE_LIBIDN +# include <idna.h> +# include <stringprep.h> +#endif + +#include <inttypes.h> + +#if HAVE_LIBIDN2 || HAVE_LIBIDN +int dlopen_idn(void); +#else +static inline int dlopen_idn(void) { + return -EOPNOTSUPP; +} +#endif + +#if HAVE_LIBIDN2 +extern int (*sym_idn2_lookup_u8)(const uint8_t* src, uint8_t** lookupname, int flags); +extern const char *(*sym_idn2_strerror)(int rc); +extern int (*sym_idn2_to_unicode_8z8z)(const char * input, char ** output, int flags); +#endif + +#if HAVE_LIBIDN +extern int (*sym_idna_to_ascii_4i)(const uint32_t * in, size_t inlen, char *out, int flags); +extern int (*sym_idna_to_unicode_44i)(const uint32_t * in, size_t inlen,uint32_t * out, size_t * outlen, int flags); +extern char* (*sym_stringprep_ucs4_to_utf8)(const uint32_t * str, ssize_t len, size_t * items_read, size_t * items_written); +extern uint32_t* (*sym_stringprep_utf8_to_ucs4)(const char *str, ssize_t len, size_t *items_written); +#endif diff --git a/src/shared/ima-util.c b/src/shared/ima-util.c new file mode 100644 index 0000000..e37c9ad --- /dev/null +++ b/src/shared/ima-util.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <unistd.h> + +#include "ima-util.h" + +static int use_ima_cached = -1; + +bool use_ima(void) { + + if (use_ima_cached < 0) + use_ima_cached = access("/sys/kernel/security/ima/", F_OK) >= 0; + + return use_ima_cached; +} diff --git a/src/shared/ima-util.h b/src/shared/ima-util.h new file mode 100644 index 0000000..922db78 --- /dev/null +++ b/src/shared/ima-util.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +bool use_ima(void); diff --git a/src/shared/import-util.c b/src/shared/import-util.c new file mode 100644 index 0000000..298c066 --- /dev/null +++ b/src/shared/import-util.c @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "alloc-util.h" +#include "btrfs-util.h" +#include "chattr-util.h" +#include "errno-util.h" +#include "import-util.h" +#include "log.h" +#include "macro.h" +#include "nulstr-util.h" +#include "path-util.h" +#include "string-table.h" +#include "string-util.h" + +int import_url_last_component(const char *url, char **ret) { + const char *e, *p; + char *s; + + e = strchrnul(url, '?'); + + while (e > url && e[-1] == '/') + e--; + + p = e; + while (p > url && p[-1] != '/') + p--; + + if (e <= p) + return -EINVAL; + + s = strndup(p, e - p); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int import_url_change_last_component(const char *url, const char *suffix, char **ret) { + const char *e; + char *s; + + assert(url); + assert(ret); + + e = strchrnul(url, '?'); + + while (e > url && e[-1] == '/') + e--; + + while (e > url && e[-1] != '/') + e--; + + if (e <= url) + return -EINVAL; + + s = new(char, (e - url) + strlen(suffix) + 1); + if (!s) + return -ENOMEM; + + strcpy(mempcpy(s, url, e - url), suffix); + *ret = s; + return 0; +} + +static const char* const import_verify_table[_IMPORT_VERIFY_MAX] = { + [IMPORT_VERIFY_NO] = "no", + [IMPORT_VERIFY_CHECKSUM] = "checksum", + [IMPORT_VERIFY_SIGNATURE] = "signature", +}; + +DEFINE_STRING_TABLE_LOOKUP(import_verify, ImportVerify); + +int tar_strip_suffixes(const char *name, char **ret) { + const char *e; + char *s; + + e = endswith(name, ".tar"); + if (!e) + e = endswith(name, ".tar.xz"); + if (!e) + e = endswith(name, ".tar.gz"); + if (!e) + e = endswith(name, ".tar.bz2"); + if (!e) + e = endswith(name, ".tgz"); + if (!e) + e = strchr(name, 0); + + if (e <= name) + return -EINVAL; + + s = strndup(name, e - name); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int raw_strip_suffixes(const char *p, char **ret) { + + static const char suffixes[] = + ".xz\0" + ".gz\0" + ".bz2\0" + ".raw\0" + ".qcow2\0" + ".img\0" + ".bin\0"; + + _cleanup_free_ char *q = NULL; + + q = strdup(p); + if (!q) + return -ENOMEM; + + for (;;) { + const char *sfx; + bool changed = false; + + NULSTR_FOREACH(sfx, suffixes) { + char *e; + + e = endswith(q, sfx); + if (e) { + *e = 0; + changed = true; + } + } + + if (!changed) + break; + } + + *ret = TAKE_PTR(q); + + return 0; +} + +int import_assign_pool_quota_and_warn(const char *path) { + int r; + + r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true); + if (r == -ENOTTY) { + log_debug_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, as directory is not on btrfs or not a subvolume. Ignoring."); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines: %m"); + if (r > 0) + log_info("Set up default quota hierarchy for /var/lib/machines."); + + r = btrfs_subvol_auto_qgroup(path, 0, true); + if (r == -ENOTTY) { + log_debug_errno(r, "Failed to set up quota hierarchy for %s, as directory is not on btrfs or not a subvolume. Ignoring.", path); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to set up default quota hierarchy for %s: %m", path); + if (r > 0) + log_debug("Set up default quota hierarchy for %s.", path); + + return 0; +} + +int import_set_nocow_and_log(int fd, const char *path) { + int r; + + r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + if (r < 0) + return log_full_errno( + ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, + r, "Failed to set file attributes on %s: %m", path); + + return 0; +} diff --git a/src/shared/import-util.h b/src/shared/import-util.h new file mode 100644 index 0000000..8d017f6 --- /dev/null +++ b/src/shared/import-util.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +typedef enum ImportVerify { + IMPORT_VERIFY_NO, + IMPORT_VERIFY_CHECKSUM, + IMPORT_VERIFY_SIGNATURE, + _IMPORT_VERIFY_MAX, + _IMPORT_VERIFY_INVALID = -1, +} ImportVerify; + +int import_url_last_component(const char *url, char **ret); +int import_url_change_last_component(const char *url, const char *suffix, char **ret); + +const char* import_verify_to_string(ImportVerify v) _const_; +ImportVerify import_verify_from_string(const char *s) _pure_; + +int tar_strip_suffixes(const char *name, char **ret); +int raw_strip_suffixes(const char *name, char **ret); + +int import_assign_pool_quota_and_warn(const char *path); + +int import_set_nocow_and_log(int fd, const char *path); diff --git a/src/shared/initreq.h b/src/shared/initreq.h new file mode 100644 index 0000000..1bf5b8e --- /dev/null +++ b/src/shared/initreq.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2+ */ +/* + * initreq.h Interface to talk to init through /dev/initctl. + * + * Copyright (C) 1995-2004 Miquel van Smoorenburg + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * Version: @(#)initreq.h 1.28 31-Mar-2004 MvS + */ + +#pragma once + +#include <sys/param.h> + +#if defined(__FreeBSD_kernel__) +# define INIT_FIFO "/etc/.initctl" +#else +# define INIT_FIFO "/dev/initctl" +#endif + +#define INIT_MAGIC 0x03091969 +#define INIT_CMD_START 0 +#define INIT_CMD_RUNLVL 1 +#define INIT_CMD_POWERFAIL 2 +#define INIT_CMD_POWERFAILNOW 3 +#define INIT_CMD_POWEROK 4 +#define INIT_CMD_BSD 5 +#define INIT_CMD_SETENV 6 +#define INIT_CMD_UNSETENV 7 + +#define INIT_CMD_CHANGECONS 12345 + +#ifdef MAXHOSTNAMELEN +# define INITRQ_HLEN MAXHOSTNAMELEN +#else +# define INITRQ_HLEN 64 +#endif + +/* + * This is what BSD 4.4 uses when talking to init. + * Linux doesn't use this right now. + */ +struct init_request_bsd { + char gen_id[8]; /* Beats me.. telnetd uses "fe" */ + char tty_id[16]; /* Tty name minus /dev/tty */ + char host[INITRQ_HLEN]; /* Hostname */ + char term_type[16]; /* Terminal type */ + int signal; /* Signal to send */ + int pid; /* Process to send to */ + char exec_name[128]; /* Program to execute */ + char reserved[128]; /* For future expansion. */ +}; + +/* + * Because of legacy interfaces, "runlevel" and "sleeptime" + * aren't in a separate struct in the union. + * + * The weird sizes are because init expects the whole + * struct to be 384 bytes. + */ +struct init_request { + int magic; /* Magic number */ + int cmd; /* What kind of request */ + int runlevel; /* Runlevel to change to */ + int sleeptime; /* Time between TERM and KILL */ + union { + struct init_request_bsd bsd; + char data[368]; + } i; +}; diff --git a/src/shared/install-printf.c b/src/shared/install-printf.c new file mode 100644 index 0000000..6bc3f15 --- /dev/null +++ b/src/shared/install-printf.c @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdio.h> +#include <unistd.h> + +#include "format-util.h" +#include "install-printf.h" +#include "install.h" +#include "macro.h" +#include "specifier.h" +#include "string-util.h" +#include "unit-name.h" +#include "user-util.h" + +static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + _cleanup_free_ char *prefix = NULL; + int r; + + assert(i); + + r = unit_name_to_prefix_and_instance(i->name, &prefix); + if (r < 0) + return r; + + if (endswith(prefix, "@") && i->default_instance) { + char *ans; + + ans = strjoin(prefix, i->default_instance); + if (!ans) + return -ENOMEM; + *ret = ans; + } else + *ret = TAKE_PTR(prefix); + + return 0; +} + +static int specifier_name(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + char *ans; + + assert(i); + + if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE) && i->default_instance) + return unit_name_replace_instance(i->name, i->default_instance, ret); + + ans = strdup(i->name); + if (!ans) + return -ENOMEM; + *ret = ans; + return 0; +} + +static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + + assert(i); + + return unit_name_to_prefix(i->name, ret); +} + +static int specifier_instance(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + char *instance; + int r; + + assert(i); + + r = unit_name_to_instance(i->name, &instance); + if (r < 0) + return r; + + if (isempty(instance)) { + r = free_and_strdup(&instance, strempty(i->default_instance)); + if (r < 0) + return r; + } + + *ret = instance; + return 0; +} + +static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) { + _cleanup_free_ char *prefix = NULL; + char *dash; + int r; + + r = specifier_prefix(specifier, data, userdata, &prefix); + if (r < 0) + return r; + + dash = strrchr(prefix, '-'); + if (dash) { + dash = strdup(dash + 1); + if (!dash) + return -ENOMEM; + *ret = dash; + } else + *ret = TAKE_PTR(prefix); + + return 0; +} + +int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret) { + /* This is similar to unit_name_printf() */ + + const Specifier table[] = { + { 'i', specifier_instance, NULL }, + { 'j', specifier_last_component, NULL }, + { 'n', specifier_name, NULL }, + { 'N', specifier_prefix_and_instance, NULL }, + { 'p', specifier_prefix, NULL }, + + COMMON_SYSTEM_SPECIFIERS, + + COMMON_CREDS_SPECIFIERS, + {} + }; + + assert(i); + assert(format); + assert(ret); + + return specifier_printf(format, table, i, ret); +} diff --git a/src/shared/install-printf.h b/src/shared/install-printf.h new file mode 100644 index 0000000..34e1294 --- /dev/null +++ b/src/shared/install-printf.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "install.h" + +int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret); diff --git a/src/shared/install.c b/src/shared/install.c new file mode 100644 index 0000000..302497a --- /dev/null +++ b/src/shared/install.c @@ -0,0 +1,3479 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "conf-parser.h" +#include "def.h" +#include "dirent-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hashmap.h" +#include "install-printf.h" +#include "install.h" +#include "locale-util.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "path-lookup.h" +#include "path-util.h" +#include "rm-rf.h" +#include "set.h" +#include "special.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "unit-file.h" + +#define UNIT_FILE_FOLLOW_SYMLINK_MAX 64 + +typedef enum SearchFlags { + SEARCH_LOAD = 1 << 0, + SEARCH_FOLLOW_CONFIG_SYMLINKS = 1 << 1, + SEARCH_DROPIN = 1 << 2, +} SearchFlags; + +typedef struct { + OrderedHashmap *will_process; + OrderedHashmap *have_processed; +} InstallContext; + +typedef enum { + PRESET_UNKNOWN, + PRESET_ENABLE, + PRESET_DISABLE, +} PresetAction; + +struct UnitFilePresetRule { + char *pattern; + PresetAction action; + char **instances; +}; + +static bool unit_file_install_info_has_rules(const UnitFileInstallInfo *i) { + assert(i); + + return !strv_isempty(i->aliases) || + !strv_isempty(i->wanted_by) || + !strv_isempty(i->required_by); +} + +static bool unit_file_install_info_has_also(const UnitFileInstallInfo *i) { + assert(i); + + return !strv_isempty(i->also); +} + +void unit_file_presets_freep(UnitFilePresets *p) { + if (!p) + return; + + for (size_t i = 0; i < p->n_rules; i++) { + free(p->rules[i].pattern); + strv_free(p->rules[i].instances); + } + + free(p->rules); + p->n_rules = 0; +} + +static const char *const unit_file_type_table[_UNIT_FILE_TYPE_MAX] = { + [UNIT_FILE_TYPE_REGULAR] = "regular", + [UNIT_FILE_TYPE_SYMLINK] = "symlink", + [UNIT_FILE_TYPE_MASKED] = "masked", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(unit_file_type, UnitFileType); + +static int in_search_path(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + return path_strv_contains(p->search_path, parent); +} + +static const char* skip_root(const LookupPaths *p, const char *path) { + char *e; + + assert(p); + assert(path); + + if (!p->root_dir) + return path; + + e = path_startswith(path, p->root_dir); + if (!e) + return NULL; + + /* Make sure the returned path starts with a slash */ + if (e[0] != '/') { + if (e == path || e[-1] != '/') + return NULL; + + e--; + } + + return e; +} + +static int path_is_generator(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + return path_equal_ptr(parent, p->generator) || + path_equal_ptr(parent, p->generator_early) || + path_equal_ptr(parent, p->generator_late); +} + +static int path_is_transient(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + return path_equal_ptr(parent, p->transient); +} + +static int path_is_control(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + return path_equal_ptr(parent, p->persistent_control) || + path_equal_ptr(parent, p->runtime_control); +} + +static int path_is_config(const LookupPaths *p, const char *path, bool check_parent) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + /* Note that we do *not* have generic checks for /etc or /run in place, since with + * them we couldn't discern configuration from transient or generated units */ + + if (check_parent) { + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + path = parent; + } + + return path_equal_ptr(path, p->persistent_config) || + path_equal_ptr(path, p->runtime_config); +} + +static int path_is_runtime(const LookupPaths *p, const char *path, bool check_parent) { + _cleanup_free_ char *parent = NULL; + const char *rpath; + + assert(p); + assert(path); + + /* Everything in /run is considered runtime. On top of that we also add + * explicit checks for the various runtime directories, as safety net. */ + + rpath = skip_root(p, path); + if (rpath && path_startswith(rpath, "/run")) + return true; + + if (check_parent) { + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + path = parent; + } + + return path_equal_ptr(path, p->runtime_config) || + path_equal_ptr(path, p->generator) || + path_equal_ptr(path, p->generator_early) || + path_equal_ptr(path, p->generator_late) || + path_equal_ptr(path, p->transient) || + path_equal_ptr(path, p->runtime_control); +} + +static int path_is_vendor_or_generator(const LookupPaths *p, const char *path) { + const char *rpath; + + assert(p); + assert(path); + + rpath = skip_root(p, path); + if (!rpath) + return 0; + + if (path_startswith(rpath, "/usr")) + return true; + +#if HAVE_SPLIT_USR + if (path_startswith(rpath, "/lib")) + return true; +#endif + + if (path_is_generator(p, rpath)) + return true; + + return path_equal(rpath, SYSTEM_DATA_UNIT_PATH); +} + +static const char* config_path_from_flags(const LookupPaths *paths, UnitFileFlags flags) { + assert(paths); + + if (FLAGS_SET(flags, UNIT_FILE_PORTABLE)) + return FLAGS_SET(flags, UNIT_FILE_RUNTIME) ? paths->runtime_attached : paths->persistent_attached; + else + return FLAGS_SET(flags, UNIT_FILE_RUNTIME) ? paths->runtime_config : paths->persistent_config; +} + +int unit_file_changes_add( + UnitFileChange **changes, + size_t *n_changes, + int type, + const char *path, + const char *source) { + + _cleanup_free_ char *p = NULL, *s = NULL; + UnitFileChange *c; + + assert(path); + assert(!changes == !n_changes); + + if (!changes) + return 0; + + c = reallocarray(*changes, *n_changes + 1, sizeof(UnitFileChange)); + if (!c) + return -ENOMEM; + *changes = c; + + p = strdup(path); + if (source) + s = strdup(source); + + if (!p || (source && !s)) + return -ENOMEM; + + path_simplify(p, false); + if (s) + path_simplify(s, false); + + c[*n_changes] = (UnitFileChange) { type, p, s }; + p = s = NULL; + (*n_changes) ++; + return 0; +} + +void unit_file_changes_free(UnitFileChange *changes, size_t n_changes) { + assert(changes || n_changes == 0); + + for (size_t i = 0; i < n_changes; i++) { + free(changes[i].path); + free(changes[i].source); + } + + free(changes); +} + +void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet) { + bool logged = false; + + assert(changes || n_changes == 0); + /* If verb is not specified, errors are not allowed! */ + assert(verb || r >= 0); + + for (size_t i = 0; i < n_changes; i++) { + assert(verb || changes[i].type >= 0); + + switch(changes[i].type) { + case UNIT_FILE_SYMLINK: + if (!quiet) + log_info("Created symlink %s %s %s.", + changes[i].path, + special_glyph(SPECIAL_GLYPH_ARROW), + changes[i].source); + break; + case UNIT_FILE_UNLINK: + if (!quiet) + log_info("Removed %s.", changes[i].path); + break; + case UNIT_FILE_IS_MASKED: + if (!quiet) + log_info("Unit %s is masked, ignoring.", changes[i].path); + break; + case UNIT_FILE_IS_DANGLING: + if (!quiet) + log_info("Unit %s is an alias to a unit that is not present, ignoring.", + changes[i].path); + break; + case -EEXIST: + if (changes[i].source) + log_error_errno(changes[i].type, + "Failed to %s unit, file %s already exists and is a symlink to %s.", + verb, changes[i].path, changes[i].source); + else + log_error_errno(changes[i].type, + "Failed to %s unit, file %s already exists.", + verb, changes[i].path); + logged = true; + break; + case -ERFKILL: + log_error_errno(changes[i].type, "Failed to %s unit, unit %s is masked.", + verb, changes[i].path); + logged = true; + break; + case -EADDRNOTAVAIL: + log_error_errno(changes[i].type, "Failed to %s unit, unit %s is transient or generated.", + verb, changes[i].path); + logged = true; + break; + case -EUCLEAN: + log_error_errno(changes[i].type, + "Failed to %s unit, \"%s\" is not a valid unit name.", + verb, changes[i].path); + logged = true; + break; + case -ELOOP: + log_error_errno(changes[i].type, "Failed to %s unit, refusing to operate on linked unit file %s", + verb, changes[i].path); + logged = true; + break; + + case -ENOENT: + log_error_errno(changes[i].type, "Failed to %s unit, unit %s does not exist.", verb, changes[i].path); + logged = true; + break; + + default: + assert(changes[i].type < 0); + log_error_errno(changes[i].type, "Failed to %s unit, file %s: %m.", + verb, changes[i].path); + logged = true; + } + } + + if (r < 0 && !logged) + log_error_errno(r, "Failed to %s: %m.", verb); +} + +/** + * Checks if two paths or symlinks from wd are the same, when root is the root of the filesystem. + * wc should be the full path in the host file system. + */ +static bool chroot_symlinks_same(const char *root, const char *wd, const char *a, const char *b) { + assert(path_is_absolute(wd)); + + /* This will give incorrect results if the paths are relative and go outside + * of the chroot. False negatives are possible. */ + + if (!root) + root = "/"; + + a = strjoina(path_is_absolute(a) ? root : wd, "/", a); + b = strjoina(path_is_absolute(b) ? root : wd, "/", b); + return path_equal_or_files_same(a, b, 0); +} + +static int create_symlink( + const LookupPaths *paths, + const char *old_path, + const char *new_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_free_ char *dest = NULL, *dirname = NULL; + const char *rp; + int r; + + assert(old_path); + assert(new_path); + + rp = skip_root(paths, old_path); + if (rp) + old_path = rp; + + /* Actually create a symlink, and remember that we did. Is + * smart enough to check if there's already a valid symlink in + * place. + * + * Returns 1 if a symlink was created or already exists and points to + * the right place, or negative on error. + */ + + mkdir_parents_label(new_path, 0755); + + if (symlink(old_path, new_path) >= 0) { + unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path); + return 1; + } + + if (errno != EEXIST) { + unit_file_changes_add(changes, n_changes, -errno, new_path, NULL); + return -errno; + } + + r = readlink_malloc(new_path, &dest); + if (r < 0) { + /* translate EINVAL (non-symlink exists) to EEXIST */ + if (r == -EINVAL) + r = -EEXIST; + + unit_file_changes_add(changes, n_changes, r, new_path, NULL); + return r; + } + + dirname = dirname_malloc(new_path); + if (!dirname) + return -ENOMEM; + + if (chroot_symlinks_same(paths->root_dir, dirname, dest, old_path)) { + log_debug("Symlink %s → %s already exists", new_path, dest); + return 1; + } + + if (!force) { + unit_file_changes_add(changes, n_changes, -EEXIST, new_path, dest); + return -EEXIST; + } + + r = symlink_atomic(old_path, new_path); + if (r < 0) { + unit_file_changes_add(changes, n_changes, r, new_path, NULL); + return r; + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, new_path, NULL); + unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path); + + return 1; +} + +static int mark_symlink_for_removal( + Set **remove_symlinks_to, + const char *p) { + + char *n; + int r; + + assert(p); + + r = set_ensure_allocated(remove_symlinks_to, &path_hash_ops); + if (r < 0) + return r; + + n = strdup(p); + if (!n) + return -ENOMEM; + + path_simplify(n, false); + + r = set_consume(*remove_symlinks_to, n); + if (r == -EEXIST) + return 0; + if (r < 0) + return r; + + return 1; +} + +static int remove_marked_symlinks_fd( + Set *remove_symlinks_to, + int fd, + const char *path, + const char *config_path, + const LookupPaths *lp, + bool dry_run, + bool *restart, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r = 0; + + assert(remove_symlinks_to); + assert(fd >= 0); + assert(path); + assert(config_path); + assert(lp); + assert(restart); + + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return -errno; + } + + rewinddir(d); + + FOREACH_DIRENT(de, d, return -errno) { + + dirent_ensure_type(d, de); + + if (de->d_type == DT_DIR) { + _cleanup_free_ char *p = NULL; + int nfd, q; + + nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (nfd < 0) { + if (errno == ENOENT) + continue; + + if (r == 0) + r = -errno; + continue; + } + + p = path_make_absolute(de->d_name, path); + if (!p) { + safe_close(nfd); + return -ENOMEM; + } + + /* This will close nfd, regardless whether it succeeds or not */ + q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, dry_run, restart, changes, n_changes); + if (q < 0 && r == 0) + r = q; + + } else if (de->d_type == DT_LNK) { + _cleanup_free_ char *p = NULL, *dest = NULL; + const char *rp; + bool found; + int q; + + if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY)) + continue; + + p = path_make_absolute(de->d_name, path); + if (!p) + return -ENOMEM; + path_simplify(p, false); + + q = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &dest, NULL); + if (q == -ENOENT) + continue; + if (q < 0) { + if (r == 0) + r = q; + continue; + } + + /* We remove all links pointing to a file or path that is marked, as well as all files sharing + * the same name as a file that is marked. */ + + found = set_contains(remove_symlinks_to, dest) || + set_contains(remove_symlinks_to, basename(dest)) || + set_contains(remove_symlinks_to, de->d_name); + + if (!found) + continue; + + if (!dry_run) { + if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) { + if (r == 0) + r = -errno; + unit_file_changes_add(changes, n_changes, -errno, p, NULL); + continue; + } + + (void) rmdir_parents(p, config_path); + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, p, NULL); + + /* Now, remember the full path (but with the root prefix removed) of + * the symlink we just removed, and remove any symlinks to it, too. */ + + rp = skip_root(lp, p); + q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: p); + if (q < 0) + return q; + if (q > 0 && !dry_run) + *restart = true; + } + } + + return r; +} + +static int remove_marked_symlinks( + Set *remove_symlinks_to, + const char *config_path, + const LookupPaths *lp, + bool dry_run, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_close_ int fd = -1; + bool restart; + int r = 0; + + assert(config_path); + assert(lp); + + if (set_size(remove_symlinks_to) <= 0) + return 0; + + fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC); + if (fd < 0) + return errno == ENOENT ? 0 : -errno; + + do { + int q, cfd; + restart = false; + + cfd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (cfd < 0) + return -errno; + + /* This takes possession of cfd and closes it */ + q = remove_marked_symlinks_fd(remove_symlinks_to, cfd, config_path, config_path, lp, dry_run, &restart, changes, n_changes); + if (r == 0) + r = q; + } while (restart); + + return r; +} + +static int is_symlink_with_known_name(const UnitFileInstallInfo *i, const char *name) { + int r; + + if (streq(name, i->name)) + return true; + + if (strv_contains(i->aliases, name)) + return true; + + /* Look for template symlink matching DefaultInstance */ + if (i->default_instance && unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) { + _cleanup_free_ char *s = NULL; + + r = unit_name_replace_instance(i->name, i->default_instance, &s); + if (r < 0) { + if (r != -EINVAL) + return r; + + } else if (streq(name, s)) + return true; + } + + return false; +} + +static int find_symlinks_fd( + const char *root_dir, + const UnitFileInstallInfo *i, + bool match_aliases, + bool ignore_same_name, + int fd, + const char *path, + const char *config_path, + bool *same_name_link) { + + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r = 0; + + assert(i); + assert(fd >= 0); + assert(path); + assert(config_path); + assert(same_name_link); + + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return -errno; + } + + FOREACH_DIRENT(de, d, return -errno) { + + dirent_ensure_type(d, de); + + if (de->d_type == DT_DIR) { + _cleanup_free_ char *p = NULL; + int nfd, q; + + nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (nfd < 0) { + if (errno == ENOENT) + continue; + + if (r == 0) + r = -errno; + continue; + } + + p = path_make_absolute(de->d_name, path); + if (!p) { + safe_close(nfd); + return -ENOMEM; + } + + /* This will close nfd, regardless whether it succeeds or not */ + q = find_symlinks_fd(root_dir, i, match_aliases, ignore_same_name, nfd, + p, config_path, same_name_link); + if (q > 0) + return 1; + if (r == 0) + r = q; + + } else if (de->d_type == DT_LNK) { + _cleanup_free_ char *p = NULL, *dest = NULL; + bool found_path = false, found_dest, b = false; + int q; + + /* Acquire symlink name */ + p = path_make_absolute(de->d_name, path); + if (!p) + return -ENOMEM; + + /* Acquire symlink destination */ + q = readlink_malloc(p, &dest); + if (q == -ENOENT) + continue; + if (q < 0) { + if (r == 0) + r = q; + continue; + } + + /* Make absolute */ + if (!path_is_absolute(dest)) { + char *x; + + x = path_join(root_dir, dest); + if (!x) + return -ENOMEM; + + free_and_replace(dest, x); + } + + assert(unit_name_is_valid(i->name, UNIT_NAME_ANY)); + if (!ignore_same_name) + /* Check if the symlink itself matches what we are looking for. + * + * If ignore_same_name is specified, we are in one of the directories which + * have lower priority than the unit file, and even if a file or symlink with + * this name was found, we should ignore it. */ + found_path = streq(de->d_name, i->name); + + /* Check if what the symlink points to matches what we are looking for */ + found_dest = streq(basename(dest), i->name); + + if (found_path && found_dest) { + _cleanup_free_ char *t = NULL; + + /* Filter out same name links in the main + * config path */ + t = path_make_absolute(i->name, config_path); + if (!t) + return -ENOMEM; + + b = path_equal(t, p); + } + + if (b) + *same_name_link = true; + else if (found_path || found_dest) { + if (!match_aliases) + return 1; + + /* Check if symlink name is in the set of names used by [Install] */ + q = is_symlink_with_known_name(i, de->d_name); + if (q < 0) + return q; + if (q > 0) + return 1; + } + } + } + + return r; +} + +static int find_symlinks( + const char *root_dir, + const UnitFileInstallInfo *i, + bool match_name, + bool ignore_same_name, + const char *config_path, + bool *same_name_link) { + + int fd; + + assert(i); + assert(config_path); + assert(same_name_link); + + fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC); + if (fd < 0) { + if (IN_SET(errno, ENOENT, ENOTDIR, EACCES)) + return 0; + return -errno; + } + + /* This takes possession of fd and closes it */ + return find_symlinks_fd(root_dir, i, match_name, ignore_same_name, fd, + config_path, config_path, same_name_link); +} + +static int find_symlinks_in_scope( + UnitFileScope scope, + const LookupPaths *paths, + const UnitFileInstallInfo *i, + bool match_name, + UnitFileState *state) { + + bool same_name_link_runtime = false, same_name_link_config = false; + bool enabled_in_runtime = false, enabled_at_all = false; + bool ignore_same_name = false; + char **p; + int r; + + assert(paths); + assert(i); + + /* As we iterate over the list of search paths in paths->search_path, we may encounter "same name" + * symlinks. The ones which are "below" (i.e. have lower priority) than the unit file itself are + * effectively masked, so we should ignore them. */ + + STRV_FOREACH(p, paths->search_path) { + bool same_name_link = false; + + r = find_symlinks(paths->root_dir, i, match_name, ignore_same_name, *p, &same_name_link); + if (r < 0) + return r; + if (r > 0) { + /* We found symlinks in this dir? Yay! Let's see where precisely it is enabled. */ + + if (path_equal_ptr(*p, paths->persistent_config)) { + /* This is the best outcome, let's return it immediately. */ + *state = UNIT_FILE_ENABLED; + return 1; + } + + /* look for global enablement of user units */ + if (scope == UNIT_FILE_USER && path_is_user_config_dir(*p)) { + *state = UNIT_FILE_ENABLED; + return 1; + } + + r = path_is_runtime(paths, *p, false); + if (r < 0) + return r; + if (r > 0) + enabled_in_runtime = true; + else + enabled_at_all = true; + + } else if (same_name_link) { + if (path_equal_ptr(*p, paths->persistent_config)) + same_name_link_config = true; + else { + r = path_is_runtime(paths, *p, false); + if (r < 0) + return r; + if (r > 0) + same_name_link_runtime = true; + } + } + + /* Check if next iteration will be "below" the unit file (either a regular file + * or a symlink), and hence should be ignored */ + if (!ignore_same_name && path_startswith(i->path, *p)) + ignore_same_name = true; + } + + if (enabled_in_runtime) { + *state = UNIT_FILE_ENABLED_RUNTIME; + return 1; + } + + /* Here's a special rule: if the unit we are looking for is an instance, and it symlinked in the search path + * outside of runtime and configuration directory, then we consider it statically enabled. Note we do that only + * for instance, not for regular names, as those are merely aliases, while instances explicitly instantiate + * something, and hence are a much stronger concept. */ + if (enabled_at_all && unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) { + *state = UNIT_FILE_STATIC; + return 1; + } + + /* Hmm, we didn't find it, but maybe we found the same name + * link? */ + if (same_name_link_config) { + *state = UNIT_FILE_LINKED; + return 1; + } + if (same_name_link_runtime) { + *state = UNIT_FILE_LINKED_RUNTIME; + return 1; + } + + return 0; +} + +static void install_info_free(UnitFileInstallInfo *i) { + + if (!i) + return; + + free(i->name); + free(i->path); + strv_free(i->aliases); + strv_free(i->wanted_by); + strv_free(i->required_by); + strv_free(i->also); + free(i->default_instance); + free(i->symlink_target); + free(i); +} + +static void install_context_done(InstallContext *c) { + assert(c); + + c->will_process = ordered_hashmap_free_with_destructor(c->will_process, install_info_free); + c->have_processed = ordered_hashmap_free_with_destructor(c->have_processed, install_info_free); +} + +static UnitFileInstallInfo *install_info_find(InstallContext *c, const char *name) { + UnitFileInstallInfo *i; + + i = ordered_hashmap_get(c->have_processed, name); + if (i) + return i; + + return ordered_hashmap_get(c->will_process, name); +} + +static int install_info_may_process( + const UnitFileInstallInfo *i, + const LookupPaths *paths, + UnitFileChange **changes, + size_t *n_changes) { + assert(i); + assert(paths); + + /* Checks whether the loaded unit file is one we should process, or is masked, + * transient or generated and thus not subject to enable/disable operations. */ + + if (i->type == UNIT_FILE_TYPE_MASKED) { + unit_file_changes_add(changes, n_changes, -ERFKILL, i->path, NULL); + return -ERFKILL; + } + if (path_is_generator(paths, i->path) || + path_is_transient(paths, i->path)) { + unit_file_changes_add(changes, n_changes, -EADDRNOTAVAIL, i->path, NULL); + return -EADDRNOTAVAIL; + } + + return 0; +} + +/** + * Adds a new UnitFileInstallInfo entry under name in the InstallContext.will_process + * hashmap, or retrieves the existing one if already present. + * + * Returns negative on error, 0 if the unit was already known, 1 otherwise. + */ +static int install_info_add( + InstallContext *c, + const char *name, + const char *path, + bool auxiliary, + UnitFileInstallInfo **ret) { + + UnitFileInstallInfo *i = NULL; + int r; + + assert(c); + + if (!name) { + /* 'name' and 'path' must not both be null. Check here 'path' using assert_se() to + * workaround a bug in gcc that generates a -Wnonnull warning when calling basename(), + * but this cannot be possible in any code path (See #6119). */ + assert_se(path); + name = basename(path); + } + + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + i = install_info_find(c, name); + if (i) { + i->auxiliary = i->auxiliary && auxiliary; + + if (ret) + *ret = i; + return 0; + } + + r = ordered_hashmap_ensure_allocated(&c->will_process, &string_hash_ops); + if (r < 0) + return r; + + i = new(UnitFileInstallInfo, 1); + if (!i) + return -ENOMEM; + + *i = (UnitFileInstallInfo) { + .type = _UNIT_FILE_TYPE_INVALID, + .auxiliary = auxiliary, + }; + + i->name = strdup(name); + if (!i->name) { + r = -ENOMEM; + goto fail; + } + + if (path) { + i->path = strdup(path); + if (!i->path) { + r = -ENOMEM; + goto fail; + } + } + + r = ordered_hashmap_put(c->will_process, i->name, i); + if (r < 0) + goto fail; + + if (ret) + *ret = i; + + return 1; + +fail: + install_info_free(i); + return r; +} + +static int config_parse_alias( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + UnitType type; + + assert(unit); + assert(filename); + assert(lvalue); + assert(rvalue); + + type = unit_name_to_type(unit); + if (!unit_type_may_alias(type)) + return log_syntax(unit, LOG_WARNING, filename, line, 0, + "Alias= is not allowed for %s units, ignoring.", + unit_type_to_string(type)); + + return config_parse_strv(unit, filename, line, section, section_line, + lvalue, ltype, rvalue, data, userdata); +} + +static int config_parse_also( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + UnitFileInstallInfo *info = userdata; + InstallContext *c = data; + int r; + + assert(unit); + assert(filename); + assert(lvalue); + assert(rvalue); + + for (;;) { + _cleanup_free_ char *word = NULL, *printed = NULL; + + r = extract_first_word(&rvalue, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + r = install_full_printf(info, word, &printed); + if (r < 0) + return r; + + r = install_info_add(c, printed, NULL, true, NULL); + if (r < 0) + return r; + + r = strv_push(&info->also, printed); + if (r < 0) + return r; + + printed = NULL; + } + + return 0; +} + +static int config_parse_default_instance( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + UnitFileInstallInfo *i = data; + _cleanup_free_ char *printed = NULL; + int r; + + assert(unit); + assert(filename); + assert(lvalue); + assert(rvalue); + + if (unit_name_is_valid(unit, UNIT_NAME_INSTANCE)) + /* When enabling an instance, we might be using a template unit file, + * but we should ignore DefaultInstance silently. */ + return 0; + if (!unit_name_is_valid(unit, UNIT_NAME_TEMPLATE)) + return log_syntax(unit, LOG_WARNING, filename, line, 0, + "DefaultInstance= only makes sense for template units, ignoring."); + + r = install_full_printf(i, rvalue, &printed); + if (r < 0) + return r; + + if (isempty(printed)) { + i->default_instance = mfree(i->default_instance); + return 0; + } + + if (!unit_instance_is_valid(printed)) + return log_syntax(unit, LOG_WARNING, filename, line, SYNTHETIC_ERRNO(EINVAL), + "Invalid DefaultInstance= value \"%s\".", printed); + + return free_and_replace(i->default_instance, printed); +} + +static int unit_file_load( + InstallContext *c, + UnitFileInstallInfo *info, + const char *path, + const char *root_dir, + SearchFlags flags) { + + const ConfigTableItem items[] = { + { "Install", "Alias", config_parse_alias, 0, &info->aliases }, + { "Install", "WantedBy", config_parse_strv, 0, &info->wanted_by }, + { "Install", "RequiredBy", config_parse_strv, 0, &info->required_by }, + { "Install", "DefaultInstance", config_parse_default_instance, 0, info }, + { "Install", "Also", config_parse_also, 0, c }, + {} + }; + + UnitType type; + _cleanup_fclose_ FILE *f = NULL; + _cleanup_close_ int fd = -1; + struct stat st; + int r; + + assert(info); + assert(path); + + if (!(flags & SEARCH_DROPIN)) { + /* Loading or checking for the main unit file… */ + + type = unit_name_to_type(info->name); + if (type < 0) + return -EINVAL; + if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE) && !unit_type_may_template(type)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: unit type %s cannot be templated, ignoring.", path, unit_type_to_string(type)); + + if (!(flags & SEARCH_LOAD)) { + if (lstat(path, &st) < 0) + return -errno; + + if (null_or_empty(&st)) + info->type = UNIT_FILE_TYPE_MASKED; + else if (S_ISREG(st.st_mode)) + info->type = UNIT_FILE_TYPE_REGULAR; + else if (S_ISLNK(st.st_mode)) + return -ELOOP; + else if (S_ISDIR(st.st_mode)) + return -EISDIR; + else + return -ENOTTY; + + return 0; + } + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + } else { + /* Operating on a drop-in file. If we aren't supposed to load the unit file drop-ins don't matter, let's hence shortcut this. */ + + if (!(flags & SEARCH_LOAD)) + return 0; + + fd = chase_symlinks_and_open(path, root_dir, 0, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd < 0) + return fd; + } + + if (fstat(fd, &st) < 0) + return -errno; + + if (null_or_empty(&st)) { + if ((flags & SEARCH_DROPIN) == 0) + info->type = UNIT_FILE_TYPE_MASKED; + + return 0; + } + + r = stat_verify_regular(&st); + if (r < 0) + return r; + + f = take_fdopen(&fd, "r"); + if (!f) + return -errno; + + /* c is only needed if we actually load the file (it's referenced from items[] btw, in case you wonder.) */ + assert(c); + + r = config_parse(info->name, path, f, + "Install\0" + "-Unit\0" + "-Automount\0" + "-Device\0" + "-Mount\0" + "-Path\0" + "-Scope\0" + "-Service\0" + "-Slice\0" + "-Socket\0" + "-Swap\0" + "-Target\0" + "-Timer\0", + config_item_table_lookup, items, + 0, info, + NULL); + if (r < 0) + return log_debug_errno(r, "Failed to parse %s: %m", info->name); + + if ((flags & SEARCH_DROPIN) == 0) + info->type = UNIT_FILE_TYPE_REGULAR; + + return + (int) strv_length(info->aliases) + + (int) strv_length(info->wanted_by) + + (int) strv_length(info->required_by); +} + +static int unit_file_load_or_readlink( + InstallContext *c, + UnitFileInstallInfo *info, + const char *path, + const char *root_dir, + SearchFlags flags) { + _cleanup_free_ char *resolved = NULL; + struct stat st; + int r; + + r = unit_file_load(c, info, path, root_dir, flags); + if (r != -ELOOP || (flags & SEARCH_DROPIN)) + return r; + + r = chase_symlinks(path, root_dir, CHASE_WARN | CHASE_NONEXISTENT, &resolved, NULL); + if (r >= 0 && + root_dir && + path_equal_ptr(path_startswith(resolved, root_dir), "dev/null")) + /* When looking under root_dir, we can't expect /dev/ to be mounted, + * so let's see if the path is a (possibly dangling) symlink to /dev/null. */ + info->type = UNIT_FILE_TYPE_MASKED; + + else if (r > 0 && + stat(resolved, &st) >= 0 && + null_or_empty(&st)) + + info->type = UNIT_FILE_TYPE_MASKED; + + else { + _cleanup_free_ char *target = NULL; + const char *bn; + UnitType a, b; + + /* This is a symlink, let's read it. We read the link again, because last time + * we followed the link until resolution, and here we need to do one step. */ + + r = readlink_malloc(path, &target); + if (r < 0) + return r; + + bn = basename(target); + + if (unit_name_is_valid(info->name, UNIT_NAME_PLAIN)) { + + if (!unit_name_is_valid(bn, UNIT_NAME_PLAIN)) + return -EINVAL; + + } else if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) { + + if (!unit_name_is_valid(bn, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) + return -EINVAL; + + } else if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE)) { + + if (!unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) + return -EINVAL; + } else + return -EINVAL; + + /* Enforce that the symlink destination does not + * change the unit file type. */ + + a = unit_name_to_type(info->name); + b = unit_name_to_type(bn); + if (a < 0 || b < 0 || a != b) + return -EINVAL; + + if (path_is_absolute(target)) + /* This is an absolute path, prefix the root so that we always deal with fully qualified paths */ + info->symlink_target = path_join(root_dir, target); + else + /* This is a relative path, take it relative to the dir the symlink is located in. */ + info->symlink_target = file_in_same_dir(path, target); + if (!info->symlink_target) + return -ENOMEM; + + info->type = UNIT_FILE_TYPE_SYMLINK; + } + + return 0; +} + +static int unit_file_search( + InstallContext *c, + UnitFileInstallInfo *info, + const LookupPaths *paths, + SearchFlags flags) { + + const char *dropin_dir_name = NULL, *dropin_template_dir_name = NULL; + _cleanup_strv_free_ char **dirs = NULL, **files = NULL; + _cleanup_free_ char *template = NULL; + bool found_unit = false; + int r, result; + char **p; + + assert(info); + assert(paths); + + /* Was this unit already loaded? */ + if (info->type != _UNIT_FILE_TYPE_INVALID) + return 0; + + if (info->path) + return unit_file_load_or_readlink(c, info, info->path, paths->root_dir, flags); + + assert(info->name); + + if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) { + r = unit_name_template(info->name, &template); + if (r < 0) + return r; + } + + STRV_FOREACH(p, paths->search_path) { + _cleanup_free_ char *path = NULL; + + path = path_join(*p, info->name); + if (!path) + return -ENOMEM; + + r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags); + if (r >= 0) { + info->path = TAKE_PTR(path); + result = r; + found_unit = true; + break; + } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES)) + return r; + } + + if (!found_unit && template) { + + /* Unit file doesn't exist, however instance + * enablement was requested. We will check if it is + * possible to load template unit file. */ + + STRV_FOREACH(p, paths->search_path) { + _cleanup_free_ char *path = NULL; + + path = path_join(*p, template); + if (!path) + return -ENOMEM; + + r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags); + if (r >= 0) { + info->path = TAKE_PTR(path); + result = r; + found_unit = true; + break; + } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES)) + return r; + } + } + + if (!found_unit) + return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), + "Cannot find unit %s%s%s.", + info->name, template ? " or " : "", strempty(template)); + + if (info->type == UNIT_FILE_TYPE_MASKED) + return result; + + /* Search for drop-in directories */ + + dropin_dir_name = strjoina(info->name, ".d"); + STRV_FOREACH(p, paths->search_path) { + char *path; + + path = path_join(*p, dropin_dir_name); + if (!path) + return -ENOMEM; + + r = strv_consume(&dirs, path); + if (r < 0) + return r; + } + + if (template) { + dropin_template_dir_name = strjoina(template, ".d"); + STRV_FOREACH(p, paths->search_path) { + char *path; + + path = path_join(*p, dropin_template_dir_name); + if (!path) + return -ENOMEM; + + r = strv_consume(&dirs, path); + if (r < 0) + return r; + } + } + + /* Load drop-in conf files */ + + r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) dirs); + if (r < 0) + return log_debug_errno(r, "Failed to get list of conf files: %m"); + + STRV_FOREACH(p, files) { + r = unit_file_load_or_readlink(c, info, *p, paths->root_dir, flags | SEARCH_DROPIN); + if (r < 0) + return log_debug_errno(r, "Failed to load conf file %s: %m", *p); + } + + return result; +} + +static int install_info_follow( + InstallContext *c, + UnitFileInstallInfo *i, + const char *root_dir, + SearchFlags flags, + bool ignore_different_name) { + + assert(c); + assert(i); + + if (i->type != UNIT_FILE_TYPE_SYMLINK) + return -EINVAL; + if (!i->symlink_target) + return -EINVAL; + + /* If the basename doesn't match, the caller should add a + * complete new entry for this. */ + + if (!ignore_different_name && !streq(basename(i->symlink_target), i->name)) + return -EXDEV; + + free_and_replace(i->path, i->symlink_target); + i->type = _UNIT_FILE_TYPE_INVALID; + + return unit_file_load_or_readlink(c, i, i->path, root_dir, flags); +} + +/** + * Search for the unit file. If the unit name is a symlink, follow the symlink to the + * target, maybe more than once. Propagate the instance name if present. + */ +static int install_info_traverse( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + UnitFileInstallInfo *start, + SearchFlags flags, + UnitFileInstallInfo **ret) { + + UnitFileInstallInfo *i; + unsigned k = 0; + int r; + + assert(paths); + assert(start); + assert(c); + + r = unit_file_search(c, start, paths, flags); + if (r < 0) + return r; + + i = start; + while (i->type == UNIT_FILE_TYPE_SYMLINK) { + /* Follow the symlink */ + + if (++k > UNIT_FILE_FOLLOW_SYMLINK_MAX) + return -ELOOP; + + if (!(flags & SEARCH_FOLLOW_CONFIG_SYMLINKS)) { + r = path_is_config(paths, i->path, true); + if (r < 0) + return r; + if (r > 0) + return -ELOOP; + } + + r = install_info_follow(c, i, paths->root_dir, flags, false); + if (r == -EXDEV) { + _cleanup_free_ char *buffer = NULL; + const char *bn; + + /* Target has a different name, create a new + * install info object for that, and continue + * with that. */ + + bn = basename(i->symlink_target); + + if (unit_name_is_valid(i->name, UNIT_NAME_INSTANCE) && + unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) { + + _cleanup_free_ char *instance = NULL; + + r = unit_name_to_instance(i->name, &instance); + if (r < 0) + return r; + + r = unit_name_replace_instance(bn, instance, &buffer); + if (r < 0) + return r; + + if (streq(buffer, i->name)) { + + /* We filled in the instance, and the target stayed the same? If so, then let's + * honour the link as it is. */ + + r = install_info_follow(c, i, paths->root_dir, flags, true); + if (r < 0) + return r; + + continue; + } + + bn = buffer; + } + + r = install_info_add(c, bn, NULL, false, &i); + if (r < 0) + return r; + + /* Try again, with the new target we found. */ + r = unit_file_search(c, i, paths, flags); + if (r == -ENOENT) + /* Translate error code to highlight this specific case */ + return -ENOLINK; + } + + if (r < 0) + return r; + } + + if (ret) + *ret = i; + + return 0; +} + +/** + * Call install_info_add() with name_or_path as the path (if name_or_path starts with "/") + * or the name (otherwise). root_dir is prepended to the path. + */ +static int install_info_add_auto( + InstallContext *c, + const LookupPaths *paths, + const char *name_or_path, + UnitFileInstallInfo **ret) { + + assert(c); + assert(name_or_path); + + if (path_is_absolute(name_or_path)) { + const char *pp; + + pp = prefix_roota(paths->root_dir, name_or_path); + + return install_info_add(c, NULL, pp, false, ret); + } else + return install_info_add(c, name_or_path, NULL, false, ret); +} + +static int install_info_discover( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + const char *name, + SearchFlags flags, + UnitFileInstallInfo **ret, + UnitFileChange **changes, + size_t *n_changes) { + + UnitFileInstallInfo *i; + int r; + + assert(c); + assert(paths); + assert(name); + + r = install_info_add_auto(c, paths, name, &i); + if (r >= 0) + r = install_info_traverse(scope, c, paths, i, flags, ret); + + if (r < 0) + unit_file_changes_add(changes, n_changes, r, name, NULL); + return r; +} + +static int install_info_discover_and_check( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + const char *name, + SearchFlags flags, + UnitFileInstallInfo **ret, + UnitFileChange **changes, + size_t *n_changes) { + + int r; + + r = install_info_discover(scope, c, paths, name, flags, ret, changes, n_changes); + if (r < 0) + return r; + + return install_info_may_process(ret ? *ret : NULL, paths, changes, n_changes); +} + +int unit_file_verify_alias(const UnitFileInstallInfo *i, const char *dst, char **ret_dst) { + _cleanup_free_ char *dst_updated = NULL; + int r; + + /* Verify that dst is a valid either a valid alias or a valid .wants/.requires symlink for the target + * unit *i. Return negative on error or if not compatible, zero on success. + * + * ret_dst is set in cases where "instance propagation" happens, i.e. when the instance part is + * inserted into dst. It is not normally set, even on success, so that the caller can easily + * distinguish the case where instance propagation occurred. + */ + + const char *path_alias = strrchr(dst, '/'); + if (path_alias) { + /* This branch covers legacy Alias= function of creating .wants and .requires symlinks. */ + _cleanup_free_ char *dir = NULL; + char *p; + + path_alias ++; /* skip over slash */ + + dir = dirname_malloc(dst); + if (!dir) + return log_oom(); + + p = endswith(dir, ".wants"); + if (!p) + p = endswith(dir, ".requires"); + if (!p) + return log_warning_errno(SYNTHETIC_ERRNO(EXDEV), + "Invalid path \"%s\" in alias.", dir); + *p = '\0'; /* dir should now be a unit name */ + + r = unit_name_classify(dir); + if (r < 0) + return log_warning_errno(SYNTHETIC_ERRNO(EXDEV), + "Invalid unit name component \"%s\" in alias.", dir); + + const bool instance_propagation = r == UNIT_NAME_TEMPLATE; + + /* That's the name we want to use for verification. */ + r = unit_symlink_name_compatible(path_alias, i->name, instance_propagation); + if (r < 0) + return log_error_errno(r, "Failed to verify alias validity: %m"); + if (r == 0) + return log_warning_errno(SYNTHETIC_ERRNO(EXDEV), + "Invalid unit %s symlink %s.", + i->name, dst); + + } else { + /* If the symlink target has an instance set and the symlink source doesn't, we "propagate + * the instance", i.e. instantiate the symlink source with the target instance. */ + if (unit_name_is_valid(dst, UNIT_NAME_TEMPLATE)) { + _cleanup_free_ char *inst = NULL; + + r = unit_name_to_instance(i->name, &inst); + if (r < 0) + return log_error_errno(r, "Failed to extract instance name from %s: %m", i->name); + + if (r == UNIT_NAME_INSTANCE) { + r = unit_name_replace_instance(dst, inst, &dst_updated); + if (r < 0) + return log_error_errno(r, "Failed to build unit name from %s+%s: %m", + dst, inst); + } + } + + r = unit_validate_alias_symlink_and_warn(dst_updated ?: dst, i->name); + if (r < 0) + return r; + + } + + *ret_dst = TAKE_PTR(dst_updated); + return 0; +} + +static int install_info_symlink_alias( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + char **s; + int r = 0, q; + + assert(i); + assert(paths); + assert(config_path); + + STRV_FOREACH(s, i->aliases) { + _cleanup_free_ char *alias_path = NULL, *dst = NULL, *dst_updated = NULL; + + q = install_full_printf(i, *s, &dst); + if (q < 0) + return q; + + q = unit_file_verify_alias(i, dst, &dst_updated); + if (q < 0) + continue; + + alias_path = path_make_absolute(dst_updated ?: dst, config_path); + if (!alias_path) + return -ENOMEM; + + q = create_symlink(paths, i->path, alias_path, force, changes, n_changes); + if (r == 0) + r = q; + } + + return r; +} + +static int install_info_symlink_wants( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + char **list, + const char *suffix, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_free_ char *buf = NULL; + const char *n; + char **s; + int r = 0, q; + + assert(i); + assert(paths); + assert(config_path); + + if (strv_isempty(list)) + return 0; + + if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) { + UnitFileInstallInfo instance = { + .type = _UNIT_FILE_TYPE_INVALID, + }; + _cleanup_free_ char *path = NULL; + + /* If this is a template, and we have no instance, don't do anything */ + if (!i->default_instance) + return 1; + + r = unit_name_replace_instance(i->name, i->default_instance, &buf); + if (r < 0) + return r; + + instance.name = buf; + r = unit_file_search(NULL, &instance, paths, SEARCH_FOLLOW_CONFIG_SYMLINKS); + if (r < 0) + return r; + + path = TAKE_PTR(instance.path); + + if (instance.type == UNIT_FILE_TYPE_MASKED) { + unit_file_changes_add(changes, n_changes, -ERFKILL, path, NULL); + return -ERFKILL; + } + + n = buf; + } else + n = i->name; + + STRV_FOREACH(s, list) { + _cleanup_free_ char *path = NULL, *dst = NULL; + + q = install_full_printf(i, *s, &dst); + if (q < 0) + return q; + + if (!unit_name_is_valid(dst, UNIT_NAME_ANY)) { + unit_file_changes_add(changes, n_changes, -EUCLEAN, dst, NULL); + r = -EUCLEAN; + continue; + } + + path = strjoin(config_path, "/", dst, suffix, n); + if (!path) + return -ENOMEM; + + q = create_symlink(paths, i->path, path, true, changes, n_changes); + if (r == 0) + r = q; + } + + return r; +} + +static int install_info_symlink_link( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_free_ char *path = NULL; + int r; + + assert(i); + assert(paths); + assert(config_path); + assert(i->path); + + r = in_search_path(paths, i->path); + if (r < 0) + return r; + if (r > 0) + return 0; + + path = path_join(config_path, i->name); + if (!path) + return -ENOMEM; + + return create_symlink(paths, i->path, path, force, changes, n_changes); +} + +static int install_info_apply( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + int r, q; + + assert(i); + assert(paths); + assert(config_path); + + if (i->type != UNIT_FILE_TYPE_REGULAR) + return 0; + + r = install_info_symlink_alias(i, paths, config_path, force, changes, n_changes); + + q = install_info_symlink_wants(i, paths, config_path, i->wanted_by, ".wants/", changes, n_changes); + if (r == 0) + r = q; + + q = install_info_symlink_wants(i, paths, config_path, i->required_by, ".requires/", changes, n_changes); + if (r == 0) + r = q; + + q = install_info_symlink_link(i, paths, config_path, force, changes, n_changes); + /* Do not count links to the unit file towards the "carries_install_info" count */ + if (r == 0 && q < 0) + r = q; + + return r; +} + +static int install_context_apply( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + const char *config_path, + bool force, + SearchFlags flags, + UnitFileChange **changes, + size_t *n_changes) { + + UnitFileInstallInfo *i; + int r; + + assert(c); + assert(paths); + assert(config_path); + + if (ordered_hashmap_isempty(c->will_process)) + return 0; + + r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops); + if (r < 0) + return r; + + r = 0; + while ((i = ordered_hashmap_first(c->will_process))) { + int q; + + q = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name); + if (q < 0) + return q; + + q = install_info_traverse(scope, c, paths, i, flags, NULL); + if (q < 0) { + unit_file_changes_add(changes, n_changes, q, i->name, NULL); + return q; + } + + /* We can attempt to process a masked unit when a different unit + * that we were processing specifies it in Also=. */ + if (i->type == UNIT_FILE_TYPE_MASKED) { + unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path, NULL); + if (r >= 0) + /* Assume that something *could* have been enabled here, + * avoid "empty [Install] section" warning. */ + r += 1; + continue; + } + + if (i->type != UNIT_FILE_TYPE_REGULAR) + continue; + + q = install_info_apply(i, paths, config_path, force, changes, n_changes); + if (r >= 0) { + if (q < 0) + r = q; + else + r += q; + } + } + + return r; +} + +static int install_context_mark_for_removal( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + Set **remove_symlinks_to, + const char *config_path, + UnitFileChange **changes, + size_t *n_changes) { + + UnitFileInstallInfo *i; + int r; + + assert(c); + assert(paths); + assert(config_path); + + /* Marks all items for removal */ + + if (ordered_hashmap_isempty(c->will_process)) + return 0; + + r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops); + if (r < 0) + return r; + + while ((i = ordered_hashmap_first(c->will_process))) { + + r = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name); + if (r < 0) + return r; + + r = install_info_traverse(scope, c, paths, i, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, NULL); + if (r == -ENOLINK) { + log_debug_errno(r, "Name %s leads to a dangling symlink, removing name.", i->name); + unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_DANGLING, i->path ?: i->name, NULL); + } else if (r == -ENOENT) { + + if (i->auxiliary) /* some unit specified in Also= or similar is missing */ + log_debug_errno(r, "Auxiliary unit of %s not found, removing name.", i->name); + else { + log_debug_errno(r, "Unit %s not found, removing name.", i->name); + unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL); + } + + } else if (r < 0) { + log_debug_errno(r, "Failed to find unit %s, removing name: %m", i->name); + unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL); + } else if (i->type == UNIT_FILE_TYPE_MASKED) { + log_debug("Unit file %s is masked, ignoring.", i->name); + unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path ?: i->name, NULL); + continue; + } else if (i->type != UNIT_FILE_TYPE_REGULAR) { + log_debug("Unit %s has type %s, ignoring.", i->name, unit_file_type_to_string(i->type) ?: "invalid"); + continue; + } + + r = mark_symlink_for_removal(remove_symlinks_to, i->name); + if (r < 0) + return r; + } + + return 0; +} + +int unit_file_mask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + const char *config_path; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + STRV_FOREACH(i, files) { + _cleanup_free_ char *path = NULL; + int q; + + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) { + if (r == 0) + r = -EINVAL; + continue; + } + + path = path_make_absolute(*i, config_path); + if (!path) + return -ENOMEM; + + q = create_symlink(&paths, "/dev/null", path, !!(flags & UNIT_FILE_FORCE), changes, n_changes); + if (q < 0 && r >= 0) + r = q; + } + + return r; +} + +int unit_file_unmask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + _cleanup_strv_free_ char **todo = NULL; + size_t n_todo = 0, n_allocated = 0; + const char *config_path; + char **i; + bool dry_run; + int r, q; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + dry_run = !!(flags & UNIT_FILE_DRY_RUN); + + STRV_FOREACH(i, files) { + _cleanup_free_ char *path = NULL; + + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) + return -EINVAL; + + path = path_make_absolute(*i, config_path); + if (!path) + return -ENOMEM; + + r = null_or_empty_path(path); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + if (r == 0) + continue; + + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo] = strdup(*i); + if (!todo[n_todo]) + return -ENOMEM; + + n_todo++; + } + + strv_uniq(todo); + + r = 0; + STRV_FOREACH(i, todo) { + _cleanup_free_ char *path = NULL; + const char *rp; + + path = path_make_absolute(*i, config_path); + if (!path) + return -ENOMEM; + + if (!dry_run && unlink(path) < 0) { + if (errno != ENOENT) { + if (r >= 0) + r = -errno; + unit_file_changes_add(changes, n_changes, -errno, path, NULL); + } + + continue; + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, path, NULL); + + rp = skip_root(&paths, path); + q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: path); + if (q < 0) + return q; + } + + q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes); + if (r >= 0) + r = q; + + return r; +} + +int unit_file_link( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_strv_free_ char **todo = NULL; + size_t n_todo = 0, n_allocated = 0; + const char *config_path; + char **i; + int r, q; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + STRV_FOREACH(i, files) { + _cleanup_free_ char *full = NULL; + struct stat st; + char *fn; + + if (!path_is_absolute(*i)) + return -EINVAL; + + fn = basename(*i); + if (!unit_name_is_valid(fn, UNIT_NAME_ANY)) + return -EINVAL; + + full = path_join(paths.root_dir, *i); + if (!full) + return -ENOMEM; + + if (lstat(full, &st) < 0) + return -errno; + r = stat_verify_regular(&st); + if (r < 0) + return r; + + q = in_search_path(&paths, *i); + if (q < 0) + return q; + if (q > 0) + continue; + + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo] = strdup(*i); + if (!todo[n_todo]) + return -ENOMEM; + + n_todo++; + } + + strv_uniq(todo); + + r = 0; + STRV_FOREACH(i, todo) { + _cleanup_free_ char *new_path = NULL; + + new_path = path_make_absolute(basename(*i), config_path); + if (!new_path) + return -ENOMEM; + + q = create_symlink(&paths, *i, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes); + if (q < 0 && r >= 0) + r = q; + } + + return r; +} + +static int path_shall_revert(const LookupPaths *paths, const char *path) { + int r; + + assert(paths); + assert(path); + + /* Checks whether the path is one where the drop-in directories shall be removed. */ + + r = path_is_config(paths, path, true); + if (r != 0) + return r; + + r = path_is_control(paths, path); + if (r != 0) + return r; + + return path_is_transient(paths, path); +} + +int unit_file_revert( + UnitFileScope scope, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_strv_free_ char **todo = NULL; + size_t n_todo = 0, n_allocated = 0; + char **i; + int r, q; + + /* Puts a unit file back into vendor state. This means: + * + * a) we remove all drop-in snippets added by the user ("config"), add to transient units ("transient"), and + * added via "systemctl set-property" ("control"), but not if the drop-in is generated ("generated"). + * + * c) if there's a vendor unit file (i.e. one in /usr) we remove any configured overriding unit files (i.e. in + * "config", but not in "transient" or "control" or even "generated"). + * + * We remove all that in both the runtime and the persistent directories, if that applies. + */ + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + STRV_FOREACH(i, files) { + bool has_vendor = false; + char **p; + + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) + return -EINVAL; + + STRV_FOREACH(p, paths.search_path) { + _cleanup_free_ char *path = NULL, *dropin = NULL; + struct stat st; + + path = path_make_absolute(*i, *p); + if (!path) + return -ENOMEM; + + r = lstat(path, &st); + if (r < 0) { + if (errno != ENOENT) + return -errno; + } else if (S_ISREG(st.st_mode)) { + /* Check if there's a vendor version */ + r = path_is_vendor_or_generator(&paths, path); + if (r < 0) + return r; + if (r > 0) + has_vendor = true; + } + + dropin = strjoin(path, ".d"); + if (!dropin) + return -ENOMEM; + + r = lstat(dropin, &st); + if (r < 0) { + if (errno != ENOENT) + return -errno; + } else if (S_ISDIR(st.st_mode)) { + /* Remove the drop-ins */ + r = path_shall_revert(&paths, dropin); + if (r < 0) + return r; + if (r > 0) { + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo++] = TAKE_PTR(dropin); + } + } + } + + if (!has_vendor) + continue; + + /* OK, there's a vendor version, hence drop all configuration versions */ + STRV_FOREACH(p, paths.search_path) { + _cleanup_free_ char *path = NULL; + struct stat st; + + path = path_make_absolute(*i, *p); + if (!path) + return -ENOMEM; + + r = lstat(path, &st); + if (r < 0) { + if (errno != ENOENT) + return -errno; + } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { + r = path_is_config(&paths, path, true); + if (r < 0) + return r; + if (r > 0) { + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo++] = TAKE_PTR(path); + } + } + } + } + + strv_uniq(todo); + + r = 0; + STRV_FOREACH(i, todo) { + _cleanup_strv_free_ char **fs = NULL; + const char *rp; + char **j; + + (void) get_files_in_directory(*i, &fs); + + q = rm_rf(*i, REMOVE_ROOT|REMOVE_PHYSICAL); + if (q < 0 && q != -ENOENT && r >= 0) { + r = q; + continue; + } + + STRV_FOREACH(j, fs) { + _cleanup_free_ char *t = NULL; + + t = path_join(*i, *j); + if (!t) + return -ENOMEM; + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, t, NULL); + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, *i, NULL); + + rp = skip_root(&paths, *i); + q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: *i); + if (q < 0) + return q; + } + + q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, false, changes, n_changes); + if (r >= 0) + r = q; + + q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, false, changes, n_changes); + if (r >= 0) + r = q; + + return r; +} + +int unit_file_add_dependency( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + const char *target, + UnitDependency dep, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i, *target_info; + const char *config_path; + char **f; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(target); + + if (!IN_SET(dep, UNIT_WANTS, UNIT_REQUIRES)) + return -EINVAL; + + if (!unit_name_is_valid(target, UNIT_NAME_ANY)) + return -EINVAL; + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + r = install_info_discover_and_check(scope, &c, &paths, target, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &target_info, changes, n_changes); + if (r < 0) + return r; + + assert(target_info->type == UNIT_FILE_TYPE_REGULAR); + + STRV_FOREACH(f, files) { + char ***l; + + r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + + assert(i->type == UNIT_FILE_TYPE_REGULAR); + + /* We didn't actually load anything from the unit + * file, but instead just add in our new symlink to + * create. */ + + if (dep == UNIT_WANTS) + l = &i->wanted_by; + else + l = &i->required_by; + + strv_free(*l); + *l = strv_new(target_info->name); + if (!*l) + return -ENOMEM; + } + + return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes); +} + +int unit_file_enable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + const char *config_path; + UnitFileInstallInfo *i; + char **f; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = config_path_from_flags(&paths, flags); + if (!config_path) + return -ENXIO; + + STRV_FOREACH(f, files) { + r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + + assert(i->type == UNIT_FILE_TYPE_REGULAR); + } + + /* This will return the number of symlink rules that were + supposed to be created, not the ones actually created. This + is useful to determine whether the passed files had any + installation data at all. */ + + return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_LOAD, changes, n_changes); +} + +int unit_file_disable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + const char *config_path; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = config_path_from_flags(&paths, flags); + if (!config_path) + return -ENXIO; + + STRV_FOREACH(i, files) { + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) + return -EINVAL; + + r = install_info_add(&c, *i, NULL, false, NULL); + if (r < 0) + return r; + } + + r = install_context_mark_for_removal(scope, &c, &paths, &remove_symlinks_to, config_path, changes, n_changes); + if (r < 0) + return r; + + return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, !!(flags & UNIT_FILE_DRY_RUN), changes, n_changes); +} + +int unit_file_reenable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + char **n; + int r; + size_t l, i; + + /* First, we invoke the disable command with only the basename... */ + l = strv_length(files); + n = newa(char*, l+1); + for (i = 0; i < l; i++) + n[i] = basename(files[i]); + n[i] = NULL; + + r = unit_file_disable(scope, flags, root_dir, n, changes, n_changes); + if (r < 0) + return r; + + /* But the enable command with the full name */ + return unit_file_enable(scope, flags, root_dir, files, changes, n_changes); +} + +int unit_file_set_default( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + const char *name, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i; + const char *new_path; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(name); + + if (unit_name_to_type(name) != UNIT_TARGET) /* this also validates the name */ + return -EINVAL; + if (streq(name, SPECIAL_DEFAULT_TARGET)) + return -EINVAL; + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + r = install_info_discover_and_check(scope, &c, &paths, name, 0, &i, changes, n_changes); + if (r < 0) + return r; + + new_path = strjoina(paths.persistent_config, "/" SPECIAL_DEFAULT_TARGET); + return create_symlink(&paths, i->path, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes); +} + +int unit_file_get_default( + UnitFileScope scope, + const char *root_dir, + char **name) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i; + char *n; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(name); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + r = install_info_discover(scope, &c, &paths, SPECIAL_DEFAULT_TARGET, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, NULL, NULL); + if (r < 0) + return r; + r = install_info_may_process(i, &paths, NULL, 0); + if (r < 0) + return r; + + n = strdup(i->name); + if (!n) + return -ENOMEM; + + *name = n; + return 0; +} + +int unit_file_lookup_state( + UnitFileScope scope, + const LookupPaths *paths, + const char *name, + UnitFileState *ret) { + + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i; + UnitFileState state; + int r; + + assert(paths); + assert(name); + + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + r = install_info_discover(scope, &c, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, NULL, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to discover unit %s: %m", name); + + assert(IN_SET(i->type, UNIT_FILE_TYPE_REGULAR, UNIT_FILE_TYPE_MASKED)); + log_debug("Found unit %s at %s (%s)", name, strna(i->path), + i->type == UNIT_FILE_TYPE_REGULAR ? "regular file" : "mask"); + + /* Shortcut things, if the caller just wants to know if this unit exists. */ + if (!ret) + return 0; + + switch (i->type) { + + case UNIT_FILE_TYPE_MASKED: + r = path_is_runtime(paths, i->path, true); + if (r < 0) + return r; + + state = r > 0 ? UNIT_FILE_MASKED_RUNTIME : UNIT_FILE_MASKED; + break; + + case UNIT_FILE_TYPE_REGULAR: + /* Check if the name we were querying is actually an alias */ + if (!streq(name, basename(i->path)) && !unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) { + state = UNIT_FILE_ALIAS; + break; + } + + r = path_is_generator(paths, i->path); + if (r < 0) + return r; + if (r > 0) { + state = UNIT_FILE_GENERATED; + break; + } + + r = path_is_transient(paths, i->path); + if (r < 0) + return r; + if (r > 0) { + state = UNIT_FILE_TRANSIENT; + break; + } + + /* Check if any of the Alias= symlinks have been created. + * We ignore other aliases, and only check those that would + * be created by systemctl enable for this unit. */ + r = find_symlinks_in_scope(scope, paths, i, true, &state); + if (r < 0) + return r; + if (r > 0) + break; + + /* Check if the file is known under other names. If it is, + * it might be in use. Report that as UNIT_FILE_INDIRECT. */ + r = find_symlinks_in_scope(scope, paths, i, false, &state); + if (r < 0) + return r; + if (r > 0) + state = UNIT_FILE_INDIRECT; + else { + if (unit_file_install_info_has_rules(i)) + state = UNIT_FILE_DISABLED; + else if (unit_file_install_info_has_also(i)) + state = UNIT_FILE_INDIRECT; + else + state = UNIT_FILE_STATIC; + } + + break; + + default: + assert_not_reached("Unexpected unit file type."); + } + + *ret = state; + return 0; +} + +int unit_file_get_state( + UnitFileScope scope, + const char *root_dir, + const char *name, + UnitFileState *ret) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(name); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + return unit_file_lookup_state(scope, &paths, name, ret); +} + +int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name) { + _cleanup_(install_context_done) InstallContext c = {}; + int r; + + assert(paths); + assert(name); + + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + r = install_info_discover(scope, &c, paths, name, 0, NULL, NULL, NULL); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + return 1; +} + +static int split_pattern_into_name_and_instances(const char *pattern, char **out_unit_name, char ***out_instances) { + _cleanup_strv_free_ char **instances = NULL; + _cleanup_free_ char *unit_name = NULL; + int r; + + assert(pattern); + assert(out_instances); + assert(out_unit_name); + + r = extract_first_word(&pattern, &unit_name, NULL, EXTRACT_RETAIN_ESCAPE); + if (r < 0) + return r; + + /* We handle the instances logic when unit name is extracted */ + if (pattern) { + /* We only create instances when a rule of templated unit + * is seen. A rule like enable foo@.service a b c will + * result in an array of (a, b, c) as instance names */ + if (!unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) + return -EINVAL; + + instances = strv_split(pattern, WHITESPACE); + if (!instances) + return -ENOMEM; + + *out_instances = TAKE_PTR(instances); + } + + *out_unit_name = TAKE_PTR(unit_name); + + return 0; +} + +static int presets_find_config(UnitFileScope scope, const char *root_dir, char ***files) { + static const char* const system_dirs[] = {CONF_PATHS("systemd/system-preset"), NULL}; + static const char* const user_dirs[] = {CONF_PATHS_USR("systemd/user-preset"), NULL}; + const char* const* dirs; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + if (scope == UNIT_FILE_SYSTEM) + dirs = system_dirs; + else if (IN_SET(scope, UNIT_FILE_GLOBAL, UNIT_FILE_USER)) + dirs = user_dirs; + else + assert_not_reached("Invalid unit file scope"); + + return conf_files_list_strv(files, ".preset", root_dir, 0, dirs); +} + +static int read_presets(UnitFileScope scope, const char *root_dir, UnitFilePresets *presets) { + _cleanup_(unit_file_presets_freep) UnitFilePresets ps = {}; + size_t n_allocated = 0; + _cleanup_strv_free_ char **files = NULL; + char **p; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(presets); + + r = presets_find_config(scope, root_dir, &files); + if (r < 0) + return r; + + STRV_FOREACH(p, files) { + _cleanup_fclose_ FILE *f; + int n = 0; + + f = fopen(*p, "re"); + if (!f) { + if (errno == ENOENT) + continue; + + return -errno; + } + + for (;;) { + _cleanup_free_ char *line = NULL; + UnitFilePresetRule rule = {}; + const char *parameter; + char *l; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + l = strstrip(line); + n++; + + if (isempty(l)) + continue; + if (strchr(COMMENTS, *l)) + continue; + + parameter = first_word(l, "enable"); + if (parameter) { + char *unit_name; + char **instances = NULL; + + /* Unit_name will remain the same as parameter when no instances are specified */ + r = split_pattern_into_name_and_instances(parameter, &unit_name, &instances); + if (r < 0) { + log_syntax(NULL, LOG_WARNING, *p, n, r, "Couldn't parse line '%s'. Ignoring.", line); + continue; + } + + rule = (UnitFilePresetRule) { + .pattern = unit_name, + .action = PRESET_ENABLE, + .instances = instances, + }; + } + + parameter = first_word(l, "disable"); + if (parameter) { + char *pattern; + + pattern = strdup(parameter); + if (!pattern) + return -ENOMEM; + + rule = (UnitFilePresetRule) { + .pattern = pattern, + .action = PRESET_DISABLE, + }; + } + + if (rule.action) { + if (!GREEDY_REALLOC(ps.rules, n_allocated, ps.n_rules + 1)) + return -ENOMEM; + + ps.rules[ps.n_rules++] = rule; + continue; + } + + log_syntax(NULL, LOG_WARNING, *p, n, 0, "Couldn't parse line '%s'. Ignoring.", line); + } + } + + ps.initialized = true; + *presets = ps; + ps = (UnitFilePresets){}; + + return 0; +} + +static int pattern_match_multiple_instances( + const UnitFilePresetRule rule, + const char *unit_name, + char ***ret) { + + _cleanup_free_ char *templated_name = NULL; + int r; + + /* If no ret is needed or the rule itself does not have instances + * initialized, we return not matching */ + if (!ret || !rule.instances) + return 0; + + r = unit_name_template(unit_name, &templated_name); + if (r < 0) + return r; + if (!streq(rule.pattern, templated_name)) + return 0; + + /* Compose a list of specified instances when unit name is a template */ + if (unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) { + _cleanup_strv_free_ char **out_strv = NULL; + + char **iter; + STRV_FOREACH(iter, rule.instances) { + _cleanup_free_ char *name = NULL; + + r = unit_name_replace_instance(unit_name, *iter, &name); + if (r < 0) + return r; + + r = strv_consume(&out_strv, TAKE_PTR(name)); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(out_strv); + return 1; + } else { + /* We now know the input unit name is an instance name */ + _cleanup_free_ char *instance_name = NULL; + + r = unit_name_to_instance(unit_name, &instance_name); + if (r < 0) + return r; + + if (strv_find(rule.instances, instance_name)) + return 1; + } + return 0; +} + +static int query_presets(const char *name, const UnitFilePresets *presets, char ***instance_name_list) { + PresetAction action = PRESET_UNKNOWN; + + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + for (size_t i = 0; i < presets->n_rules; i++) + if (pattern_match_multiple_instances(presets->rules[i], name, instance_name_list) > 0 || + fnmatch(presets->rules[i].pattern, name, FNM_NOESCAPE) == 0) { + action = presets->rules[i].action; + break; + } + + switch (action) { + case PRESET_UNKNOWN: + log_debug("Preset files don't specify rule for %s. Enabling.", name); + return 1; + case PRESET_ENABLE: + if (instance_name_list && *instance_name_list) { + char **s; + STRV_FOREACH(s, *instance_name_list) + log_debug("Preset files say enable %s.", *s); + } else + log_debug("Preset files say enable %s.", name); + return 1; + case PRESET_DISABLE: + log_debug("Preset files say disable %s.", name); + return 0; + default: + assert_not_reached("invalid preset action"); + } +} + +int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name, UnitFilePresets *cached) { + _cleanup_(unit_file_presets_freep) UnitFilePresets tmp = {}; + int r; + + if (!cached) + cached = &tmp; + if (!cached->initialized) { + r = read_presets(scope, root_dir, cached); + if (r < 0) + return r; + } + + return query_presets(name, cached, NULL); +} + +static int execute_preset( + UnitFileScope scope, + InstallContext *plus, + InstallContext *minus, + const LookupPaths *paths, + const char *config_path, + char **files, + UnitFilePresetMode mode, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + int r; + + assert(plus); + assert(minus); + assert(paths); + assert(config_path); + + if (mode != UNIT_FILE_PRESET_ENABLE_ONLY) { + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + + r = install_context_mark_for_removal(scope, minus, paths, &remove_symlinks_to, config_path, changes, n_changes); + if (r < 0) + return r; + + r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, false, changes, n_changes); + } else + r = 0; + + if (mode != UNIT_FILE_PRESET_DISABLE_ONLY) { + int q; + + /* Returns number of symlinks that where supposed to be installed. */ + q = install_context_apply(scope, plus, paths, config_path, force, SEARCH_LOAD, changes, n_changes); + if (r >= 0) { + if (q < 0) + r = q; + else + r += q; + } + } + + return r; +} + +static int preset_prepare_one( + UnitFileScope scope, + InstallContext *plus, + InstallContext *minus, + LookupPaths *paths, + const char *name, + const UnitFilePresets *presets, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(install_context_done) InstallContext tmp = {}; + _cleanup_strv_free_ char **instance_name_list = NULL; + UnitFileInstallInfo *i; + int r; + + if (install_info_find(plus, name) || install_info_find(minus, name)) + return 0; + + r = install_info_discover(scope, &tmp, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + if (!streq(name, i->name)) { + log_debug("Skipping %s because it is an alias for %s.", name, i->name); + return 0; + } + + r = query_presets(name, presets, &instance_name_list); + if (r < 0) + return r; + + if (r > 0) { + if (instance_name_list) { + char **s; + STRV_FOREACH(s, instance_name_list) { + r = install_info_discover_and_check(scope, plus, paths, *s, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + } + } else { + r = install_info_discover_and_check(scope, plus, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + } + + } else + r = install_info_discover(scope, minus, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + + return r; +} + +int unit_file_preset( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(install_context_done) InstallContext plus = {}, minus = {}; + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(unit_file_presets_freep) UnitFilePresets presets = {}; + const char *config_path; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(mode < _UNIT_FILE_PRESET_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + r = read_presets(scope, root_dir, &presets); + if (r < 0) + return r; + + STRV_FOREACH(i, files) { + r = preset_prepare_one(scope, &plus, &minus, &paths, *i, &presets, changes, n_changes); + if (r < 0) + return r; + } + + return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes); +} + +int unit_file_preset_all( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(install_context_done) InstallContext plus = {}, minus = {}; + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(unit_file_presets_freep) UnitFilePresets presets = {}; + const char *config_path = NULL; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(mode < _UNIT_FILE_PRESET_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + r = read_presets(scope, root_dir, &presets); + if (r < 0) + return r; + + STRV_FOREACH(i, paths.search_path) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + d = opendir(*i); + if (!d) { + if (errno == ENOENT) + continue; + + return -errno; + } + + FOREACH_DIRENT(de, d, return -errno) { + + if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY)) + continue; + + dirent_ensure_type(d, de); + + if (!IN_SET(de->d_type, DT_LNK, DT_REG)) + continue; + + /* we don't pass changes[] in, because we want to handle errors on our own */ + r = preset_prepare_one(scope, &plus, &minus, &paths, de->d_name, &presets, NULL, 0); + if (r == -ERFKILL) + r = unit_file_changes_add(changes, n_changes, + UNIT_FILE_IS_MASKED, de->d_name, NULL); + else if (r == -ENOLINK) + r = unit_file_changes_add(changes, n_changes, + UNIT_FILE_IS_DANGLING, de->d_name, NULL); + else if (r == -EADDRNOTAVAIL) /* Ignore generated/transient units when applying preset */ + continue; + if (r < 0) + return r; + } + } + + return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes); +} + +static void unit_file_list_free_one(UnitFileList *f) { + if (!f) + return; + + free(f->path); + free(f); +} + +Hashmap* unit_file_list_free(Hashmap *h) { + return hashmap_free_with_destructor(h, unit_file_list_free_one); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(UnitFileList*, unit_file_list_free_one); + +int unit_file_get_list( + UnitFileScope scope, + const char *root_dir, + Hashmap *h, + char **states, + char **patterns) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + char **dirname; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(h); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + STRV_FOREACH(dirname, paths.search_path) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + d = opendir(*dirname); + if (!d) { + if (errno == ENOENT) + continue; + if (IN_SET(errno, ENOTDIR, EACCES)) { + log_debug_errno(errno, "Failed to open \"%s\": %m", *dirname); + continue; + } + + return -errno; + } + + FOREACH_DIRENT(de, d, return -errno) { + _cleanup_(unit_file_list_free_onep) UnitFileList *f = NULL; + + if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY)) + continue; + + if (!strv_fnmatch_or_empty(patterns, de->d_name, FNM_NOESCAPE)) + continue; + + if (hashmap_get(h, de->d_name)) + continue; + + dirent_ensure_type(d, de); + + if (!IN_SET(de->d_type, DT_LNK, DT_REG)) + continue; + + f = new0(UnitFileList, 1); + if (!f) + return -ENOMEM; + + f->path = path_make_absolute(de->d_name, *dirname); + if (!f->path) + return -ENOMEM; + + r = unit_file_lookup_state(scope, &paths, de->d_name, &f->state); + if (r < 0) + f->state = UNIT_FILE_BAD; + + if (!strv_isempty(states) && + !strv_contains(states, unit_file_state_to_string(f->state))) + continue; + + r = hashmap_put(h, basename(f->path), f); + if (r < 0) + return r; + + f = NULL; /* prevent cleanup */ + } + } + + return 0; +} + +static const char* const unit_file_state_table[_UNIT_FILE_STATE_MAX] = { + [UNIT_FILE_ENABLED] = "enabled", + [UNIT_FILE_ENABLED_RUNTIME] = "enabled-runtime", + [UNIT_FILE_LINKED] = "linked", + [UNIT_FILE_LINKED_RUNTIME] = "linked-runtime", + [UNIT_FILE_ALIAS] = "alias", + [UNIT_FILE_MASKED] = "masked", + [UNIT_FILE_MASKED_RUNTIME] = "masked-runtime", + [UNIT_FILE_STATIC] = "static", + [UNIT_FILE_DISABLED] = "disabled", + [UNIT_FILE_INDIRECT] = "indirect", + [UNIT_FILE_GENERATED] = "generated", + [UNIT_FILE_TRANSIENT] = "transient", + [UNIT_FILE_BAD] = "bad", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_file_state, UnitFileState); + +static const char* const unit_file_change_type_table[_UNIT_FILE_CHANGE_TYPE_MAX] = { + [UNIT_FILE_SYMLINK] = "symlink", + [UNIT_FILE_UNLINK] = "unlink", + [UNIT_FILE_IS_MASKED] = "masked", + [UNIT_FILE_IS_DANGLING] = "dangling", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_file_change_type, UnitFileChangeType); + +static const char* const unit_file_preset_mode_table[_UNIT_FILE_PRESET_MAX] = { + [UNIT_FILE_PRESET_FULL] = "full", + [UNIT_FILE_PRESET_ENABLE_ONLY] = "enable-only", + [UNIT_FILE_PRESET_DISABLE_ONLY] = "disable-only", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_file_preset_mode, UnitFilePresetMode); diff --git a/src/shared/install.h b/src/shared/install.h new file mode 100644 index 0000000..84bf1f5 --- /dev/null +++ b/src/shared/install.h @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +typedef enum UnitFilePresetMode UnitFilePresetMode; +typedef enum UnitFileChangeType UnitFileChangeType; +typedef enum UnitFileFlags UnitFileFlags; +typedef enum UnitFileType UnitFileType; +typedef struct UnitFileChange UnitFileChange; +typedef struct UnitFileList UnitFileList; +typedef struct UnitFileInstallInfo UnitFileInstallInfo; + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" +#include "path-lookup.h" +#include "strv.h" +#include "unit-name.h" + +enum UnitFilePresetMode { + UNIT_FILE_PRESET_FULL, + UNIT_FILE_PRESET_ENABLE_ONLY, + UNIT_FILE_PRESET_DISABLE_ONLY, + _UNIT_FILE_PRESET_MAX, + _UNIT_FILE_PRESET_INVALID = -1 +}; + +enum UnitFileChangeType { + UNIT_FILE_SYMLINK, + UNIT_FILE_UNLINK, + UNIT_FILE_IS_MASKED, + UNIT_FILE_IS_DANGLING, + _UNIT_FILE_CHANGE_TYPE_MAX, + _UNIT_FILE_CHANGE_TYPE_INVALID = INT_MIN +}; + +enum UnitFileFlags { + UNIT_FILE_RUNTIME = 1 << 0, /* Public API via DBUS, do not change */ + UNIT_FILE_FORCE = 1 << 1, /* Public API via DBUS, do not change */ + UNIT_FILE_PORTABLE = 1 << 2, /* Public API via DBUS, do not change */ + UNIT_FILE_DRY_RUN = 1 << 3, + _UNIT_FILE_FLAGS_MASK_PUBLIC = UNIT_FILE_RUNTIME|UNIT_FILE_PORTABLE|UNIT_FILE_FORCE, +}; + +/* type can either one of the UnitFileChangeTypes listed above, or a negative error. + * If source is specified, it should be the contents of the path symlink. + * In case of an error, source should be the existing symlink contents or NULL + */ +struct UnitFileChange { + int type; /* UnitFileChangeType or bust */ + char *path; + char *source; +}; + +static inline bool unit_file_changes_have_modification(const UnitFileChange* changes, size_t n_changes) { + size_t i; + for (i = 0; i < n_changes; i++) + if (IN_SET(changes[i].type, UNIT_FILE_SYMLINK, UNIT_FILE_UNLINK)) + return true; + return false; +} + +struct UnitFileList { + char *path; + UnitFileState state; +}; + +enum UnitFileType { + UNIT_FILE_TYPE_REGULAR, + UNIT_FILE_TYPE_SYMLINK, + UNIT_FILE_TYPE_MASKED, + _UNIT_FILE_TYPE_MAX, + _UNIT_FILE_TYPE_INVALID = -1, +}; + +struct UnitFileInstallInfo { + char *name; + char *path; + + char **aliases; + char **wanted_by; + char **required_by; + char **also; + + char *default_instance; + char *symlink_target; + + UnitFileType type; + bool auxiliary; +}; + +int unit_file_enable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_disable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_reenable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_preset( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_preset_all( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_mask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_unmask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_link( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_revert( + UnitFileScope scope, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_set_default( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + const char *file, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_get_default( + UnitFileScope scope, + const char *root_dir, + char **name); +int unit_file_add_dependency( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + const char *target, + UnitDependency dep, + UnitFileChange **changes, + size_t *n_changes); + +int unit_file_lookup_state( + UnitFileScope scope, + const LookupPaths *paths, + const char *name, + UnitFileState *ret); + +int unit_file_get_state(UnitFileScope scope, const char *root_dir, const char *filename, UnitFileState *ret); +int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name); + +int unit_file_get_list(UnitFileScope scope, const char *root_dir, Hashmap *h, char **states, char **patterns); +Hashmap* unit_file_list_free(Hashmap *h); + +int unit_file_changes_add(UnitFileChange **changes, size_t *n_changes, int type, const char *path, const char *source); +void unit_file_changes_free(UnitFileChange *changes, size_t n_changes); +void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet); + +int unit_file_verify_alias(const UnitFileInstallInfo *i, const char *dst, char **ret_dst); + +typedef struct UnitFilePresetRule UnitFilePresetRule; + +typedef struct { + UnitFilePresetRule *rules; + size_t n_rules; + bool initialized; +} UnitFilePresets; + +void unit_file_presets_freep(UnitFilePresets *p); +int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name, UnitFilePresets *cached); + +const char *unit_file_state_to_string(UnitFileState s) _const_; +UnitFileState unit_file_state_from_string(const char *s) _pure_; +/* from_string conversion is unreliable because of the overlap between -EPERM and -1 for error. */ + +const char *unit_file_change_type_to_string(UnitFileChangeType s) _const_; +UnitFileChangeType unit_file_change_type_from_string(const char *s) _pure_; + +const char *unit_file_preset_mode_to_string(UnitFilePresetMode m) _const_; +UnitFilePresetMode unit_file_preset_mode_from_string(const char *s) _pure_; diff --git a/src/shared/ip-protocol-list.c b/src/shared/ip-protocol-list.c new file mode 100644 index 0000000..0623d5e --- /dev/null +++ b/src/shared/ip-protocol-list.c @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <netinet/in.h> + +#include "alloc-util.h" +#include "ip-protocol-list.h" +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" + +static const struct ip_protocol_name* lookup_ip_protocol(register const char *str, register GPERF_LEN_TYPE len); + +#include "ip-protocol-from-name.h" +#include "ip-protocol-to-name.h" + +const char *ip_protocol_to_name(int id) { + + if (id < 0) + return NULL; + + if ((size_t) id >= ELEMENTSOF(ip_protocol_names)) + return NULL; + + return ip_protocol_names[id]; +} + +int ip_protocol_from_name(const char *name) { + const struct ip_protocol_name *sc; + + assert(name); + + sc = lookup_ip_protocol(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +int parse_ip_protocol(const char *s) { + _cleanup_free_ char *str = NULL; + int i, r; + + assert(s); + + if (isempty(s)) + return IPPROTO_IP; + + /* Do not use strdupa() here, as the input string may come from * + * command line or config files. */ + str = strdup(s); + if (!str) + return -ENOMEM; + + i = ip_protocol_from_name(ascii_strlower(str)); + if (i >= 0) + return i; + + r = safe_atoi(str, &i); + if (r < 0) + return r; + + if (!ip_protocol_to_name(i)) + return -EINVAL; + + return i; +} diff --git a/src/shared/ip-protocol-list.h b/src/shared/ip-protocol-list.h new file mode 100644 index 0000000..abe3f5f --- /dev/null +++ b/src/shared/ip-protocol-list.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +const char *ip_protocol_to_name(int id); +int ip_protocol_from_name(const char *name); +int parse_ip_protocol(const char *s); diff --git a/src/shared/ip-protocol-to-name.awk b/src/shared/ip-protocol-to-name.awk new file mode 100644 index 0000000..824f811 --- /dev/null +++ b/src/shared/ip-protocol-to-name.awk @@ -0,0 +1,9 @@ +BEGIN{ + print "static const char* const ip_protocol_names[] = { " +} +!/HOPOPTS/ { + printf " [IPPROTO_%s] = \"%s\",\n", $1, tolower($1) +} +END{ + print "};" +} diff --git a/src/shared/ipvlan-util.c b/src/shared/ipvlan-util.c new file mode 100644 index 0000000..1f2e2ff --- /dev/null +++ b/src/shared/ipvlan-util.c @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <net/if.h> + +#include "ipvlan-util.h" +#include "string-table.h" + +static const char* const ipvlan_mode_table[_NETDEV_IPVLAN_MODE_MAX] = { + [NETDEV_IPVLAN_MODE_L2] = "L2", + [NETDEV_IPVLAN_MODE_L3] = "L3", + [NETDEV_IPVLAN_MODE_L3S] = "L3S", +}; + +DEFINE_STRING_TABLE_LOOKUP(ipvlan_mode, IPVlanMode); + +static const char* const ipvlan_flags_table[_NETDEV_IPVLAN_FLAGS_MAX] = { + [NETDEV_IPVLAN_FLAGS_BRIGDE] = "bridge", + [NETDEV_IPVLAN_FLAGS_PRIVATE] = "private", + [NETDEV_IPVLAN_FLAGS_VEPA] = "vepa", +}; + +DEFINE_STRING_TABLE_LOOKUP(ipvlan_flags, IPVlanFlags); diff --git a/src/shared/ipvlan-util.h b/src/shared/ipvlan-util.h new file mode 100644 index 0000000..90f755b --- /dev/null +++ b/src/shared/ipvlan-util.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <netinet/in.h> +#include <linux/if_link.h> + +#include "macro.h" + +typedef enum IPVlanMode { + NETDEV_IPVLAN_MODE_L2 = IPVLAN_MODE_L2, + NETDEV_IPVLAN_MODE_L3 = IPVLAN_MODE_L3, + NETDEV_IPVLAN_MODE_L3S = IPVLAN_MODE_L3S, + _NETDEV_IPVLAN_MODE_MAX, + _NETDEV_IPVLAN_MODE_INVALID = -1 +} IPVlanMode; + +typedef enum IPVlanFlags { + NETDEV_IPVLAN_FLAGS_BRIGDE, + NETDEV_IPVLAN_FLAGS_PRIVATE = IPVLAN_F_PRIVATE, + NETDEV_IPVLAN_FLAGS_VEPA = IPVLAN_F_VEPA, + _NETDEV_IPVLAN_FLAGS_MAX, + _NETDEV_IPVLAN_FLAGS_INVALID = -1 +} IPVlanFlags; + +const char *ipvlan_mode_to_string(IPVlanMode d) _const_; +IPVlanMode ipvlan_mode_from_string(const char *d) _pure_; + +const char *ipvlan_flags_to_string(IPVlanFlags d) _const_; +IPVlanFlags ipvlan_flags_from_string(const char *d) _pure_; diff --git a/src/shared/journal-importer.c b/src/shared/journal-importer.c new file mode 100644 index 0000000..b2785f0 --- /dev/null +++ b/src/shared/journal-importer.c @@ -0,0 +1,483 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "errno-util.h" +#include "escape.h" +#include "fd-util.h" +#include "io-util.h" +#include "journal-file.h" +#include "journal-importer.h" +#include "journal-util.h" +#include "parse-util.h" +#include "string-util.h" +#include "unaligned.h" + +enum { + IMPORTER_STATE_LINE = 0, /* waiting to read, or reading line */ + IMPORTER_STATE_DATA_START, /* reading binary data header */ + IMPORTER_STATE_DATA, /* reading binary data */ + IMPORTER_STATE_DATA_FINISH, /* expecting newline */ + IMPORTER_STATE_EOF, /* done */ +}; + +void journal_importer_cleanup(JournalImporter *imp) { + if (imp->fd >= 0 && !imp->passive_fd) { + log_debug("Closing %s (fd=%d)", imp->name ?: "importer", imp->fd); + safe_close(imp->fd); + } + + free(imp->name); + free(imp->buf); + iovw_free_contents(&imp->iovw, false); +} + +static char* realloc_buffer(JournalImporter *imp, size_t size) { + char *b, *old = imp->buf; + + b = GREEDY_REALLOC(imp->buf, imp->size, size); + if (!b) + return NULL; + + iovw_rebase(&imp->iovw, old, imp->buf); + + return b; +} + +static int get_line(JournalImporter *imp, char **line, size_t *size) { + ssize_t n; + char *c = NULL; + + assert(imp); + assert(imp->state == IMPORTER_STATE_LINE); + assert(imp->offset <= imp->filled); + assert(imp->filled <= imp->size); + assert(!imp->buf || imp->size > 0); + assert(imp->fd >= 0); + + for (;;) { + if (imp->buf) { + size_t start = MAX(imp->scanned, imp->offset); + + c = memchr(imp->buf + start, '\n', + imp->filled - start); + if (c) + break; + } + + imp->scanned = imp->filled; + if (imp->scanned >= DATA_SIZE_MAX) + return log_error_errno(SYNTHETIC_ERRNO(ENOBUFS), + "Entry is bigger than %u bytes.", + DATA_SIZE_MAX); + + if (imp->passive_fd) + /* we have to wait for some data to come to us */ + return -EAGAIN; + + /* We know that imp->filled is at most DATA_SIZE_MAX, so if + we reallocate it, we'll increase the size at least a bit. */ + assert_cc(DATA_SIZE_MAX < ENTRY_SIZE_MAX); + if (imp->size - imp->filled < LINE_CHUNK && + !realloc_buffer(imp, MIN(imp->filled + LINE_CHUNK, ENTRY_SIZE_MAX))) + return log_oom(); + + assert(imp->buf); + assert(imp->size - imp->filled >= LINE_CHUNK || + imp->size == ENTRY_SIZE_MAX); + + n = read(imp->fd, + imp->buf + imp->filled, + imp->size - imp->filled); + if (n < 0) { + if (errno != EAGAIN) + log_error_errno(errno, "read(%d, ..., %zu): %m", + imp->fd, + imp->size - imp->filled); + return -errno; + } else if (n == 0) + return 0; + + imp->filled += n; + } + + *line = imp->buf + imp->offset; + *size = c + 1 - imp->buf - imp->offset; + imp->offset += *size; + + return 1; +} + +static int fill_fixed_size(JournalImporter *imp, void **data, size_t size) { + + assert(imp); + assert(IN_SET(imp->state, IMPORTER_STATE_DATA_START, IMPORTER_STATE_DATA, IMPORTER_STATE_DATA_FINISH)); + assert(size <= DATA_SIZE_MAX); + assert(imp->offset <= imp->filled); + assert(imp->filled <= imp->size); + assert(imp->buf || imp->size == 0); + assert(!imp->buf || imp->size > 0); + assert(imp->fd >= 0); + assert(data); + + while (imp->filled - imp->offset < size) { + int n; + + if (imp->passive_fd) + /* we have to wait for some data to come to us */ + return -EAGAIN; + + if (!realloc_buffer(imp, imp->offset + size)) + return log_oom(); + + n = read(imp->fd, imp->buf + imp->filled, + imp->size - imp->filled); + if (n < 0) { + if (errno != EAGAIN) + log_error_errno(errno, "read(%d, ..., %zu): %m", imp->fd, + imp->size - imp->filled); + return -errno; + } else if (n == 0) + return 0; + + imp->filled += n; + } + + *data = imp->buf + imp->offset; + imp->offset += size; + + return 1; +} + +static int get_data_size(JournalImporter *imp) { + int r; + void *data; + + assert(imp); + assert(imp->state == IMPORTER_STATE_DATA_START); + assert(imp->data_size == 0); + + r = fill_fixed_size(imp, &data, sizeof(uint64_t)); + if (r <= 0) + return r; + + imp->data_size = unaligned_read_le64(data); + if (imp->data_size > DATA_SIZE_MAX) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Stream declares field with size %zu > DATA_SIZE_MAX = %u", + imp->data_size, DATA_SIZE_MAX); + if (imp->data_size == 0) + log_warning("Binary field with zero length"); + + return 1; +} + +static int get_data_data(JournalImporter *imp, void **data) { + int r; + + assert(imp); + assert(data); + assert(imp->state == IMPORTER_STATE_DATA); + + r = fill_fixed_size(imp, data, imp->data_size); + if (r <= 0) + return r; + + return 1; +} + +static int get_data_newline(JournalImporter *imp) { + int r; + char *data; + + assert(imp); + assert(imp->state == IMPORTER_STATE_DATA_FINISH); + + r = fill_fixed_size(imp, (void**) &data, 1); + if (r <= 0) + return r; + + assert(data); + if (*data != '\n') { + char buf[4]; + int l; + + l = cescape_char(*data, buf); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Expected newline, got '%.*s'", l, buf); + } + + return 1; +} + +static int process_special_field(JournalImporter *imp, char *line) { + const char *value; + char buf[CELLESCAPE_DEFAULT_LENGTH]; + int r; + + assert(line); + + value = startswith(line, "__CURSOR="); + if (value) + /* ignore __CURSOR */ + return 1; + + value = startswith(line, "__REALTIME_TIMESTAMP="); + if (value) { + uint64_t x; + + r = safe_atou64(value, &x); + if (r < 0) + return log_warning_errno(r, "Failed to parse __REALTIME_TIMESTAMP '%s': %m", + cellescape(buf, sizeof buf, value)); + else if (!VALID_REALTIME(x)) { + log_warning("__REALTIME_TIMESTAMP out of range, ignoring: %"PRIu64, x); + return -ERANGE; + } + + imp->ts.realtime = x; + return 1; + } + + value = startswith(line, "__MONOTONIC_TIMESTAMP="); + if (value) { + uint64_t x; + + r = safe_atou64(value, &x); + if (r < 0) + return log_warning_errno(r, "Failed to parse __MONOTONIC_TIMESTAMP '%s': %m", + cellescape(buf, sizeof buf, value)); + else if (!VALID_MONOTONIC(x)) { + log_warning("__MONOTONIC_TIMESTAMP out of range, ignoring: %"PRIu64, x); + return -ERANGE; + } + + imp->ts.monotonic = x; + return 1; + } + + /* Just a single underline, but it needs special treatment too. */ + value = startswith(line, "_BOOT_ID="); + if (value) { + r = sd_id128_from_string(value, &imp->boot_id); + if (r < 0) + return log_warning_errno(r, "Failed to parse _BOOT_ID '%s': %m", + cellescape(buf, sizeof buf, value)); + + /* store the field in the usual fashion too */ + return 0; + } + + value = startswith(line, "__"); + if (value) { + log_notice("Unknown dunder line __%s, ignoring.", cellescape(buf, sizeof buf, value)); + return 1; + } + + /* no dunder */ + return 0; +} + +int journal_importer_process_data(JournalImporter *imp) { + int r; + + switch(imp->state) { + case IMPORTER_STATE_LINE: { + char *line, *sep; + size_t n = 0; + + assert(imp->data_size == 0); + + r = get_line(imp, &line, &n); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + assert(n > 0); + assert(line[n-1] == '\n'); + + if (n == 1) { + log_trace("Received empty line, event is ready"); + return 1; + } + + /* MESSAGE=xxx\n + or + COREDUMP\n + LLLLLLLL0011223344...\n + */ + sep = memchr(line, '=', n); + if (sep) { + /* chomp newline */ + n--; + + if (!journal_field_valid(line, sep - line, true)) { + char buf[64], *t; + + t = strndupa(line, sep - line); + log_debug("Ignoring invalid field: \"%s\"", + cellescape(buf, sizeof buf, t)); + + return 0; + } + + line[n] = '\0'; + r = process_special_field(imp, line); + if (r != 0) + return r < 0 ? r : 0; + + r = iovw_put(&imp->iovw, line, n); + if (r < 0) + return r; + } else { + if (!journal_field_valid(line, n - 1, true)) { + char buf[64], *t; + + t = strndupa(line, n - 1); + log_debug("Ignoring invalid field: \"%s\"", + cellescape(buf, sizeof buf, t)); + + return 0; + } + + /* replace \n with = */ + line[n-1] = '='; + + imp->field_len = n; + imp->state = IMPORTER_STATE_DATA_START; + + /* we cannot put the field in iovec until we have all data */ + } + + log_trace("Received: %.*s (%s)", (int) n, line, sep ? "text" : "binary"); + + return 0; /* continue */ + } + + case IMPORTER_STATE_DATA_START: + assert(imp->data_size == 0); + + r = get_data_size(imp); + // log_debug("get_data_size() -> %d", r); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + + imp->state = imp->data_size > 0 ? + IMPORTER_STATE_DATA : IMPORTER_STATE_DATA_FINISH; + + return 0; /* continue */ + + case IMPORTER_STATE_DATA: { + void *data; + char *field; + + assert(imp->data_size > 0); + + r = get_data_data(imp, &data); + // log_debug("get_data_data() -> %d", r); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + + assert(data); + + field = (char*) data - sizeof(uint64_t) - imp->field_len; + memmove(field + sizeof(uint64_t), field, imp->field_len); + + r = iovw_put(&imp->iovw, field + sizeof(uint64_t), imp->field_len + imp->data_size); + if (r < 0) + return r; + + imp->state = IMPORTER_STATE_DATA_FINISH; + + return 0; /* continue */ + } + + case IMPORTER_STATE_DATA_FINISH: + r = get_data_newline(imp); + // log_debug("get_data_newline() -> %d", r); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + + imp->data_size = 0; + imp->state = IMPORTER_STATE_LINE; + + return 0; /* continue */ + default: + assert_not_reached("wtf?"); + } +} + +int journal_importer_push_data(JournalImporter *imp, const char *data, size_t size) { + assert(imp); + assert(imp->state != IMPORTER_STATE_EOF); + + if (!realloc_buffer(imp, imp->filled + size)) + return log_error_errno(SYNTHETIC_ERRNO(ENOMEM), + "Failed to store received data of size %zu " + "(in addition to existing %zu bytes with %zu filled): %s", + size, imp->size, imp->filled, + strerror_safe(ENOMEM)); + + memcpy(imp->buf + imp->filled, data, size); + imp->filled += size; + + return 0; +} + +void journal_importer_drop_iovw(JournalImporter *imp) { + size_t remain, target; + + /* This function drops processed data that along with the iovw that points at it */ + + iovw_free_contents(&imp->iovw, false); + + /* possibly reset buffer position */ + remain = imp->filled - imp->offset; + + if (remain == 0) /* no brainer */ + imp->offset = imp->scanned = imp->filled = 0; + else if (imp->offset > imp->size - imp->filled && + imp->offset > remain) { + memcpy(imp->buf, imp->buf + imp->offset, remain); + imp->offset = imp->scanned = 0; + imp->filled = remain; + } + + target = imp->size; + while (target > 16 * LINE_CHUNK && imp->filled < target / 2) + target /= 2; + if (target < imp->size) { + char *tmp; + + tmp = realloc(imp->buf, target); + if (!tmp) + log_warning("Failed to reallocate buffer to (smaller) size %zu", + target); + else { + log_debug("Reallocated buffer from %zu to %zu bytes", + imp->size, target); + imp->buf = tmp; + imp->size = target; + } + } +} + +bool journal_importer_eof(const JournalImporter *imp) { + return imp->state == IMPORTER_STATE_EOF; +} diff --git a/src/shared/journal-importer.h b/src/shared/journal-importer.h new file mode 100644 index 0000000..e0073fc --- /dev/null +++ b/src/shared/journal-importer.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include <stddef.h> +#include <stdbool.h> +#include <sys/uio.h> + +#include "sd-id128.h" + +#include "io-util.h" +#include "time-util.h" + +/* Make sure not to make this smaller than the maximum coredump size. + * See JOURNAL_SIZE_MAX in coredump.c */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#define ENTRY_SIZE_MAX (1024*1024*770u) +#define DATA_SIZE_MAX (1024*1024*768u) +#else +#define ENTRY_SIZE_MAX (1024*1024*13u) +#define DATA_SIZE_MAX (1024*1024*11u) +#endif +#define LINE_CHUNK 8*1024u + +/* The maximum number of fields in an entry */ +#define ENTRY_FIELD_COUNT_MAX 1024 + +typedef struct JournalImporter { + int fd; + bool passive_fd; + char *name; + + char *buf; + size_t size; /* total size of the buffer */ + size_t offset; /* offset to the beginning of live data in the buffer */ + size_t scanned; /* number of bytes since the beginning of data without a newline */ + size_t filled; /* total number of bytes in the buffer */ + + size_t field_len; /* used for binary fields: the field name length */ + size_t data_size; /* and the size of the binary data chunk being processed */ + + struct iovec_wrapper iovw; + + int state; + dual_timestamp ts; + sd_id128_t boot_id; +} JournalImporter; + +#define JOURNAL_IMPORTER_INIT(_fd) { .fd = (_fd), .iovw = {} } +#define JOURNAL_IMPORTER_MAKE(_fd) (JournalImporter) JOURNAL_IMPORTER_INIT(_fd) + +void journal_importer_cleanup(JournalImporter *); +int journal_importer_process_data(JournalImporter *); +int journal_importer_push_data(JournalImporter *, const char *data, size_t size); +void journal_importer_drop_iovw(JournalImporter *); +bool journal_importer_eof(const JournalImporter *); + +static inline size_t journal_importer_bytes_remaining(const JournalImporter *imp) { + return imp->filled; +} diff --git a/src/shared/journal-util.c b/src/shared/journal-util.c new file mode 100644 index 0000000..9e1870e --- /dev/null +++ b/src/shared/journal-util.c @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "acl-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include "journal-internal.h" +#include "journal-util.h" +#include "log.h" +#include "strv.h" +#include "user-util.h" + +static int access_check_var_log_journal(sd_journal *j, bool want_other_users) { +#if HAVE_ACL + _cleanup_strv_free_ char **g = NULL; + const char* dir; +#endif + int r; + + assert(j); + + /* If we are root, we should have access, don't warn. */ + if (getuid() == 0) + return 0; + + /* If we are in the 'systemd-journal' group, we should have + * access too. */ + r = in_group("systemd-journal"); + if (r < 0) + return log_error_errno(r, "Failed to check if we are in the 'systemd-journal' group: %m"); + if (r > 0) + return 0; + +#if HAVE_ACL + if (laccess("/run/log/journal", F_OK) >= 0) + dir = "/run/log/journal"; + else + dir = "/var/log/journal"; + + /* If we are in any of the groups listed in the journal ACLs, + * then all is good, too. Let's enumerate all groups from the + * default ACL of the directory, which generally should allow + * access to most journal files too. */ + r = acl_search_groups(dir, &g); + if (r < 0) + return log_error_errno(r, "Failed to search journal ACL: %m"); + if (r > 0) + return 0; + + /* Print a pretty list, if there were ACLs set. */ + if (!strv_isempty(g)) { + _cleanup_free_ char *s = NULL; + + /* There are groups in the ACL, let's list them */ + r = strv_extend(&g, "systemd-journal"); + if (r < 0) + return log_oom(); + + strv_sort(g); + strv_uniq(g); + + s = strv_join(g, "', '"); + if (!s) + return log_oom(); + + log_notice("Hint: You are currently not seeing messages from %s.\n" + " Users in groups '%s' can see all messages.\n" + " Pass -q to turn off this notice.", + want_other_users ? "other users and the system" : "the system", + s); + return 1; + } +#endif + + /* If no ACLs were found, print a short version of the message. */ + log_notice("Hint: You are currently not seeing messages from %s.\n" + " Users in the 'systemd-journal' group can see all messages. Pass -q to\n" + " turn off this notice.", + want_other_users ? "other users and the system" : "the system"); + + return 1; +} + +int journal_access_blocked(sd_journal *j) { + return hashmap_contains(j->errors, INT_TO_PTR(-EACCES)); +} + +int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users) { + void *code; + char *path; + int r = 0; + + assert(j); + + if (hashmap_isempty(j->errors)) { + if (ordered_hashmap_isempty(j->files) && !quiet) + log_notice("No journal files were found."); + + return 0; + } + + if (journal_access_blocked(j)) { + if (!quiet) + (void) access_check_var_log_journal(j, want_other_users); + + if (ordered_hashmap_isempty(j->files)) + r = log_error_errno(EACCES, "No journal files were opened due to insufficient permissions."); + } + + HASHMAP_FOREACH_KEY(path, code, j->errors) { + int err; + + err = abs(PTR_TO_INT(code)); + + switch (err) { + case EACCES: + continue; + + case ENODATA: + log_warning_errno(err, "Journal file %s is truncated, ignoring file.", path); + break; + + case EPROTONOSUPPORT: + log_warning_errno(err, "Journal file %1$s uses an unsupported feature, ignoring file.\n" + "Use SYSTEMD_LOG_LEVEL=debug journalctl --file=%1$s to see the details.", + path); + break; + + case EBADMSG: + log_warning_errno(err, "Journal file %s corrupted, ignoring file.", path); + break; + + default: + log_warning_errno(err, "An error was encountered while opening journal file or directory %s, ignoring file: %m", path); + break; + } + } + + return r; +} diff --git a/src/shared/journal-util.h b/src/shared/journal-util.h new file mode 100644 index 0000000..86fcba0 --- /dev/null +++ b/src/shared/journal-util.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-journal.h" + +int journal_access_blocked(sd_journal *j); +int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users); diff --git a/src/shared/json-internal.h b/src/shared/json-internal.h new file mode 100644 index 0000000..63afd22 --- /dev/null +++ b/src/shared/json-internal.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include "json.h" + +/* This header should include all prototypes only the JSON parser itself and + * its tests need access to. Normal code consuming the JSON parser should not + * interface with this. */ + +typedef union JsonValue { + /* Encodes a simple value. On x86-64 this structure is 16 bytes wide (as long double is 128bit). */ + bool boolean; + long double real; + intmax_t integer; + uintmax_t unsig; +} JsonValue; + +/* Let's protect us against accidental structure size changes on our most relevant arch */ +#ifdef __x86_64__ +assert_cc(sizeof(JsonValue) == 16U); +#endif + +#define JSON_VALUE_NULL ((JsonValue) {}) + +/* We use fake JsonVariant objects for some special values, in order to avoid memory allocations for them. Note that + * effectively this means that there are multiple ways to encode the same objects: via these magic values or as + * properly allocated JsonVariant. We convert between both on-the-fly as necessary. */ +enum +{ + _JSON_VARIANT_MAGIC_TRUE = 1, +#define JSON_VARIANT_MAGIC_TRUE ((JsonVariant*) _JSON_VARIANT_MAGIC_TRUE) + _JSON_VARIANT_MAGIC_FALSE, +#define JSON_VARIANT_MAGIC_FALSE ((JsonVariant*) _JSON_VARIANT_MAGIC_FALSE) + _JSON_VARIANT_MAGIC_NULL, +#define JSON_VARIANT_MAGIC_NULL ((JsonVariant*) _JSON_VARIANT_MAGIC_NULL) + _JSON_VARIANT_MAGIC_ZERO_INTEGER, +#define JSON_VARIANT_MAGIC_ZERO_INTEGER ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_INTEGER) + _JSON_VARIANT_MAGIC_ZERO_UNSIGNED, +#define JSON_VARIANT_MAGIC_ZERO_UNSIGNED ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_UNSIGNED) + _JSON_VARIANT_MAGIC_ZERO_REAL, +#define JSON_VARIANT_MAGIC_ZERO_REAL ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_REAL) + _JSON_VARIANT_MAGIC_EMPTY_STRING, +#define JSON_VARIANT_MAGIC_EMPTY_STRING ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_STRING) + _JSON_VARIANT_MAGIC_EMPTY_ARRAY, +#define JSON_VARIANT_MAGIC_EMPTY_ARRAY ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_ARRAY) + _JSON_VARIANT_MAGIC_EMPTY_OBJECT, +#define JSON_VARIANT_MAGIC_EMPTY_OBJECT ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_OBJECT) + __JSON_VARIANT_MAGIC_MAX +#define _JSON_VARIANT_MAGIC_MAX ((JsonVariant*) __JSON_VARIANT_MAGIC_MAX) +}; + +/* This is only safe as long as we don't define more than 4K magic pointers, i.e. the page size of the simplest + * architectures we support. That's because we rely on the fact that malloc() will never allocate from the first memory + * page, as it is a faulting page for catching NULL pointer dereferences. */ +assert_cc((unsigned) __JSON_VARIANT_MAGIC_MAX < 4096U); + +enum { /* JSON tokens */ + JSON_TOKEN_END, + JSON_TOKEN_COLON, + JSON_TOKEN_COMMA, + JSON_TOKEN_OBJECT_OPEN, + JSON_TOKEN_OBJECT_CLOSE, + JSON_TOKEN_ARRAY_OPEN, + JSON_TOKEN_ARRAY_CLOSE, + JSON_TOKEN_STRING, + JSON_TOKEN_REAL, + JSON_TOKEN_INTEGER, + JSON_TOKEN_UNSIGNED, + JSON_TOKEN_BOOLEAN, + JSON_TOKEN_NULL, + _JSON_TOKEN_MAX, + _JSON_TOKEN_INVALID = -1, +}; + +int json_tokenize(const char **p, char **ret_string, JsonValue *ret_value, unsigned *ret_line, unsigned *ret_column, void **state, unsigned *line, unsigned *column); diff --git a/src/shared/json.c b/src/shared/json.c new file mode 100644 index 0000000..ddf6dcb --- /dev/null +++ b/src/shared/json.c @@ -0,0 +1,4410 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <locale.h> +#include <math.h> +#include <stdarg.h> +#include <stdlib.h> +#include <sys/types.h> + +#include "sd-messages.h" + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "float.h" +#include "hexdecoct.h" +#include "json-internal.h" +#include "json.h" +#include "macro.h" +#include "memory-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "user-util.h" +#include "utf8.h" + +/* Refuse putting together variants with a larger depth than 2K by default (as a protection against overflowing stacks + * if code processes JSON objects recursively. Note that we store the depth in an uint16_t, hence make sure this + * remains under 2^16. + * + * The value first was 16k, but it was discovered to be too high on llvm/x86-64. See also: + * https://github.com/systemd/systemd/issues/10738 + * + * The value then was 4k, but it was discovered to be too high on s390x/aarch64. See also: + * https://github.com/systemd/systemd/issues/14396 */ + +#define DEPTH_MAX (2U*1024U) +assert_cc(DEPTH_MAX <= UINT16_MAX); + +typedef struct JsonSource { + /* When we parse from a file or similar, encodes the filename, to indicate the source of a json variant */ + size_t n_ref; + unsigned max_line; + unsigned max_column; + char name[]; +} JsonSource; + +/* On x86-64 this whole structure should have a size of 6 * 64 bit = 48 bytes */ +struct JsonVariant { + union { + /* We either maintain a reference counter for this variant itself, or we are embedded into an + * array/object, in which case only that surrounding object is ref-counted. (If 'embedded' is false, + * see below.) */ + size_t n_ref; + + /* If this JsonVariant is part of an array/object, then this field points to the surrounding + * JSON_VARIANT_ARRAY/JSON_VARIANT_OBJECT object. (If 'embedded' is true, see below.) */ + JsonVariant *parent; + }; + + /* If this was parsed from some file or buffer, this stores where from, as well as the source line/column */ + JsonSource *source; + unsigned line, column; + + JsonVariantType type:5; + + /* A marker whether this variant is embedded into in array/object or not. If true, the 'parent' pointer above + * is valid. If false, the 'n_ref' field above is valid instead. */ + bool is_embedded:1; + + /* In some conditions (for example, if this object is part of an array of strings or objects), we don't store + * any data inline, but instead simply reference an external object and act as surrogate of it. In that case + * this bool is set, and the external object is referenced through the .reference field below. */ + bool is_reference:1; + + /* While comparing two arrays, we use this for marking what we already have seen */ + bool is_marked:1; + + /* Erase from memory when freeing */ + bool sensitive:1; + + /* If this is an object the fields are strictly ordered by name */ + bool sorted:1; + + /* If in addition to this object all objects referenced by it are also ordered strictly by name */ + bool normalized:1; + + /* The current 'depth' of the JsonVariant, i.e. how many levels of member variants this has */ + uint16_t depth; + + union { + /* For simple types we store the value in-line. */ + JsonValue value; + + /* For objects and arrays we store the number of elements immediately following */ + size_t n_elements; + + /* If is_reference as indicated above is set, this is where the reference object is actually stored. */ + JsonVariant *reference; + + /* Strings are placed immediately after the structure. Note that when this is a JsonVariant embedded + * into an array we might encode strings up to INLINE_STRING_LENGTH characters directly inside the + * element, while longer strings are stored as references. When this object is not embedded into an + * array, but stand-alone we allocate the right size for the whole structure, i.e. the array might be + * much larger than INLINE_STRING_LENGTH. + * + * Note that because we want to allocate arrays of the JsonVariant structure we specify [0] here, + * rather than the prettier []. If we wouldn't, then this char array would have undefined size, and so + * would the union and then the struct this is included in. And of structures with undefined size we + * can't allocate arrays (at least not easily). */ + char string[0]; + }; +}; + +/* Inside string arrays we have a series of JasonVariant structures one after the other. In this case, strings longer + * than INLINE_STRING_MAX are stored as references, and all shorter ones inline. (This means — on x86-64 — strings up + * to 15 chars are stored within the array elements, and all others in separate allocations) */ +#define INLINE_STRING_MAX (sizeof(JsonVariant) - offsetof(JsonVariant, string) - 1U) + +/* Let's make sure this structure isn't increased in size accidentally. This check is only for our most relevant arch + * (x86-64). */ +#ifdef __x86_64__ +assert_cc(sizeof(JsonVariant) == 48U); +assert_cc(INLINE_STRING_MAX == 15U); +#endif + +static JsonSource* json_source_new(const char *name) { + JsonSource *s; + + assert(name); + + s = malloc(offsetof(JsonSource, name) + strlen(name) + 1); + if (!s) + return NULL; + + *s = (JsonSource) { + .n_ref = 1, + }; + strcpy(s->name, name); + + return s; +} + +DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(JsonSource, json_source, mfree); + +static bool json_source_equal(JsonSource *a, JsonSource *b) { + if (a == b) + return true; + + if (!a || !b) + return false; + + return streq(a->name, b->name); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(JsonSource*, json_source_unref); + +/* There are four kind of JsonVariant* pointers: + * + * 1. NULL + * 2. A 'regular' one, i.e. pointing to malloc() memory + * 3. A 'magic' one, i.e. one of the special JSON_VARIANT_MAGIC_XYZ values, that encode a few very basic values directly in the pointer. + * 4. A 'const string' one, i.e. a pointer to a const string. + * + * The four kinds of pointers can be discerned like this: + * + * Detecting #1 is easy, just compare with NULL. Detecting #3 is similarly easy: all magic pointers are below + * _JSON_VARIANT_MAGIC_MAX (which is pretty low, within the first memory page, which is special on Linux and other + * OSes, as it is a faulting page). In order to discern #2 and #4 we check the lowest bit. If it's off it's #2, + * otherwise #4. This makes use of the fact that malloc() will return "maximum aligned" memory, which definitely + * means the pointer is even. This means we can use the uneven pointers to reference static strings, as long as we + * make sure that all static strings used like this are aligned to 2 (or higher), and that we mask the bit on + * access. The JSON_VARIANT_STRING_CONST() macro encodes strings as JsonVariant* pointers, with the bit set. */ + +static bool json_variant_is_magic(const JsonVariant *v) { + if (!v) + return false; + + return v < _JSON_VARIANT_MAGIC_MAX; +} + +static bool json_variant_is_const_string(const JsonVariant *v) { + + if (v < _JSON_VARIANT_MAGIC_MAX) + return false; + + /* A proper JsonVariant is aligned to whatever malloc() aligns things too, which is definitely not uneven. We + * hence use all uneven pointers as indicators for const strings. */ + + return (((uintptr_t) v) & 1) != 0; +} + +static bool json_variant_is_regular(const JsonVariant *v) { + + if (v < _JSON_VARIANT_MAGIC_MAX) + return false; + + return (((uintptr_t) v) & 1) == 0; +} + +static JsonVariant *json_variant_dereference(JsonVariant *v) { + + /* Recursively dereference variants that are references to other variants */ + + if (!v) + return NULL; + + if (!json_variant_is_regular(v)) + return v; + + if (!v->is_reference) + return v; + + return json_variant_dereference(v->reference); +} + +static uint16_t json_variant_depth(JsonVariant *v) { + + v = json_variant_dereference(v); + if (!v) + return 0; + + if (!json_variant_is_regular(v)) + return 0; + + return v->depth; +} + +static JsonVariant *json_variant_formalize(JsonVariant *v) { + + /* Converts json variant pointers to their normalized form, i.e. fully dereferenced and wherever + * possible converted to the "magic" version if there is one */ + + if (!v) + return NULL; + + v = json_variant_dereference(v); + + switch (json_variant_type(v)) { + + case JSON_VARIANT_BOOLEAN: + return json_variant_boolean(v) ? JSON_VARIANT_MAGIC_TRUE : JSON_VARIANT_MAGIC_FALSE; + + case JSON_VARIANT_NULL: + return JSON_VARIANT_MAGIC_NULL; + + case JSON_VARIANT_INTEGER: + return json_variant_integer(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_INTEGER : v; + + case JSON_VARIANT_UNSIGNED: + return json_variant_unsigned(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_UNSIGNED : v; + + case JSON_VARIANT_REAL: + DISABLE_WARNING_FLOAT_EQUAL; + return json_variant_real(v) == 0.0 ? JSON_VARIANT_MAGIC_ZERO_REAL : v; + REENABLE_WARNING; + + case JSON_VARIANT_STRING: + return isempty(json_variant_string(v)) ? JSON_VARIANT_MAGIC_EMPTY_STRING : v; + + case JSON_VARIANT_ARRAY: + return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_ARRAY : v; + + case JSON_VARIANT_OBJECT: + return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_OBJECT : v; + + default: + return v; + } +} + +static JsonVariant *json_variant_conservative_formalize(JsonVariant *v) { + + /* Much like json_variant_formalize(), but won't simplify if the variant has a source/line location attached to + * it, in order not to lose context */ + + if (!v) + return NULL; + + if (!json_variant_is_regular(v)) + return v; + + if (v->source || v->line > 0 || v->column > 0) + return v; + + return json_variant_formalize(v); +} + +static int json_variant_new(JsonVariant **ret, JsonVariantType type, size_t space) { + JsonVariant *v; + + assert_return(ret, -EINVAL); + + v = malloc0(MAX(sizeof(JsonVariant), + offsetof(JsonVariant, value) + space)); + if (!v) + return -ENOMEM; + + v->n_ref = 1; + v->type = type; + + *ret = v; + return 0; +} + +int json_variant_new_integer(JsonVariant **ret, intmax_t i) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + + if (i == 0) { + *ret = JSON_VARIANT_MAGIC_ZERO_INTEGER; + return 0; + } + + r = json_variant_new(&v, JSON_VARIANT_INTEGER, sizeof(i)); + if (r < 0) + return r; + + v->value.integer = i; + *ret = v; + + return 0; +} + +int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + if (u == 0) { + *ret = JSON_VARIANT_MAGIC_ZERO_UNSIGNED; + return 0; + } + + r = json_variant_new(&v, JSON_VARIANT_UNSIGNED, sizeof(u)); + if (r < 0) + return r; + + v->value.unsig = u; + *ret = v; + + return 0; +} + +int json_variant_new_real(JsonVariant **ret, long double d) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + + DISABLE_WARNING_FLOAT_EQUAL; + if (d == 0.0) { + *ret = JSON_VARIANT_MAGIC_ZERO_REAL; + return 0; + } + REENABLE_WARNING; + + r = json_variant_new(&v, JSON_VARIANT_REAL, sizeof(d)); + if (r < 0) + return r; + + v->value.real = d; + *ret = v; + + return 0; +} + +int json_variant_new_boolean(JsonVariant **ret, bool b) { + assert_return(ret, -EINVAL); + + if (b) + *ret = JSON_VARIANT_MAGIC_TRUE; + else + *ret = JSON_VARIANT_MAGIC_FALSE; + + return 0; +} + +int json_variant_new_null(JsonVariant **ret) { + assert_return(ret, -EINVAL); + + *ret = JSON_VARIANT_MAGIC_NULL; + return 0; +} + +int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + if (!s) { + assert_return(IN_SET(n, 0, (size_t) -1), -EINVAL); + return json_variant_new_null(ret); + } + if (n == (size_t) -1) /* determine length automatically */ + n = strlen(s); + else if (memchr(s, 0, n)) /* don't allow embedded NUL, as we can't express that in JSON */ + return -EINVAL; + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_STRING; + return 0; + } + + if (!utf8_is_valid_n(s, n)) /* JSON strings must be valid UTF-8 */ + return -EUCLEAN; + + r = json_variant_new(&v, JSON_VARIANT_STRING, n + 1); + if (r < 0) + return r; + + memcpy(v->string, s, n); + v->string[n] = 0; + + *ret = v; + return 0; +} + +int json_variant_new_base64(JsonVariant **ret, const void *p, size_t n) { + _cleanup_free_ char *s = NULL; + ssize_t k; + + assert_return(ret, -EINVAL); + assert_return(n == 0 || p, -EINVAL); + + k = base64mem(p, n, &s); + if (k < 0) + return k; + + return json_variant_new_stringn(ret, s, k); +} + +int json_variant_new_id128(JsonVariant **ret, sd_id128_t id) { + char s[SD_ID128_STRING_MAX]; + + return json_variant_new_string(ret, sd_id128_to_string(id, s)); +} + +static void json_variant_set(JsonVariant *a, JsonVariant *b) { + assert(a); + + b = json_variant_dereference(b); + if (!b) { + a->type = JSON_VARIANT_NULL; + return; + } + + a->type = json_variant_type(b); + switch (a->type) { + + case JSON_VARIANT_INTEGER: + a->value.integer = json_variant_integer(b); + break; + + case JSON_VARIANT_UNSIGNED: + a->value.unsig = json_variant_unsigned(b); + break; + + case JSON_VARIANT_REAL: + a->value.real = json_variant_real(b); + break; + + case JSON_VARIANT_BOOLEAN: + a->value.boolean = json_variant_boolean(b); + break; + + case JSON_VARIANT_STRING: { + const char *s; + + assert_se(s = json_variant_string(b)); + + /* Short strings we can store inline */ + if (strnlen(s, INLINE_STRING_MAX+1) <= INLINE_STRING_MAX) { + strcpy(a->string, s); + break; + } + + /* For longer strings, use a reference… */ + _fallthrough_; + } + + case JSON_VARIANT_ARRAY: + case JSON_VARIANT_OBJECT: + a->is_reference = true; + a->reference = json_variant_ref(json_variant_conservative_formalize(b)); + break; + + case JSON_VARIANT_NULL: + break; + + default: + assert_not_reached("Unexpected variant type"); + } +} + +static void json_variant_copy_source(JsonVariant *v, JsonVariant *from) { + assert(v); + assert(from); + + if (!json_variant_is_regular(from)) + return; + + v->line = from->line; + v->column = from->column; + v->source = json_source_ref(from->source); +} + +int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + bool normalized = true; + + assert_return(ret, -EINVAL); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY; + return 0; + } + assert_return(array, -EINVAL); + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_ARRAY, + }; + + for (v->n_elements = 0; v->n_elements < n; v->n_elements++) { + JsonVariant *w = v + 1 + v->n_elements, + *c = array[v->n_elements]; + uint16_t d; + + d = json_variant_depth(c); + if (d >= DEPTH_MAX) /* Refuse too deep nesting */ + return -ELNRNG; + if (d >= v->depth) + v->depth = d + 1; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + }; + + json_variant_set(w, c); + json_variant_copy_source(w, c); + + if (!json_variant_is_normalized(c)) + normalized = false; + } + + v->normalized = normalized; + + *ret = TAKE_PTR(v); + return 0; +} + +int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n) { + JsonVariant *v; + size_t i; + + assert_return(ret, -EINVAL); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY; + return 0; + } + assert_return(p, -EINVAL); + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_ARRAY, + .n_elements = n, + .depth = 1, + }; + + for (i = 0; i < n; i++) { + JsonVariant *w = v + 1 + i; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + .type = JSON_VARIANT_UNSIGNED, + .value.unsig = ((const uint8_t*) p)[i], + }; + } + + v->normalized = true; + + *ret = v; + return 0; +} + +int json_variant_new_array_strv(JsonVariant **ret, char **l) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + size_t n; + int r; + + assert(ret); + + n = strv_length(l); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY; + return 0; + } + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_ARRAY, + .depth = 1, + }; + + for (v->n_elements = 0; v->n_elements < n; v->n_elements++) { + JsonVariant *w = v + 1 + v->n_elements; + size_t k; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + .type = JSON_VARIANT_STRING, + }; + + k = strlen(l[v->n_elements]); + + if (k > INLINE_STRING_MAX) { + /* If string is too long, store it as reference. */ + + r = json_variant_new_string(&w->reference, l[v->n_elements]); + if (r < 0) + return r; + + w->is_reference = true; + } else { + if (!utf8_is_valid_n(l[v->n_elements], k)) /* JSON strings must be valid UTF-8 */ + return -EUCLEAN; + + memcpy(w->string, l[v->n_elements], k+1); + } + } + + v->normalized = true; + + *ret = TAKE_PTR(v); + return 0; +} + +int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + const char *prev = NULL; + bool sorted = true, normalized = true; + + assert_return(ret, -EINVAL); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_OBJECT; + return 0; + } + assert_return(array, -EINVAL); + assert_return(n % 2 == 0, -EINVAL); + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_OBJECT, + }; + + for (v->n_elements = 0; v->n_elements < n; v->n_elements++) { + JsonVariant *w = v + 1 + v->n_elements, + *c = array[v->n_elements]; + uint16_t d; + + if ((v->n_elements & 1) == 0) { + const char *k; + + if (!json_variant_is_string(c)) + return -EINVAL; /* Every second one needs to be a string, as it is the key name */ + + assert_se(k = json_variant_string(c)); + + if (prev && strcmp(k, prev) <= 0) + sorted = normalized = false; + + prev = k; + } else if (!json_variant_is_normalized(c)) + normalized = false; + + d = json_variant_depth(c); + if (d >= DEPTH_MAX) /* Refuse too deep nesting */ + return -ELNRNG; + if (d >= v->depth) + v->depth = d + 1; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + }; + + json_variant_set(w, c); + json_variant_copy_source(w, c); + } + + v->normalized = normalized; + v->sorted = sorted; + + *ret = TAKE_PTR(v); + return 0; +} + +static size_t json_variant_size(JsonVariant* v) { + + if (!json_variant_is_regular(v)) + return 0; + + if (v->is_reference) + return offsetof(JsonVariant, reference) + sizeof(JsonVariant*); + + switch (v->type) { + + case JSON_VARIANT_STRING: + return offsetof(JsonVariant, string) + strlen(v->string) + 1; + + case JSON_VARIANT_REAL: + return offsetof(JsonVariant, value) + sizeof(long double); + + case JSON_VARIANT_UNSIGNED: + return offsetof(JsonVariant, value) + sizeof(uintmax_t); + + case JSON_VARIANT_INTEGER: + return offsetof(JsonVariant, value) + sizeof(intmax_t); + + case JSON_VARIANT_BOOLEAN: + return offsetof(JsonVariant, value) + sizeof(bool); + + case JSON_VARIANT_ARRAY: + case JSON_VARIANT_OBJECT: + return offsetof(JsonVariant, n_elements) + sizeof(size_t); + + case JSON_VARIANT_NULL: + return offsetof(JsonVariant, value); + + default: + assert_not_reached("unexpected type"); + } +} + +static void json_variant_free_inner(JsonVariant *v, bool force_sensitive) { + bool sensitive; + + assert(v); + + if (!json_variant_is_regular(v)) + return; + + json_source_unref(v->source); + + sensitive = v->sensitive || force_sensitive; + + if (v->is_reference) { + if (sensitive) + json_variant_sensitive(v->reference); + + json_variant_unref(v->reference); + return; + } + + if (IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) { + size_t i; + + for (i = 0; i < v->n_elements; i++) + json_variant_free_inner(v + 1 + i, sensitive); + } + + if (sensitive) + explicit_bzero_safe(v, json_variant_size(v)); +} + +JsonVariant *json_variant_ref(JsonVariant *v) { + if (!v) + return NULL; + if (!json_variant_is_regular(v)) + return v; + + if (v->is_embedded) + json_variant_ref(v->parent); /* ref the compounding variant instead */ + else { + assert(v->n_ref > 0); + v->n_ref++; + } + + return v; +} + +JsonVariant *json_variant_unref(JsonVariant *v) { + if (!v) + return NULL; + if (!json_variant_is_regular(v)) + return NULL; + + if (v->is_embedded) + json_variant_unref(v->parent); + else { + assert(v->n_ref > 0); + v->n_ref--; + + if (v->n_ref == 0) { + json_variant_free_inner(v, false); + free(v); + } + } + + return NULL; +} + +void json_variant_unref_many(JsonVariant **array, size_t n) { + size_t i; + + assert(array || n == 0); + + for (i = 0; i < n; i++) + json_variant_unref(array[i]); +} + +const char *json_variant_string(JsonVariant *v) { + if (!v) + return NULL; + if (v == JSON_VARIANT_MAGIC_EMPTY_STRING) + return ""; + if (json_variant_is_magic(v)) + goto mismatch; + if (json_variant_is_const_string(v)) { + uintptr_t p = (uintptr_t) v; + + assert((p & 1) != 0); + return (const char*) (p ^ 1U); + } + + if (v->is_reference) + return json_variant_string(v->reference); + if (v->type != JSON_VARIANT_STRING) + goto mismatch; + + return v->string; + +mismatch: + log_debug("Non-string JSON variant requested as string, returning NULL."); + return NULL; +} + +bool json_variant_boolean(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_TRUE) + return true; + if (v == JSON_VARIANT_MAGIC_FALSE) + return false; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->type != JSON_VARIANT_BOOLEAN) + goto mismatch; + if (v->is_reference) + return json_variant_boolean(v->reference); + + return v->value.boolean; + +mismatch: + log_debug("Non-boolean JSON variant requested as boolean, returning false."); + return false; +} + +intmax_t json_variant_integer(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return 0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_integer(v->reference); + + switch (v->type) { + + case JSON_VARIANT_INTEGER: + return v->value.integer; + + case JSON_VARIANT_UNSIGNED: + if (v->value.unsig <= INTMAX_MAX) + return (intmax_t) v->value.unsig; + + log_debug("Unsigned integer %ju requested as signed integer and out of range, returning 0.", v->value.unsig); + return 0; + + case JSON_VARIANT_REAL: { + intmax_t converted; + + converted = (intmax_t) v->value.real; + + DISABLE_WARNING_FLOAT_EQUAL; + if ((long double) converted == v->value.real) + return converted; + REENABLE_WARNING; + + log_debug("Real %Lg requested as integer, and cannot be converted losslessly, returning 0.", v->value.real); + return 0; + } + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant requested as integer, returning 0."); + return 0; +} + +uintmax_t json_variant_unsigned(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return 0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_integer(v->reference); + + switch (v->type) { + + case JSON_VARIANT_INTEGER: + if (v->value.integer >= 0) + return (uintmax_t) v->value.integer; + + log_debug("Signed integer %ju requested as unsigned integer and out of range, returning 0.", v->value.integer); + return 0; + + case JSON_VARIANT_UNSIGNED: + return v->value.unsig; + + case JSON_VARIANT_REAL: { + uintmax_t converted; + + converted = (uintmax_t) v->value.real; + + DISABLE_WARNING_FLOAT_EQUAL; + if ((long double) converted == v->value.real) + return converted; + REENABLE_WARNING; + + log_debug("Real %Lg requested as unsigned integer, and cannot be converted losslessly, returning 0.", v->value.real); + return 0; + } + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant requested as unsigned, returning 0."); + return 0; +} + +long double json_variant_real(JsonVariant *v) { + if (!v) + return 0.0; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return 0.0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_real(v->reference); + + switch (v->type) { + + case JSON_VARIANT_REAL: + return v->value.real; + + case JSON_VARIANT_INTEGER: { + long double converted; + + converted = (long double) v->value.integer; + + if ((intmax_t) converted == v->value.integer) + return converted; + + log_debug("Signed integer %ji requested as real, and cannot be converted losslessly, returning 0.", v->value.integer); + return 0.0; + } + + case JSON_VARIANT_UNSIGNED: { + long double converted; + + converted = (long double) v->value.unsig; + + if ((uintmax_t) converted == v->value.unsig) + return converted; + + log_debug("Unsigned integer %ju requested as real, and cannot be converted losslessly, returning 0.", v->value.unsig); + return 0.0; + } + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant requested as integer, returning 0."); + return 0.0; +} + +bool json_variant_is_negative(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return false; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_is_negative(v->reference); + + /* This function is useful as checking whether numbers are negative is pretty complex since we have three types + * of numbers. And some JSON code (OCI for example) uses negative numbers to mark "not defined" numeric + * values. */ + + switch (v->type) { + + case JSON_VARIANT_REAL: + return v->value.real < 0; + + case JSON_VARIANT_INTEGER: + return v->value.integer < 0; + + case JSON_VARIANT_UNSIGNED: + return false; + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant tested for negativity, returning false."); + return false; +} + +bool json_variant_is_blank_object(JsonVariant *v) { + /* Returns true if the specified object is null or empty */ + return !v || + json_variant_is_null(v) || + (json_variant_is_object(v) && json_variant_elements(v) == 0); +} + +bool json_variant_is_blank_array(JsonVariant *v) { + return !v || + json_variant_is_null(v) || + (json_variant_is_array(v) && json_variant_elements(v) == 0); +} + +JsonVariantType json_variant_type(JsonVariant *v) { + + if (!v) + return _JSON_VARIANT_TYPE_INVALID; + + if (json_variant_is_const_string(v)) + return JSON_VARIANT_STRING; + + if (v == JSON_VARIANT_MAGIC_TRUE || v == JSON_VARIANT_MAGIC_FALSE) + return JSON_VARIANT_BOOLEAN; + + if (v == JSON_VARIANT_MAGIC_NULL) + return JSON_VARIANT_NULL; + + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER) + return JSON_VARIANT_INTEGER; + + if (v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED) + return JSON_VARIANT_UNSIGNED; + + if (v == JSON_VARIANT_MAGIC_ZERO_REAL) + return JSON_VARIANT_REAL; + + if (v == JSON_VARIANT_MAGIC_EMPTY_STRING) + return JSON_VARIANT_STRING; + + if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY) + return JSON_VARIANT_ARRAY; + + if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + return JSON_VARIANT_OBJECT; + + return v->type; +} + +_function_no_sanitize_float_cast_overflow_ bool json_variant_has_type(JsonVariant *v, JsonVariantType type) { + JsonVariantType rt; + + /* Note: we turn off ubsan float cast overflo detection for this function, since it would complain + * about our float casts but we do them explicitly to detect conversion errors. */ + + v = json_variant_dereference(v); + if (!v) + return false; + + rt = json_variant_type(v); + if (rt == type) + return true; + + /* If it's a const string, then it only can be a string, and if it is not, it's not */ + if (json_variant_is_const_string(v)) + return false; + + /* All three magic zeroes qualify as integer, unsigned and as real */ + if ((v == JSON_VARIANT_MAGIC_ZERO_INTEGER || v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || v == JSON_VARIANT_MAGIC_ZERO_REAL) && + IN_SET(type, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL, JSON_VARIANT_NUMBER)) + return true; + + /* All other magic variant types are only equal to themselves */ + if (json_variant_is_magic(v)) + return false; + + /* Handle the "number" pseudo type */ + if (type == JSON_VARIANT_NUMBER) + return IN_SET(rt, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL); + + /* Integer conversions are OK in many cases */ + if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_UNSIGNED) + return v->value.integer >= 0; + if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_INTEGER) + return v->value.unsig <= INTMAX_MAX; + + /* Any integer that can be converted lossley to a real and back may also be considered a real */ + if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_REAL) + return (intmax_t) (long double) v->value.integer == v->value.integer; + if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_REAL) + return (uintmax_t) (long double) v->value.unsig == v->value.unsig; + + DISABLE_WARNING_FLOAT_EQUAL; + + /* Any real that can be converted losslessly to an integer and back may also be considered an integer */ + if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_INTEGER) + return (long double) (intmax_t) v->value.real == v->value.real; + if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_UNSIGNED) + return (long double) (uintmax_t) v->value.real == v->value.real; + + REENABLE_WARNING; + + return false; +} + +size_t json_variant_elements(JsonVariant *v) { + if (!v) + return 0; + if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY || + v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + return 0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) + goto mismatch; + if (v->is_reference) + return json_variant_elements(v->reference); + + return v->n_elements; + +mismatch: + log_debug("Number of elements in non-array/non-object JSON variant requested, returning 0."); + return 0; +} + +JsonVariant *json_variant_by_index(JsonVariant *v, size_t idx) { + if (!v) + return NULL; + if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY || + v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + return NULL; + if (!json_variant_is_regular(v)) + goto mismatch; + if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) + goto mismatch; + if (v->is_reference) + return json_variant_by_index(v->reference, idx); + if (idx >= v->n_elements) + return NULL; + + return json_variant_conservative_formalize(v + 1 + idx); + +mismatch: + log_debug("Element in non-array/non-object JSON variant requested by index, returning NULL."); + return NULL; +} + +JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key) { + size_t i; + + if (!v) + goto not_found; + if (!key) + goto not_found; + if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + goto not_found; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->type != JSON_VARIANT_OBJECT) + goto mismatch; + if (v->is_reference) + return json_variant_by_key(v->reference, key); + + if (v->sorted) { + size_t a = 0, b = v->n_elements/2; + + /* If the variant is sorted we can use bisection to find the entry we need in O(log(n)) time */ + + while (b > a) { + JsonVariant *p; + const char *f; + int c; + + i = (a + b) / 2; + p = json_variant_dereference(v + 1 + i*2); + + assert_se(f = json_variant_string(p)); + + c = strcmp(key, f); + if (c == 0) { + if (ret_key) + *ret_key = json_variant_conservative_formalize(v + 1 + i*2); + + return json_variant_conservative_formalize(v + 1 + i*2 + 1); + } else if (c < 0) + b = i; + else + a = i + 1; + } + + goto not_found; + } + + /* The variant is not sorted, hence search for the field linearly */ + for (i = 0; i < v->n_elements; i += 2) { + JsonVariant *p; + + p = json_variant_dereference(v + 1 + i); + + if (!json_variant_has_type(p, JSON_VARIANT_STRING)) + continue; + + if (streq(json_variant_string(p), key)) { + + if (ret_key) + *ret_key = json_variant_conservative_formalize(v + 1 + i); + + return json_variant_conservative_formalize(v + 1 + i + 1); + } + } + +not_found: + if (ret_key) + *ret_key = NULL; + + return NULL; + +mismatch: + log_debug("Element in non-object JSON variant requested by key, returning NULL."); + if (ret_key) + *ret_key = NULL; + + return NULL; +} + +JsonVariant *json_variant_by_key(JsonVariant *v, const char *key) { + return json_variant_by_key_full(v, key, NULL); +} + +bool json_variant_equal(JsonVariant *a, JsonVariant *b) { + JsonVariantType t; + + a = json_variant_formalize(a); + b = json_variant_formalize(b); + + if (a == b) + return true; + + t = json_variant_type(a); + if (!json_variant_has_type(b, t)) + return false; + + switch (t) { + + case JSON_VARIANT_STRING: + return streq(json_variant_string(a), json_variant_string(b)); + + case JSON_VARIANT_INTEGER: + return json_variant_integer(a) == json_variant_integer(b); + + case JSON_VARIANT_UNSIGNED: + return json_variant_unsigned(a) == json_variant_unsigned(b); + + case JSON_VARIANT_REAL: + DISABLE_WARNING_FLOAT_EQUAL; + return json_variant_real(a) == json_variant_real(b); + REENABLE_WARNING; + + case JSON_VARIANT_BOOLEAN: + return json_variant_boolean(a) == json_variant_boolean(b); + + case JSON_VARIANT_NULL: + return true; + + case JSON_VARIANT_ARRAY: { + size_t i, n; + + n = json_variant_elements(a); + if (n != json_variant_elements(b)) + return false; + + for (i = 0; i < n; i++) { + if (!json_variant_equal(json_variant_by_index(a, i), json_variant_by_index(b, i))) + return false; + } + + return true; + } + + case JSON_VARIANT_OBJECT: { + size_t i, n; + + n = json_variant_elements(a); + if (n != json_variant_elements(b)) + return false; + + /* Iterate through all keys in 'a' */ + for (i = 0; i < n; i += 2) { + bool found = false; + size_t j; + + /* Match them against all keys in 'b' */ + for (j = 0; j < n; j += 2) { + JsonVariant *key_b; + + key_b = json_variant_by_index(b, j); + + /* During the first iteration unmark everything */ + if (i == 0) + key_b->is_marked = false; + else if (key_b->is_marked) /* In later iterations if we already marked something, don't bother with it again */ + continue; + + if (found) + continue; + + if (json_variant_equal(json_variant_by_index(a, i), key_b) && + json_variant_equal(json_variant_by_index(a, i+1), json_variant_by_index(b, j+1))) { + /* Key and values match! */ + key_b->is_marked = found = true; + + /* In the first iteration we continue the inner loop since we want to mark + * everything, otherwise exit the loop quickly after we found what we were + * looking for. */ + if (i != 0) + break; + } + } + + if (!found) + return false; + } + + return true; + } + + default: + assert_not_reached("Unknown variant type."); + } +} + +void json_variant_sensitive(JsonVariant *v) { + assert(v); + + /* Marks a variant as "sensitive", so that it is erased from memory when it is destroyed. This is a + * one-way operation: as soon as it is marked this way it remains marked this way until it's + * destroyed. A magic variant is never sensitive though, even when asked, since it's too + * basic. Similar, const string variant are never sensitive either, after all they are included in + * the source code as they are, which is not suitable for inclusion of secrets. + * + * Note that this flag has a recursive effect: when we destroy an object or array we'll propagate the + * flag to all contained variants. And if those are then destroyed this is propagated further down, + * and so on. */ + + v = json_variant_formalize(v); + if (!json_variant_is_regular(v)) + return; + + v->sensitive = true; +} + +bool json_variant_is_sensitive(JsonVariant *v) { + v = json_variant_formalize(v); + if (!json_variant_is_regular(v)) + return false; + + return v->sensitive; +} + +static void json_variant_propagate_sensitive(JsonVariant *from, JsonVariant *to) { + if (json_variant_is_sensitive(from)) + json_variant_sensitive(to); +} + +int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column) { + assert_return(v, -EINVAL); + + if (ret_source) + *ret_source = json_variant_is_regular(v) && v->source ? v->source->name : NULL; + + if (ret_line) + *ret_line = json_variant_is_regular(v) ? v->line : 0; + + if (ret_column) + *ret_column = json_variant_is_regular(v) ? v->column : 0; + + return 0; +} + +static int print_source(FILE *f, JsonVariant *v, JsonFormatFlags flags, bool whitespace) { + size_t w, k; + + if (!FLAGS_SET(flags, JSON_FORMAT_SOURCE|JSON_FORMAT_PRETTY)) + return 0; + + if (!json_variant_is_regular(v)) + return 0; + + if (!v->source && v->line == 0 && v->column == 0) + return 0; + + /* The max width we need to format the line numbers for this source file */ + w = (v->source && v->source->max_line > 0) ? + DECIMAL_STR_WIDTH(v->source->max_line) : + DECIMAL_STR_MAX(unsigned)-1; + k = (v->source && v->source->max_column > 0) ? + DECIMAL_STR_WIDTH(v->source->max_column) : + DECIMAL_STR_MAX(unsigned) -1; + + if (whitespace) { + size_t i, n; + + n = 1 + (v->source ? strlen(v->source->name) : 0) + + ((v->source && (v->line > 0 || v->column > 0)) ? 1 : 0) + + (v->line > 0 ? w : 0) + + (((v->source || v->line > 0) && v->column > 0) ? 1 : 0) + + (v->column > 0 ? k : 0) + + 2; + + for (i = 0; i < n; i++) + fputc(' ', f); + } else { + fputc('[', f); + + if (v->source) + fputs(v->source->name, f); + if (v->source && (v->line > 0 || v->column > 0)) + fputc(':', f); + if (v->line > 0) + fprintf(f, "%*u", (int) w, v->line); + if ((v->source || v->line > 0) || v->column > 0) + fputc(':', f); + if (v->column > 0) + fprintf(f, "%*u", (int) k, v->column); + + fputc(']', f); + fputc(' ', f); + } + + return 0; +} + +static void json_format_string(FILE *f, const char *q, JsonFormatFlags flags) { + assert(q); + + fputc('"', f); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_GREEN, f); + + for (; *q; q++) + switch (*q) { + case '"': + fputs("\\\"", f); + break; + + case '\\': + fputs("\\\\", f); + break; + + case '\b': + fputs("\\b", f); + break; + + case '\f': + fputs("\\f", f); + break; + + case '\n': + fputs("\\n", f); + break; + + case '\r': + fputs("\\r", f); + break; + + case '\t': + fputs("\\t", f); + break; + + default: + if ((signed char) *q >= 0 && *q < ' ') + fprintf(f, "\\u%04x", *q); + else + fputc(*q, f); + break; + } + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + + fputc('"', f); +} + +static int json_format(FILE *f, JsonVariant *v, JsonFormatFlags flags, const char *prefix) { + int r; + + assert(f); + assert(v); + + switch (json_variant_type(v)) { + + case JSON_VARIANT_REAL: { + locale_t loc; + + loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0); + if (loc == (locale_t) 0) + return -errno; + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT_BLUE, f); + + fprintf(f, "%.*Le", DECIMAL_DIG, json_variant_real(v)); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + + freelocale(loc); + break; + } + + case JSON_VARIANT_INTEGER: + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT_BLUE, f); + + fprintf(f, "%" PRIdMAX, json_variant_integer(v)); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + break; + + case JSON_VARIANT_UNSIGNED: + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT_BLUE, f); + + fprintf(f, "%" PRIuMAX, json_variant_unsigned(v)); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + break; + + case JSON_VARIANT_BOOLEAN: + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT, f); + + if (json_variant_boolean(v)) + fputs("true", f); + else + fputs("false", f); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + + break; + + case JSON_VARIANT_NULL: + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT, f); + + fputs("null", f); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + break; + + case JSON_VARIANT_STRING: + json_format_string(f, json_variant_string(v), flags); + break; + + case JSON_VARIANT_ARRAY: { + size_t i, n; + + n = json_variant_elements(v); + + if (n == 0) + fputs("[]", f); + else { + _cleanup_free_ char *joined = NULL; + const char *prefix2; + + if (flags & JSON_FORMAT_PRETTY) { + joined = strjoin(strempty(prefix), "\t"); + if (!joined) + return -ENOMEM; + + prefix2 = joined; + fputs("[\n", f); + } else { + prefix2 = strempty(prefix); + fputc('[', f); + } + + for (i = 0; i < n; i++) { + JsonVariant *e; + + assert_se(e = json_variant_by_index(v, i)); + + if (i > 0) { + if (flags & JSON_FORMAT_PRETTY) + fputs(",\n", f); + else + fputc(',', f); + } + + if (flags & JSON_FORMAT_PRETTY) { + print_source(f, e, flags, false); + fputs(prefix2, f); + } + + r = json_format(f, e, flags, prefix2); + if (r < 0) + return r; + } + + if (flags & JSON_FORMAT_PRETTY) { + fputc('\n', f); + print_source(f, v, flags, true); + fputs(strempty(prefix), f); + } + + fputc(']', f); + } + break; + } + + case JSON_VARIANT_OBJECT: { + size_t i, n; + + n = json_variant_elements(v); + + if (n == 0) + fputs("{}", f); + else { + _cleanup_free_ char *joined = NULL; + const char *prefix2; + + if (flags & JSON_FORMAT_PRETTY) { + joined = strjoin(strempty(prefix), "\t"); + if (!joined) + return -ENOMEM; + + prefix2 = joined; + fputs("{\n", f); + } else { + prefix2 = strempty(prefix); + fputc('{', f); + } + + for (i = 0; i < n; i += 2) { + JsonVariant *e; + + e = json_variant_by_index(v, i); + + if (i > 0) { + if (flags & JSON_FORMAT_PRETTY) + fputs(",\n", f); + else + fputc(',', f); + } + + if (flags & JSON_FORMAT_PRETTY) { + print_source(f, e, flags, false); + fputs(prefix2, f); + } + + r = json_format(f, e, flags, prefix2); + if (r < 0) + return r; + + fputs(flags & JSON_FORMAT_PRETTY ? " : " : ":", f); + + r = json_format(f, json_variant_by_index(v, i+1), flags, prefix2); + if (r < 0) + return r; + } + + if (flags & JSON_FORMAT_PRETTY) { + fputc('\n', f); + print_source(f, v, flags, true); + fputs(strempty(prefix), f); + } + + fputc('}', f); + } + break; + } + + default: + assert_not_reached("Unexpected variant type."); + } + + return 0; +} + +int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret) { + _cleanup_free_ char *s = NULL; + size_t sz = 0; + int r; + + /* Returns the length of the generated string (without the terminating NUL), + * or negative on error. */ + + assert_return(v, -EINVAL); + assert_return(ret, -EINVAL); + + { + _cleanup_fclose_ FILE *f = NULL; + + f = open_memstream_unlocked(&s, &sz); + if (!f) + return -ENOMEM; + + json_variant_dump(v, flags, f, NULL); + + /* Add terminating 0, so that the output buffer is a valid string. */ + fputc('\0', f); + + r = fflush_and_check(f); + } + if (r < 0) + return r; + + assert(s); + *ret = TAKE_PTR(s); + assert(sz > 0); + return (int) sz - 1; +} + +void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix) { + if (!v) + return; + + if (!f) + f = stdout; + + print_source(f, v, flags, false); + + if (((flags & (JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_COLOR)) == JSON_FORMAT_COLOR_AUTO) && colors_enabled()) + flags |= JSON_FORMAT_COLOR; + + if (((flags & (JSON_FORMAT_PRETTY_AUTO|JSON_FORMAT_PRETTY)) == JSON_FORMAT_PRETTY_AUTO)) + flags |= on_tty() ? JSON_FORMAT_PRETTY : JSON_FORMAT_NEWLINE; + + if (flags & JSON_FORMAT_SSE) + fputs("data: ", f); + if (flags & JSON_FORMAT_SEQ) + fputc('\x1e', f); /* ASCII Record Separator */ + + json_format(f, v, flags, prefix); + + if (flags & (JSON_FORMAT_PRETTY|JSON_FORMAT_SEQ|JSON_FORMAT_SSE|JSON_FORMAT_NEWLINE)) + fputc('\n', f); + if (flags & JSON_FORMAT_SSE) + fputc('\n', f); /* In case of SSE add a second newline */ + + if (flags & JSON_FORMAT_FLUSH) + fflush(f); +} + +int json_variant_filter(JsonVariant **v, char **to_remove) { + _cleanup_(json_variant_unrefp) JsonVariant *w = NULL; + _cleanup_free_ JsonVariant **array = NULL; + size_t i, n = 0, k = 0; + int r; + + assert(v); + + if (json_variant_is_blank_object(*v)) + return 0; + if (!json_variant_is_object(*v)) + return -EINVAL; + + if (strv_isempty(to_remove)) + return 0; + + for (i = 0; i < json_variant_elements(*v); i += 2) { + JsonVariant *p; + + p = json_variant_by_index(*v, i); + if (!json_variant_has_type(p, JSON_VARIANT_STRING)) + return -EINVAL; + + if (strv_contains(to_remove, json_variant_string(p))) { + if (!array) { + array = new(JsonVariant*, json_variant_elements(*v) - 2); + if (!array) + return -ENOMEM; + + for (k = 0; k < i; k++) + array[k] = json_variant_by_index(*v, k); + } + + n++; + } else if (array) { + array[k++] = p; + array[k++] = json_variant_by_index(*v, i + 1); + } + } + + if (n == 0) + return 0; + + r = json_variant_new_object(&w, array, k); + if (r < 0) + return r; + + json_variant_propagate_sensitive(*v, w); + + json_variant_unref(*v); + *v = TAKE_PTR(w); + + return (int) n; +} + +int json_variant_set_field(JsonVariant **v, const char *field, JsonVariant *value) { + _cleanup_(json_variant_unrefp) JsonVariant *field_variant = NULL, *w = NULL; + _cleanup_free_ JsonVariant **array = NULL; + size_t i, k = 0; + int r; + + assert(v); + assert(field); + + if (json_variant_is_blank_object(*v)) { + array = new(JsonVariant*, 2); + if (!array) + return -ENOMEM; + + } else { + if (!json_variant_is_object(*v)) + return -EINVAL; + + for (i = 0; i < json_variant_elements(*v); i += 2) { + JsonVariant *p; + + p = json_variant_by_index(*v, i); + if (!json_variant_is_string(p)) + return -EINVAL; + + if (streq(json_variant_string(p), field)) { + + if (!array) { + array = new(JsonVariant*, json_variant_elements(*v)); + if (!array) + return -ENOMEM; + + for (k = 0; k < i; k++) + array[k] = json_variant_by_index(*v, k); + } + + } else if (array) { + array[k++] = p; + array[k++] = json_variant_by_index(*v, i + 1); + } + } + + if (!array) { + array = new(JsonVariant*, json_variant_elements(*v) + 2); + if (!array) + return -ENOMEM; + + for (k = 0; k < json_variant_elements(*v); k++) + array[k] = json_variant_by_index(*v, k); + } + } + + r = json_variant_new_string(&field_variant, field); + if (r < 0) + return r; + + array[k++] = field_variant; + array[k++] = value; + + r = json_variant_new_object(&w, array, k); + if (r < 0) + return r; + + json_variant_propagate_sensitive(*v, w); + + json_variant_unref(*v); + *v = TAKE_PTR(w); + + return 1; +} + +int json_variant_set_field_string(JsonVariant **v, const char *field, const char *value) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + r = json_variant_new_string(&m, value); + if (r < 0) + return r; + + return json_variant_set_field(v, field, m); +} + +int json_variant_set_field_integer(JsonVariant **v, const char *field, intmax_t i) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + r = json_variant_new_integer(&m, i); + if (r < 0) + return r; + + return json_variant_set_field(v, field, m); +} + +int json_variant_set_field_unsigned(JsonVariant **v, const char *field, uintmax_t u) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + r = json_variant_new_unsigned(&m, u); + if (r < 0) + return r; + + return json_variant_set_field(v, field, m); +} + +int json_variant_set_field_boolean(JsonVariant **v, const char *field, bool b) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + r = json_variant_new_boolean(&m, b); + if (r < 0) + return r; + + return json_variant_set_field(v, field, m); +} + +int json_variant_set_field_strv(JsonVariant **v, const char *field, char **l) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + r = json_variant_new_array_strv(&m, l); + if (r < 0) + return r; + + return json_variant_set_field(v, field, m); +} + +int json_variant_merge(JsonVariant **v, JsonVariant *m) { + _cleanup_(json_variant_unrefp) JsonVariant *w = NULL; + _cleanup_free_ JsonVariant **array = NULL; + size_t v_elements, m_elements, i, k; + bool v_blank, m_blank; + int r; + + m = json_variant_dereference(m); + + v_blank = json_variant_is_blank_object(*v); + m_blank = json_variant_is_blank_object(m); + + if (!v_blank && !json_variant_is_object(*v)) + return -EINVAL; + if (!m_blank && !json_variant_is_object(m)) + return -EINVAL; + + if (m_blank) + return 0; /* nothing to do */ + + if (v_blank) { + json_variant_unref(*v); + *v = json_variant_ref(m); + return 1; + } + + v_elements = json_variant_elements(*v); + m_elements = json_variant_elements(m); + if (v_elements > SIZE_MAX - m_elements) /* overflow check */ + return -ENOMEM; + + array = new(JsonVariant*, v_elements + m_elements); + if (!array) + return -ENOMEM; + + k = 0; + for (i = 0; i < v_elements; i += 2) { + JsonVariant *u; + + u = json_variant_by_index(*v, i); + if (!json_variant_is_string(u)) + return -EINVAL; + + if (json_variant_by_key(m, json_variant_string(u))) + continue; /* skip if exists in second variant */ + + array[k++] = u; + array[k++] = json_variant_by_index(*v, i + 1); + } + + for (i = 0; i < m_elements; i++) + array[k++] = json_variant_by_index(m, i); + + r = json_variant_new_object(&w, array, k); + if (r < 0) + return r; + + json_variant_propagate_sensitive(*v, w); + json_variant_propagate_sensitive(m, w); + + json_variant_unref(*v); + *v = TAKE_PTR(w); + + return 1; +} + +int json_variant_append_array(JsonVariant **v, JsonVariant *element) { + _cleanup_(json_variant_unrefp) JsonVariant *nv = NULL; + bool blank; + int r; + + assert(v); + assert(element); + + + if (!*v || json_variant_is_null(*v)) + blank = true; + else if (!json_variant_is_array(*v)) + return -EINVAL; + else + blank = json_variant_elements(*v) == 0; + + if (blank) + r = json_variant_new_array(&nv, (JsonVariant*[]) { element }, 1); + else { + _cleanup_free_ JsonVariant **array = NULL; + size_t i; + + array = new(JsonVariant*, json_variant_elements(*v) + 1); + if (!array) + return -ENOMEM; + + for (i = 0; i < json_variant_elements(*v); i++) + array[i] = json_variant_by_index(*v, i); + + array[i] = element; + + r = json_variant_new_array(&nv, array, i + 1); + } + if (r < 0) + return r; + + json_variant_propagate_sensitive(*v, nv); + + json_variant_unref(*v); + *v = TAKE_PTR(nv); + + return 0; +} + +int json_variant_strv(JsonVariant *v, char ***ret) { + char **l = NULL; + size_t n, i; + bool sensitive; + int r; + + assert(ret); + + if (!v || json_variant_is_null(v)) { + l = new0(char*, 1); + if (!l) + return -ENOMEM; + + *ret = l; + return 0; + } + + if (!json_variant_is_array(v)) + return -EINVAL; + + sensitive = v->sensitive; + + n = json_variant_elements(v); + l = new(char*, n+1); + if (!l) + return -ENOMEM; + + for (i = 0; i < n; i++) { + JsonVariant *e; + + assert_se(e = json_variant_by_index(v, i)); + sensitive = sensitive || e->sensitive; + + if (!json_variant_is_string(e)) { + l[i] = NULL; + r = -EINVAL; + goto fail; + } + + l[i] = strdup(json_variant_string(e)); + if (!l[i]) { + r = -ENOMEM; + goto fail; + } + } + + l[i] = NULL; + *ret = TAKE_PTR(l); + + return 0; + +fail: + if (sensitive) + strv_free_erase(l); + else + strv_free(l); + + return r; +} + +static int json_variant_copy(JsonVariant **nv, JsonVariant *v) { + JsonVariantType t; + JsonVariant *c; + JsonValue value; + const void *source; + size_t k; + + assert(nv); + assert(v); + + /* Let's copy the simple types literally, and the larger types by references */ + t = json_variant_type(v); + switch (t) { + case JSON_VARIANT_INTEGER: + k = sizeof(intmax_t); + value.integer = json_variant_integer(v); + source = &value; + break; + + case JSON_VARIANT_UNSIGNED: + k = sizeof(uintmax_t); + value.unsig = json_variant_unsigned(v); + source = &value; + break; + + case JSON_VARIANT_REAL: + k = sizeof(long double); + value.real = json_variant_real(v); + source = &value; + break; + + case JSON_VARIANT_BOOLEAN: + k = sizeof(bool); + value.boolean = json_variant_boolean(v); + source = &value; + break; + + case JSON_VARIANT_NULL: + k = 0; + source = NULL; + break; + + case JSON_VARIANT_STRING: + source = json_variant_string(v); + k = strnlen(source, INLINE_STRING_MAX + 1); + if (k <= INLINE_STRING_MAX) { + k ++; + break; + } + + _fallthrough_; + + default: + /* Everything else copy by reference */ + + c = malloc0(MAX(sizeof(JsonVariant), + offsetof(JsonVariant, reference) + sizeof(JsonVariant*))); + if (!c) + return -ENOMEM; + + c->n_ref = 1; + c->type = t; + c->is_reference = true; + c->reference = json_variant_ref(json_variant_formalize(v)); + + *nv = c; + return 0; + } + + c = malloc0(MAX(sizeof(JsonVariant), + offsetof(JsonVariant, value) + k)); + if (!c) + return -ENOMEM; + + c->n_ref = 1; + c->type = t; + + memcpy_safe(&c->value, source, k); + + json_variant_propagate_sensitive(v, c); + + *nv = c; + return 0; +} + +static bool json_single_ref(JsonVariant *v) { + + /* Checks whether the caller is the single owner of the object, i.e. can get away with changing it */ + + if (!json_variant_is_regular(v)) + return false; + + if (v->is_embedded) + return json_single_ref(v->parent); + + assert(v->n_ref > 0); + return v->n_ref == 1; +} + +static int json_variant_set_source(JsonVariant **v, JsonSource *source, unsigned line, unsigned column) { + JsonVariant *w; + int r; + + assert(v); + + /* Patch in source and line/column number. Tries to do this in-place if the caller is the sole referencer of + * the object. If not, allocates a new object, possibly a surrogate for the original one */ + + if (!*v) + return 0; + + if (source && line > source->max_line) + source->max_line = line; + if (source && column > source->max_column) + source->max_column = column; + + if (!json_variant_is_regular(*v)) { + + if (!source && line == 0 && column == 0) + return 0; + + } else { + if (json_source_equal((*v)->source, source) && + (*v)->line == line && + (*v)->column == column) + return 0; + + if (json_single_ref(*v)) { /* Sole reference? */ + json_source_unref((*v)->source); + (*v)->source = json_source_ref(source); + (*v)->line = line; + (*v)->column = column; + return 1; + } + } + + r = json_variant_copy(&w, *v); + if (r < 0) + return r; + + assert(json_variant_is_regular(w)); + assert(!w->is_embedded); + assert(w->n_ref == 1); + assert(!w->source); + + w->source = json_source_ref(source); + w->line = line; + w->column = column; + + json_variant_unref(*v); + *v = w; + + return 1; +} + +static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, size_t n) { + assert(line); + assert(column); + assert(s || n == 0); + + while (n > 0) { + if (*s == '\n') { + (*line)++; + *column = 1; + } else if ((signed char) *s >= 0 && *s < 127) /* Process ASCII chars quickly */ + (*column)++; + else { + int w; + + w = utf8_encoded_valid_unichar(s, n); + if (w < 0) /* count invalid unichars as normal characters */ + w = 1; + else if ((size_t) w > n) /* never read more than the specified number of characters */ + w = (int) n; + + (*column)++; + + s += w; + n -= w; + continue; + } + + s++; + n--; + } +} + +static int unhex_ucs2(const char *c, uint16_t *ret) { + int aa, bb, cc, dd; + uint16_t x; + + assert(c); + assert(ret); + + aa = unhexchar(c[0]); + if (aa < 0) + return -EINVAL; + + bb = unhexchar(c[1]); + if (bb < 0) + return -EINVAL; + + cc = unhexchar(c[2]); + if (cc < 0) + return -EINVAL; + + dd = unhexchar(c[3]); + if (dd < 0) + return -EINVAL; + + x = ((uint16_t) aa << 12) | + ((uint16_t) bb << 8) | + ((uint16_t) cc << 4) | + ((uint16_t) dd); + + if (x <= 0) + return -EINVAL; + + *ret = x; + + return 0; +} + +static int json_parse_string(const char **p, char **ret) { + _cleanup_free_ char *s = NULL; + size_t n = 0, allocated = 0; + const char *c; + + assert(p); + assert(*p); + assert(ret); + + c = *p; + + if (*c != '"') + return -EINVAL; + + c++; + + for (;;) { + int len; + + /* Check for EOF */ + if (*c == 0) + return -EINVAL; + + /* Check for control characters 0x00..0x1f */ + if (*c > 0 && *c < ' ') + return -EINVAL; + + /* Check for control character 0x7f */ + if (*c == 0x7f) + return -EINVAL; + + if (*c == '"') { + if (!s) { + s = strdup(""); + if (!s) + return -ENOMEM; + } else + s[n] = 0; + + *p = c + 1; + + *ret = TAKE_PTR(s); + return JSON_TOKEN_STRING; + } + + if (*c == '\\') { + char ch = 0; + c++; + + if (*c == 0) + return -EINVAL; + + if (IN_SET(*c, '"', '\\', '/')) + ch = *c; + else if (*c == 'b') + ch = '\b'; + else if (*c == 'f') + ch = '\f'; + else if (*c == 'n') + ch = '\n'; + else if (*c == 'r') + ch = '\r'; + else if (*c == 't') + ch = '\t'; + else if (*c == 'u') { + char16_t x; + int r; + + r = unhex_ucs2(c + 1, &x); + if (r < 0) + return r; + + c += 5; + + if (!GREEDY_REALLOC(s, allocated, n + 5)) + return -ENOMEM; + + if (!utf16_is_surrogate(x)) + n += utf8_encode_unichar(s + n, (char32_t) x); + else if (utf16_is_trailing_surrogate(x)) + return -EINVAL; + else { + char16_t y; + + if (c[0] != '\\' || c[1] != 'u') + return -EINVAL; + + r = unhex_ucs2(c + 2, &y); + if (r < 0) + return r; + + c += 6; + + if (!utf16_is_trailing_surrogate(y)) + return -EINVAL; + + n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y)); + } + + continue; + } else + return -EINVAL; + + if (!GREEDY_REALLOC(s, allocated, n + 2)) + return -ENOMEM; + + s[n++] = ch; + c ++; + continue; + } + + len = utf8_encoded_valid_unichar(c, (size_t) -1); + if (len < 0) + return len; + + if (!GREEDY_REALLOC(s, allocated, n + len + 1)) + return -ENOMEM; + + memcpy(s + n, c, len); + n += len; + c += len; + } +} + +static int json_parse_number(const char **p, JsonValue *ret) { + bool negative = false, exponent_negative = false, is_real = false; + long double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0; + intmax_t i = 0; + uintmax_t u = 0; + const char *c; + + assert(p); + assert(*p); + assert(ret); + + c = *p; + + if (*c == '-') { + negative = true; + c++; + } + + if (*c == '0') + c++; + else { + if (!strchr("123456789", *c) || *c == 0) + return -EINVAL; + + do { + if (!is_real) { + if (negative) { + + if (i < INTMAX_MIN / 10) /* overflow */ + is_real = true; + else { + intmax_t t = 10 * i; + + if (t < INTMAX_MIN + (*c - '0')) /* overflow */ + is_real = true; + else + i = t - (*c - '0'); + } + } else { + if (u > UINTMAX_MAX / 10) /* overflow */ + is_real = true; + else { + uintmax_t t = 10 * u; + + if (t > UINTMAX_MAX - (*c - '0')) /* overflow */ + is_real = true; + else + u = t + (*c - '0'); + } + } + } + + x = 10.0 * x + (*c - '0'); + + c++; + } while (strchr("0123456789", *c) && *c != 0); + } + + if (*c == '.') { + is_real = true; + c++; + + if (!strchr("0123456789", *c) || *c == 0) + return -EINVAL; + + do { + y = 10.0 * y + (*c - '0'); + shift = 10.0 * shift; + c++; + } while (strchr("0123456789", *c) && *c != 0); + } + + if (IN_SET(*c, 'e', 'E')) { + is_real = true; + c++; + + if (*c == '-') { + exponent_negative = true; + c++; + } else if (*c == '+') + c++; + + if (!strchr("0123456789", *c) || *c == 0) + return -EINVAL; + + do { + exponent = 10.0 * exponent + (*c - '0'); + c++; + } while (strchr("0123456789", *c) && *c != 0); + } + + *p = c; + + if (is_real) { + ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10l((exponent_negative ? -1.0 : 1.0) * exponent); + return JSON_TOKEN_REAL; + } else if (negative) { + ret->integer = i; + return JSON_TOKEN_INTEGER; + } else { + ret->unsig = u; + return JSON_TOKEN_UNSIGNED; + } +} + +int json_tokenize( + const char **p, + char **ret_string, + JsonValue *ret_value, + unsigned *ret_line, /* 'ret_line' returns the line at the beginning of this token */ + unsigned *ret_column, + void **state, + unsigned *line, /* 'line' is used as a line state, it always reflect the line we are at after the token was read */ + unsigned *column) { + + unsigned start_line, start_column; + const char *start, *c; + size_t n; + int t, r; + + enum { + STATE_NULL, + STATE_VALUE, + STATE_VALUE_POST, + }; + + assert(p); + assert(*p); + assert(ret_string); + assert(ret_value); + assert(ret_line); + assert(ret_column); + assert(line); + assert(column); + assert(state); + + t = PTR_TO_INT(*state); + if (t == STATE_NULL) { + *line = 1; + *column = 1; + t = STATE_VALUE; + } + + /* Skip over the whitespace */ + n = strspn(*p, WHITESPACE); + inc_lines_columns(line, column, *p, n); + c = *p + n; + + /* Remember where we started processing this token */ + start = c; + start_line = *line; + start_column = *column; + + if (*c == 0) { + *ret_string = NULL; + *ret_value = JSON_VALUE_NULL; + r = JSON_TOKEN_END; + goto finish; + } + + switch (t) { + + case STATE_VALUE: + + if (*c == '{') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_OBJECT_OPEN; + goto null_return; + + } else if (*c == '}') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_OBJECT_CLOSE; + goto null_return; + + } else if (*c == '[') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_ARRAY_OPEN; + goto null_return; + + } else if (*c == ']') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_ARRAY_CLOSE; + goto null_return; + + } else if (*c == '"') { + + r = json_parse_string(&c, ret_string); + if (r < 0) + return r; + + *ret_value = JSON_VALUE_NULL; + *state = INT_TO_PTR(STATE_VALUE_POST); + goto finish; + + } else if (strchr("-0123456789", *c)) { + + r = json_parse_number(&c, ret_value); + if (r < 0) + return r; + + *ret_string = NULL; + *state = INT_TO_PTR(STATE_VALUE_POST); + goto finish; + + } else if (startswith(c, "true")) { + *ret_string = NULL; + ret_value->boolean = true; + c += 4; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_BOOLEAN; + goto finish; + + } else if (startswith(c, "false")) { + *ret_string = NULL; + ret_value->boolean = false; + c += 5; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_BOOLEAN; + goto finish; + + } else if (startswith(c, "null")) { + *ret_string = NULL; + *ret_value = JSON_VALUE_NULL; + c += 4; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_NULL; + goto finish; + + } + + return -EINVAL; + + case STATE_VALUE_POST: + + if (*c == ':') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_COLON; + goto null_return; + + } else if (*c == ',') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_COMMA; + goto null_return; + + } else if (*c == '}') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_OBJECT_CLOSE; + goto null_return; + + } else if (*c == ']') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_ARRAY_CLOSE; + goto null_return; + } + + return -EINVAL; + + default: + assert_not_reached("Unexpected tokenizer state"); + } + +null_return: + *ret_string = NULL; + *ret_value = JSON_VALUE_NULL; + +finish: + inc_lines_columns(line, column, start, c - start); + *p = c; + + *ret_line = start_line; + *ret_column = start_column; + + return r; +} + +typedef enum JsonExpect { + /* The following values are used by json_parse() */ + EXPECT_TOPLEVEL, + EXPECT_END, + EXPECT_OBJECT_FIRST_KEY, + EXPECT_OBJECT_NEXT_KEY, + EXPECT_OBJECT_COLON, + EXPECT_OBJECT_VALUE, + EXPECT_OBJECT_COMMA, + EXPECT_ARRAY_FIRST_ELEMENT, + EXPECT_ARRAY_NEXT_ELEMENT, + EXPECT_ARRAY_COMMA, + + /* And these are used by json_build() */ + EXPECT_ARRAY_ELEMENT, + EXPECT_OBJECT_KEY, +} JsonExpect; + +typedef struct JsonStack { + JsonExpect expect; + JsonVariant **elements; + size_t n_elements, n_elements_allocated; + unsigned line_before; + unsigned column_before; + size_t n_suppress; /* When building: if > 0, suppress this many subsequent elements. If == (size_t) -1, suppress all subsequent elements */ +} JsonStack; + +static void json_stack_release(JsonStack *s) { + assert(s); + + json_variant_unref_many(s->elements, s->n_elements); + s->elements = mfree(s->elements); +} + +static int json_parse_internal( + const char **input, + JsonSource *source, + JsonParseFlags flags, + JsonVariant **ret, + unsigned *line, + unsigned *column, + bool continue_end) { + + size_t n_stack = 1, n_stack_allocated = 0, i; + unsigned line_buffer = 0, column_buffer = 0; + void *tokenizer_state = NULL; + JsonStack *stack = NULL; + const char *p; + int r; + + assert_return(input, -EINVAL); + assert_return(ret, -EINVAL); + + p = *input; + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack)) + return -ENOMEM; + + stack[0] = (JsonStack) { + .expect = EXPECT_TOPLEVEL, + }; + + if (!line) + line = &line_buffer; + if (!column) + column = &column_buffer; + + for (;;) { + _cleanup_(json_variant_unrefp) JsonVariant *add = NULL; + _cleanup_free_ char *string = NULL; + unsigned line_token, column_token; + JsonStack *current; + JsonValue value; + int token; + + assert(n_stack > 0); + current = stack + n_stack - 1; + + if (continue_end && current->expect == EXPECT_END) + goto done; + + token = json_tokenize(&p, &string, &value, &line_token, &column_token, &tokenizer_state, line, column); + if (token < 0) { + r = token; + goto finish; + } + + switch (token) { + + case JSON_TOKEN_END: + if (current->expect != EXPECT_END) { + r = -EINVAL; + goto finish; + } + + assert(current->n_elements == 1); + assert(n_stack == 1); + goto done; + + case JSON_TOKEN_COLON: + + if (current->expect != EXPECT_OBJECT_COLON) { + r = -EINVAL; + goto finish; + } + + current->expect = EXPECT_OBJECT_VALUE; + break; + + case JSON_TOKEN_COMMA: + + if (current->expect == EXPECT_OBJECT_COMMA) + current->expect = EXPECT_OBJECT_NEXT_KEY; + else if (current->expect == EXPECT_ARRAY_COMMA) + current->expect = EXPECT_ARRAY_NEXT_ELEMENT; + else { + r = -EINVAL; + goto finish; + } + + break; + + case JSON_TOKEN_OBJECT_OPEN: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + /* Prepare the expect for when we return from the child */ + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_OBJECT_FIRST_KEY, + .line_before = line_token, + .column_before = column_token, + }; + + current = stack + n_stack - 1; + break; + + case JSON_TOKEN_OBJECT_CLOSE: + if (!IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_COMMA)) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + r = json_variant_new_object(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + + line_token = current->line_before; + column_token = current->column_before; + + json_stack_release(current); + n_stack--, current--; + + break; + + case JSON_TOKEN_ARRAY_OPEN: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + /* Prepare the expect for when we return from the child */ + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_ARRAY_FIRST_ELEMENT, + .line_before = line_token, + .column_before = column_token, + }; + + break; + + case JSON_TOKEN_ARRAY_CLOSE: + if (!IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_COMMA)) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + r = json_variant_new_array(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + + line_token = current->line_before; + column_token = current->column_before; + + json_stack_release(current); + n_stack--, current--; + break; + + case JSON_TOKEN_STRING: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_string(&add, string); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY)) + current->expect = EXPECT_OBJECT_COLON; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_REAL: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_real(&add, value.real); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_INTEGER: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_integer(&add, value.integer); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_UNSIGNED: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_unsigned(&add, value.unsig); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_BOOLEAN: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_boolean(&add, value.boolean); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_NULL: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_null(&add); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + default: + assert_not_reached("Unexpected token"); + } + + if (add) { + /* If we are asked to make this parsed object sensitive, then let's apply this + * immediately after allocating each variant, so that when we abort half-way + * everything we already allocated that is then freed is correctly marked. */ + if (FLAGS_SET(flags, JSON_PARSE_SENSITIVE)) + json_variant_sensitive(add); + + (void) json_variant_set_source(&add, source, line_token, column_token); + + if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) { + r = -ENOMEM; + goto finish; + } + + current->elements[current->n_elements++] = TAKE_PTR(add); + } + } + +done: + assert(n_stack == 1); + assert(stack[0].n_elements == 1); + + *ret = json_variant_ref(stack[0].elements[0]); + *input = p; + r = 0; + +finish: + for (i = 0; i < n_stack; i++) + json_stack_release(stack + i); + + free(stack); + + return r; +} + +int json_parse(const char *input, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) { + return json_parse_internal(&input, NULL, flags, ret, ret_line, ret_column, false); +} + +int json_parse_continue(const char **p, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) { + return json_parse_internal(p, NULL, flags, ret, ret_line, ret_column, true); +} + +int json_parse_file_at(FILE *f, int dir_fd, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) { + _cleanup_(json_source_unrefp) JsonSource *source = NULL; + _cleanup_free_ char *text = NULL; + const char *p; + int r; + + if (f) + r = read_full_stream(f, &text, NULL); + else if (path) + r = read_full_file_full(dir_fd, path, 0, NULL, &text, NULL); + else + return -EINVAL; + if (r < 0) + return r; + + if (path) { + source = json_source_new(path); + if (!source) + return -ENOMEM; + } + + p = text; + return json_parse_internal(&p, source, flags, ret, ret_line, ret_column, false); +} + +int json_buildv(JsonVariant **ret, va_list ap) { + JsonStack *stack = NULL; + size_t n_stack = 1, n_stack_allocated = 0, i; + int r; + + assert_return(ret, -EINVAL); + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack)) + return -ENOMEM; + + stack[0] = (JsonStack) { + .expect = EXPECT_TOPLEVEL, + }; + + for (;;) { + _cleanup_(json_variant_unrefp) JsonVariant *add = NULL; + size_t n_subtract = 0; /* how much to subtract from current->n_suppress, i.e. how many elements would + * have been added to the current variant */ + JsonStack *current; + int command; + + assert(n_stack > 0); + current = stack + n_stack - 1; + + if (current->expect == EXPECT_END) + goto done; + + command = va_arg(ap, int); + + switch (command) { + + case _JSON_BUILD_STRING: { + const char *p; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + p = va_arg(ap, const char *); + + if (current->n_suppress == 0) { + r = json_variant_new_string(&add, p); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_INTEGER: { + intmax_t j; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + j = va_arg(ap, intmax_t); + + if (current->n_suppress == 0) { + r = json_variant_new_integer(&add, j); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_UNSIGNED: { + uintmax_t j; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + j = va_arg(ap, uintmax_t); + + if (current->n_suppress == 0) { + r = json_variant_new_unsigned(&add, j); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_REAL: { + long double d; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + d = va_arg(ap, long double); + + if (current->n_suppress == 0) { + r = json_variant_new_real(&add, d); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_BOOLEAN: { + bool b; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + b = va_arg(ap, int); + + if (current->n_suppress == 0) { + r = json_variant_new_boolean(&add, b); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_NULL: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (current->n_suppress == 0) { + r = json_variant_new_null(&add); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + + case _JSON_BUILD_VARIANT: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + /* Note that we don't care for current->n_suppress here, after all the variant is already + * allocated anyway... */ + add = va_arg(ap, JsonVariant*); + if (!add) + add = JSON_VARIANT_MAGIC_NULL; + else + json_variant_ref(add); + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + + case _JSON_BUILD_VARIANT_ARRAY: { + JsonVariant **array; + size_t n; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + array = va_arg(ap, JsonVariant**); + n = va_arg(ap, size_t); + + if (current->n_suppress == 0) { + r = json_variant_new_array(&add, array, n); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_LITERAL: { + const char *l; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + l = va_arg(ap, const char *); + + if (l) { + /* Note that we don't care for current->n_suppress here, we should generate parsing + * errors even in suppressed object properties */ + + r = json_parse(l, 0, &add, NULL, NULL); + if (r < 0) + goto finish; + } else + add = JSON_VARIANT_MAGIC_NULL; + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_ARRAY_BEGIN: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_ARRAY_ELEMENT, + .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the + * new array, then we should + * also suppress all array + * members */ + }; + + break; + + case _JSON_BUILD_ARRAY_END: + if (current->expect != EXPECT_ARRAY_ELEMENT) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + if (current->n_suppress == 0) { + r = json_variant_new_array(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + json_stack_release(current); + n_stack--, current--; + + break; + + case _JSON_BUILD_STRV: { + char **l; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + l = va_arg(ap, char **); + + if (current->n_suppress == 0) { + r = json_variant_new_array_strv(&add, l); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_BASE64: { + const void *p; + size_t n; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + p = va_arg(ap, const void *); + n = va_arg(ap, size_t); + + if (current->n_suppress == 0) { + r = json_variant_new_base64(&add, p, n); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_ID128: { + sd_id128_t id; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + id = va_arg(ap, sd_id128_t); + + if (current->n_suppress == 0) { + r = json_variant_new_id128(&add, id); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_BYTE_ARRAY: { + const void *array; + size_t n; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + array = va_arg(ap, const void*); + n = va_arg(ap, size_t); + + if (current->n_suppress == 0) { + r = json_variant_new_array_bytes(&add, array, n); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_OBJECT_BEGIN: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_OBJECT_KEY, + .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the + * new object, then we should + * also suppress all object + * members */ + }; + + break; + + case _JSON_BUILD_OBJECT_END: + + if (current->expect != EXPECT_OBJECT_KEY) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + if (current->n_suppress == 0) { + r = json_variant_new_object(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + json_stack_release(current); + n_stack--, current--; + + break; + + case _JSON_BUILD_PAIR: { + const char *n; + + if (current->expect != EXPECT_OBJECT_KEY) { + r = -EINVAL; + goto finish; + } + + n = va_arg(ap, const char *); + + if (current->n_suppress == 0) { + r = json_variant_new_string(&add, n); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + current->expect = EXPECT_OBJECT_VALUE; + break; + } + + case _JSON_BUILD_PAIR_CONDITION: { + const char *n; + bool b; + + if (current->expect != EXPECT_OBJECT_KEY) { + r = -EINVAL; + goto finish; + } + + b = va_arg(ap, int); + n = va_arg(ap, const char *); + + if (b && current->n_suppress == 0) { + r = json_variant_new_string(&add, n); + if (r < 0) + goto finish; + } + + n_subtract = 1; /* we generated one item */ + + if (!b && current->n_suppress != (size_t) -1) + current->n_suppress += 2; /* Suppress this one and the next item */ + + current->expect = EXPECT_OBJECT_VALUE; + break; + }} + + /* If a variant was generated, add it to our current variant, but only if we are not supposed to suppress additions */ + if (add && current->n_suppress == 0) { + if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) { + r = -ENOMEM; + goto finish; + } + + current->elements[current->n_elements++] = TAKE_PTR(add); + } + + /* If we are supposed to suppress items, let's subtract how many items where generated from that + * counter. Except if the counter is (size_t) -1, i.e. we shall suppress an infinite number of elements + * on this stack level */ + if (current->n_suppress != (size_t) -1) { + if (current->n_suppress <= n_subtract) /* Saturated */ + current->n_suppress = 0; + else + current->n_suppress -= n_subtract; + } + } + +done: + assert(n_stack == 1); + assert(stack[0].n_elements == 1); + + *ret = json_variant_ref(stack[0].elements[0]); + r = 0; + +finish: + for (i = 0; i < n_stack; i++) + json_stack_release(stack + i); + + free(stack); + + return r; +} + +int json_build(JsonVariant **ret, ...) { + va_list ap; + int r; + + va_start(ap, ret); + r = json_buildv(ret, ap); + va_end(ap); + + return r; +} + +int json_log_internal( + JsonVariant *variant, + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + PROTECT_ERRNO; + + unsigned source_line, source_column; + char buffer[LINE_MAX]; + const char *source; + va_list ap; + int r; + + errno = ERRNO_VALUE(error); + + va_start(ap, format); + (void) vsnprintf(buffer, sizeof buffer, format, ap); + va_end(ap); + + if (variant) { + r = json_variant_get_source(variant, &source, &source_line, &source_column); + if (r < 0) + return r; + } else { + source = NULL; + source_line = 0; + source_column = 0; + } + + if (source && source_line > 0 && source_column > 0) + return log_struct_internal( + LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level), + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + "CONFIG_FILE=%s", source, + "CONFIG_LINE=%u", source_line, + "CONFIG_COLUMN=%u", source_column, + LOG_MESSAGE("%s:%u:%u: %s", source, source_line, source_column, buffer), + NULL); + else + return log_struct_internal( + LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level), + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + LOG_MESSAGE("%s", buffer), + NULL); +} + +int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata) { + const JsonDispatch *p; + size_t i, n, m; + int r, done = 0; + bool *found; + + if (!json_variant_is_object(v)) { + json_log(v, flags, 0, "JSON variant is not an object."); + + if (flags & JSON_PERMISSIVE) + return 0; + + return -EINVAL; + } + + for (p = table, m = 0; p->name; p++) + m++; + + found = newa0(bool, m); + + n = json_variant_elements(v); + for (i = 0; i < n; i += 2) { + JsonVariant *key, *value; + + assert_se(key = json_variant_by_index(v, i)); + assert_se(value = json_variant_by_index(v, i+1)); + + for (p = table; p->name; p++) + if (p->name == POINTER_MAX || + streq_ptr(json_variant_string(key), p->name)) + break; + + if (p->name) { /* Found a matching entry! :-) */ + JsonDispatchFlags merged_flags; + + merged_flags = flags | p->flags; + + if (p->type != _JSON_VARIANT_TYPE_INVALID && + !json_variant_has_type(value, p->type)) { + + json_log(value, merged_flags, 0, + "Object field '%s' has wrong type %s, expected %s.", json_variant_string(key), + json_variant_type_to_string(json_variant_type(value)), json_variant_type_to_string(p->type)); + + if (merged_flags & JSON_PERMISSIVE) + continue; + + return -EINVAL; + } + + if (found[p-table]) { + json_log(value, merged_flags, 0, "Duplicate object field '%s'.", json_variant_string(key)); + + if (merged_flags & JSON_PERMISSIVE) + continue; + + return -ENOTUNIQ; + } + + found[p-table] = true; + + if (p->callback) { + r = p->callback(json_variant_string(key), value, merged_flags, (uint8_t*) userdata + p->offset); + if (r < 0) { + if (merged_flags & JSON_PERMISSIVE) + continue; + + return r; + } + } + + done ++; + + } else { /* Didn't find a matching entry! :-( */ + + if (bad) { + r = bad(json_variant_string(key), value, flags, userdata); + if (r < 0) { + if (flags & JSON_PERMISSIVE) + continue; + + return r; + } else + done ++; + + } else { + json_log(value, flags, 0, "Unexpected object field '%s'.", json_variant_string(key)); + + if (flags & JSON_PERMISSIVE) + continue; + + return -EADDRNOTAVAIL; + } + } + } + + for (p = table; p->name; p++) { + JsonDispatchFlags merged_flags = p->flags | flags; + + if ((merged_flags & JSON_MANDATORY) && !found[p-table]) { + json_log(v, merged_flags, 0, "Missing object field '%s'.", p->name); + + if ((merged_flags & JSON_PERMISSIVE)) + continue; + + return -ENXIO; + } + } + + return done; +} + +int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + bool *b = userdata; + + assert(variant); + assert(b); + + if (!json_variant_is_boolean(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a boolean.", strna(name)); + + *b = json_variant_boolean(variant); + return 0; +} + +int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + int *b = userdata; + + assert(variant); + assert(b); + + if (json_variant_is_null(variant)) { + *b = -1; + return 0; + } + + if (!json_variant_is_boolean(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a boolean.", strna(name)); + + *b = json_variant_boolean(variant); + return 0; +} + +int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + intmax_t *i = userdata; + + assert(variant); + assert(i); + + if (!json_variant_is_integer(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name)); + + *i = json_variant_integer(variant); + return 0; +} + +int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uintmax_t *u = userdata; + + assert(variant); + assert(u); + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an unsigned integer.", strna(name)); + + *u = json_variant_unsigned(variant); + return 0; +} + +int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uint32_t *u = userdata; + + assert(variant); + assert(u); + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an unsigned integer.", strna(name)); + + if (json_variant_unsigned(variant) > UINT32_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' out of bounds.", strna(name)); + + *u = (uint32_t) json_variant_unsigned(variant); + return 0; +} + +int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + int32_t *i = userdata; + + assert(variant); + assert(i); + + if (!json_variant_is_integer(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name)); + + if (json_variant_integer(variant) < INT32_MIN || json_variant_integer(variant) > INT32_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' out of bounds.", strna(name)); + + *i = (int32_t) json_variant_integer(variant); + return 0; +} + +int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + int r; + + assert(variant); + assert(s); + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant))) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name)); + + r = free_and_strdup(s, json_variant_string(variant)); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +int json_dispatch_const_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + const char **s = userdata; + + assert(variant); + assert(s); + + if (json_variant_is_null(variant)) { + *s = NULL; + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant))) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name)); + + *s = json_variant_string(variant); + return 0; +} + +int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + _cleanup_strv_free_ char **l = NULL; + char ***s = userdata; + JsonVariant *e; + int r; + + assert(variant); + assert(s); + + if (json_variant_is_null(variant)) { + *s = strv_free(*s); + return 0; + } + + /* Let's be flexible here: accept a single string in place of a single-item array */ + if (json_variant_is_string(variant)) { + if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant))) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name)); + + l = strv_new(json_variant_string(variant)); + if (!l) + return log_oom(); + + strv_free_and_replace(*s, l); + return 0; + } + + if (!json_variant_is_array(variant)) + return json_log(variant, SYNTHETIC_ERRNO(EINVAL), flags, "JSON field '%s' is not an array.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + if (!json_variant_is_string(e)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string."); + + if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(e))) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name)); + + r = strv_extend(&l, json_variant_string(e)); + if (r < 0) + return json_log(e, flags, r, "Failed to append array element: %m"); + } + + strv_free_and_replace(*s, l); + return 0; +} + +int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + JsonVariant **p = userdata; + + assert(variant); + assert(p); + + json_variant_unref(*p); + *p = json_variant_ref(variant); + + return 0; +} + +int json_dispatch_uid_gid(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uid_t *uid = userdata; + uintmax_t k; + + assert_cc(sizeof(uid_t) == sizeof(uint32_t)); + assert_cc(sizeof(gid_t) == sizeof(uint32_t)); + + DISABLE_WARNING_TYPE_LIMITS; + assert_cc(((uid_t) -1 < (uid_t) 0) == ((gid_t) -1 < (gid_t) 0)); + REENABLE_WARNING; + + if (json_variant_is_null(variant)) { + *uid = UID_INVALID; + return 0; + } + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name)); + + k = json_variant_unsigned(variant); + if (k > UINT32_MAX || !uid_is_valid(k)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid UID/GID.", strna(name)); + + *uid = k; + return 0; +} + +int json_dispatch_user_group_name(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + int r; + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + if (!valid_user_group_name(n, FLAGS_SET(flags, JSON_RELAX) ? VALID_USER_RELAX : 0)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid user/group name.", strna(name)); + + r = free_and_strdup(s, n); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +int json_dispatch_id128(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + sd_id128_t *uuid = userdata; + int r; + + if (json_variant_is_null(variant)) { + *uuid = SD_ID128_NULL; + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + r = sd_id128_from_string(json_variant_string(variant), uuid); + if (r < 0) + return json_log(variant, flags, r, "JSON field '%s' is not a valid UID.", strna(name)); + + return 0; +} + +int json_dispatch_unsupported(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not allowed in this object.", strna(name)); +} + +static int json_cmp_strings(const void *x, const void *y) { + JsonVariant *const *a = x, *const *b = y; + + if (!json_variant_is_string(*a) || !json_variant_is_string(*b)) + return CMP(*a, *b); + + return strcmp(json_variant_string(*a), json_variant_string(*b)); +} + +int json_variant_sort(JsonVariant **v) { + _cleanup_free_ JsonVariant **a = NULL; + JsonVariant *n = NULL; + size_t i, m; + int r; + + assert(v); + + if (json_variant_is_sorted(*v)) + return 0; + + if (!json_variant_is_object(*v)) + return -EMEDIUMTYPE; + + /* Sorts they key/value pairs in an object variant */ + + m = json_variant_elements(*v); + a = new(JsonVariant*, m); + if (!a) + return -ENOMEM; + + for (i = 0; i < m; i++) + a[i] = json_variant_by_index(*v, i); + + qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings); + + r = json_variant_new_object(&n, a, m); + if (r < 0) + return r; + + json_variant_propagate_sensitive(*v, n); + + if (!n->sorted) /* Check if this worked. This will fail if there are multiple identical keys used. */ + return -ENOTUNIQ; + + json_variant_unref(*v); + *v = n; + + return 1; +} + +int json_variant_normalize(JsonVariant **v) { + _cleanup_free_ JsonVariant **a = NULL; + JsonVariant *n = NULL; + size_t i, j, m; + int r; + + assert(v); + + if (json_variant_is_normalized(*v)) + return 0; + + if (!json_variant_is_object(*v) && !json_variant_is_array(*v)) + return -EMEDIUMTYPE; + + /* Sorts the key/value pairs in an object variant anywhere down the tree in the specified variant */ + + m = json_variant_elements(*v); + a = new(JsonVariant*, m); + if (!a) + return -ENOMEM; + + for (i = 0; i < m; i++) { + a[i] = json_variant_ref(json_variant_by_index(*v, i)); + + r = json_variant_normalize(a + i); + if (r < 0) + goto finish; + } + + qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings); + + if (json_variant_is_object(*v)) + r = json_variant_new_object(&n, a, m); + else { + assert(json_variant_is_array(*v)); + r = json_variant_new_array(&n, a, m); + } + if (r < 0) + goto finish; + + json_variant_propagate_sensitive(*v, n); + + if (!n->normalized) { /* Let's see if normalization worked. It will fail if there are multiple + * identical keys used in the same object anywhere, or if there are floating + * point numbers used (see below) */ + r = -ENOTUNIQ; + goto finish; + } + + json_variant_unref(*v); + *v = n; + + r = 1; + +finish: + for (j = 0; j < i; j++) + json_variant_unref(a[j]); + + return r; +} + +bool json_variant_is_normalized(JsonVariant *v) { + + /* For now, let's consider anything containing numbers not expressible as integers as + * non-normalized. That's because we cannot sensibly compare them due to accuracy issues, nor even + * store them if they are too large. */ + if (json_variant_is_real(v) && !json_variant_is_integer(v) && !json_variant_is_unsigned(v)) + return false; + + /* The concept only applies to variants that include other variants, i.e. objects and arrays. All + * others are normalized anyway. */ + if (!json_variant_is_object(v) && !json_variant_is_array(v)) + return true; + + /* Empty objects/arrays don't include any other variant, hence are always normalized too */ + if (json_variant_elements(v) == 0) + return true; + + return v->normalized; /* For everything else there's an explicit boolean we maintain */ +} + +bool json_variant_is_sorted(JsonVariant *v) { + + /* Returns true if all key/value pairs of an object are properly sorted. Note that this only applies + * to objects, not arrays. */ + + if (!json_variant_is_object(v)) + return true; + if (json_variant_elements(v) <= 1) + return true; + + return v->sorted; +} + +int json_variant_unbase64(JsonVariant *v, void **ret, size_t *ret_size) { + + if (!json_variant_is_string(v)) + return -EINVAL; + + return unbase64mem(json_variant_string(v), (size_t) -1, ret, ret_size); +} + +static const char* const json_variant_type_table[_JSON_VARIANT_TYPE_MAX] = { + [JSON_VARIANT_STRING] = "string", + [JSON_VARIANT_INTEGER] = "integer", + [JSON_VARIANT_UNSIGNED] = "unsigned", + [JSON_VARIANT_REAL] = "real", + [JSON_VARIANT_NUMBER] = "number", + [JSON_VARIANT_BOOLEAN] = "boolean", + [JSON_VARIANT_ARRAY] = "array", + [JSON_VARIANT_OBJECT] = "object", + [JSON_VARIANT_NULL] = "null", +}; + +DEFINE_STRING_TABLE_LOOKUP(json_variant_type, JsonVariantType); diff --git a/src/shared/json.h b/src/shared/json.h new file mode 100644 index 0000000..0809f31 --- /dev/null +++ b/src/shared/json.h @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <fcntl.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include "sd-id128.h" + +#include "macro.h" +#include "string-util.h" +#include "log.h" + +/* + In case you wonder why we have our own JSON implementation, here are a couple of reasons why this implementation has + benefits over various other implementations: + + - We need support for 64bit signed and unsigned integers, i.e. the full 64,5bit range of -9223372036854775808…18446744073709551615 + - All our variants are immutable after creation + - Special values such as true, false, zero, null, empty strings, empty array, empty objects require zero dynamic memory + - Progressive parsing + - Our integer/real type implicitly converts, but only if that's safe and loss-lessly possible + - There's a "builder" for putting together objects easily in varargs function calls + - There's a "dispatcher" for mapping objects to C data structures + - Every variant optionally carries parsing location information, which simplifies debugging and parse log error generation + - Formatter has color, line, column support + + Limitations: + - Doesn't allow embedded NUL in strings + - Can't store integers outside of the -9223372036854775808…18446744073709551615 range (it will use 'long double' for + values outside this range, which is lossy) + - Can't store negative zero (will be treated identical to positive zero, and not retained across serialization) + - Can't store non-integer numbers that can't be stored in "long double" losslessly + - Allows creation and parsing of objects with duplicate keys. The "dispatcher" will refuse them however. This means + we can parse and pass around such objects, but will carefully refuse them when we convert them into our own data. + + (These limitations should be pretty much in line with those of other JSON implementations, in fact might be less + limiting in most cases even.) +*/ + +typedef struct JsonVariant JsonVariant; + +typedef enum JsonVariantType { + JSON_VARIANT_STRING, + JSON_VARIANT_INTEGER, + JSON_VARIANT_UNSIGNED, + JSON_VARIANT_REAL, + JSON_VARIANT_NUMBER, /* This a pseudo-type: we can never create variants of this type, but we use it as wildcard check for the above three types */ + JSON_VARIANT_BOOLEAN, + JSON_VARIANT_ARRAY, + JSON_VARIANT_OBJECT, + JSON_VARIANT_NULL, + _JSON_VARIANT_TYPE_MAX, + _JSON_VARIANT_TYPE_INVALID = -1 +} JsonVariantType; + +int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n); +int json_variant_new_base64(JsonVariant **ret, const void *p, size_t n); +int json_variant_new_integer(JsonVariant **ret, intmax_t i); +int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u); +int json_variant_new_real(JsonVariant **ret, long double d); +int json_variant_new_boolean(JsonVariant **ret, bool b); +int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n); +int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n); +int json_variant_new_array_strv(JsonVariant **ret, char **l); +int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n); +int json_variant_new_null(JsonVariant **ret); +int json_variant_new_id128(JsonVariant **ret, sd_id128_t id); + +static inline int json_variant_new_string(JsonVariant **ret, const char *s) { + return json_variant_new_stringn(ret, s, (size_t) -1); +} + +JsonVariant *json_variant_ref(JsonVariant *v); +JsonVariant *json_variant_unref(JsonVariant *v); +void json_variant_unref_many(JsonVariant **array, size_t n); + +DEFINE_TRIVIAL_CLEANUP_FUNC(JsonVariant *, json_variant_unref); + +const char *json_variant_string(JsonVariant *v); +intmax_t json_variant_integer(JsonVariant *v); +uintmax_t json_variant_unsigned(JsonVariant *v); +long double json_variant_real(JsonVariant *v); +bool json_variant_boolean(JsonVariant *v); + +JsonVariantType json_variant_type(JsonVariant *v); +bool json_variant_has_type(JsonVariant *v, JsonVariantType type); + +static inline bool json_variant_is_string(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_STRING); +} + +static inline bool json_variant_is_integer(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_INTEGER); +} + +static inline bool json_variant_is_unsigned(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_UNSIGNED); +} + +static inline bool json_variant_is_real(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_REAL); +} + +static inline bool json_variant_is_number(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_NUMBER); +} + +static inline bool json_variant_is_boolean(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_BOOLEAN); +} + +static inline bool json_variant_is_array(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_ARRAY); +} + +static inline bool json_variant_is_object(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_OBJECT); +} + +static inline bool json_variant_is_null(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_NULL); +} + +bool json_variant_is_negative(JsonVariant *v); +bool json_variant_is_blank_object(JsonVariant *v); +bool json_variant_is_blank_array(JsonVariant *v); +bool json_variant_is_normalized(JsonVariant *v); +bool json_variant_is_sorted(JsonVariant *v); + +size_t json_variant_elements(JsonVariant *v); +JsonVariant *json_variant_by_index(JsonVariant *v, size_t index); +JsonVariant *json_variant_by_key(JsonVariant *v, const char *key); +JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key); + +bool json_variant_equal(JsonVariant *a, JsonVariant *b); + +void json_variant_sensitive(JsonVariant *v); +bool json_variant_is_sensitive(JsonVariant *v); + +struct json_variant_foreach_state { + JsonVariant *variant; + size_t idx; +}; + +#define JSON_VARIANT_ARRAY_FOREACH(i, v) \ + for (struct json_variant_foreach_state _state = { (v), 0 }; \ + json_variant_is_array(_state.variant) && \ + _state.idx < json_variant_elements(_state.variant) && \ + ({ i = json_variant_by_index(_state.variant, _state.idx); \ + true; }); \ + _state.idx++) + +#define JSON_VARIANT_OBJECT_FOREACH(k, e, v) \ + for (struct json_variant_foreach_state _state = { (v), 0 }; \ + json_variant_is_object(_state.variant) && \ + _state.idx < json_variant_elements(_state.variant) && \ + ({ k = json_variant_string(json_variant_by_index(_state.variant, _state.idx)); \ + e = json_variant_by_index(_state.variant, _state.idx + 1); \ + true; }); \ + _state.idx += 2) + +int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column); + +typedef enum JsonFormatFlags { + JSON_FORMAT_NEWLINE = 1 << 0, /* suffix with newline */ + JSON_FORMAT_PRETTY = 1 << 1, /* add internal whitespace to appeal to human readers */ + JSON_FORMAT_PRETTY_AUTO = 1 << 2, /* same, but only if connected to a tty (and JSON_FORMAT_NEWLINE otherwise) */ + JSON_FORMAT_COLOR = 1 << 3, /* insert ANSI color sequences */ + JSON_FORMAT_COLOR_AUTO = 1 << 4, /* insert ANSI color sequences if colors_enabled() says so */ + JSON_FORMAT_SOURCE = 1 << 5, /* prefix with source filename/line/column */ + JSON_FORMAT_SSE = 1 << 6, /* prefix/suffix with W3C server-sent events */ + JSON_FORMAT_SEQ = 1 << 7, /* prefix/suffix with RFC 7464 application/json-seq */ + JSON_FORMAT_FLUSH = 1 << 8, /* call fflush() after dumping JSON */ +} JsonFormatFlags; + +int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret); +void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix); + +int json_variant_filter(JsonVariant **v, char **to_remove); + +int json_variant_set_field(JsonVariant **v, const char *field, JsonVariant *value); +int json_variant_set_field_string(JsonVariant **v, const char *field, const char *value); +int json_variant_set_field_integer(JsonVariant **v, const char *field, intmax_t value); +int json_variant_set_field_unsigned(JsonVariant **v, const char *field, uintmax_t value); +int json_variant_set_field_boolean(JsonVariant **v, const char *field, bool b); +int json_variant_set_field_strv(JsonVariant **v, const char *field, char **l); + +int json_variant_append_array(JsonVariant **v, JsonVariant *element); + +int json_variant_merge(JsonVariant **v, JsonVariant *m); + +int json_variant_strv(JsonVariant *v, char ***ret); + +int json_variant_sort(JsonVariant **v); +int json_variant_normalize(JsonVariant **v); + +typedef enum JsonParseFlags { + JSON_PARSE_SENSITIVE = 1 << 0, /* mark variant as "sensitive", i.e. something containing secret key material or such */ +} JsonParseFlags; + +int json_parse(const char *string, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column); +int json_parse_continue(const char **p, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column); +int json_parse_file_at(FILE *f, int dir_fd, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column); + +static inline int json_parse_file(FILE *f, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) { + return json_parse_file_at(f, AT_FDCWD, path, flags, ret, ret_line, ret_column); +} + +enum { + _JSON_BUILD_STRING, + _JSON_BUILD_INTEGER, + _JSON_BUILD_UNSIGNED, + _JSON_BUILD_REAL, + _JSON_BUILD_BOOLEAN, + _JSON_BUILD_ARRAY_BEGIN, + _JSON_BUILD_ARRAY_END, + _JSON_BUILD_OBJECT_BEGIN, + _JSON_BUILD_OBJECT_END, + _JSON_BUILD_PAIR, + _JSON_BUILD_PAIR_CONDITION, + _JSON_BUILD_NULL, + _JSON_BUILD_VARIANT, + _JSON_BUILD_VARIANT_ARRAY, + _JSON_BUILD_LITERAL, + _JSON_BUILD_STRV, + _JSON_BUILD_BASE64, + _JSON_BUILD_ID128, + _JSON_BUILD_BYTE_ARRAY, + _JSON_BUILD_MAX, +}; + +#define JSON_BUILD_STRING(s) _JSON_BUILD_STRING, ({ const char *_x = s; _x; }) +#define JSON_BUILD_INTEGER(i) _JSON_BUILD_INTEGER, ({ intmax_t _x = i; _x; }) +#define JSON_BUILD_UNSIGNED(u) _JSON_BUILD_UNSIGNED, ({ uintmax_t _x = u; _x; }) +#define JSON_BUILD_REAL(d) _JSON_BUILD_REAL, ({ long double _x = d; _x; }) +#define JSON_BUILD_BOOLEAN(b) _JSON_BUILD_BOOLEAN, ({ bool _x = b; _x; }) +#define JSON_BUILD_ARRAY(...) _JSON_BUILD_ARRAY_BEGIN, __VA_ARGS__, _JSON_BUILD_ARRAY_END +#define JSON_BUILD_EMPTY_ARRAY _JSON_BUILD_ARRAY_BEGIN, _JSON_BUILD_ARRAY_END +#define JSON_BUILD_OBJECT(...) _JSON_BUILD_OBJECT_BEGIN, __VA_ARGS__, _JSON_BUILD_OBJECT_END +#define JSON_BUILD_EMPTY_OBJECT _JSON_BUILD_OBJECT_BEGIN, _JSON_BUILD_OBJECT_END +#define JSON_BUILD_PAIR(n, ...) _JSON_BUILD_PAIR, ({ const char *_x = n; _x; }), __VA_ARGS__ +#define JSON_BUILD_PAIR_CONDITION(c, n, ...) _JSON_BUILD_PAIR_CONDITION, ({ bool _x = c; _x; }), ({ const char *_x = n; _x; }), __VA_ARGS__ +#define JSON_BUILD_NULL _JSON_BUILD_NULL +#define JSON_BUILD_VARIANT(v) _JSON_BUILD_VARIANT, ({ JsonVariant *_x = v; _x; }) +#define JSON_BUILD_VARIANT_ARRAY(v, n) _JSON_BUILD_VARIANT_ARRAY, ({ JsonVariant **_x = v; _x; }), ({ size_t _y = n; _y; }) +#define JSON_BUILD_LITERAL(l) _JSON_BUILD_LITERAL, ({ const char *_x = l; _x; }) +#define JSON_BUILD_STRV(l) _JSON_BUILD_STRV, ({ char **_x = l; _x; }) +#define JSON_BUILD_BASE64(p, n) _JSON_BUILD_BASE64, ({ const void *_x = p; _x; }), ({ size_t _y = n; _y; }) +#define JSON_BUILD_ID128(id) _JSON_BUILD_ID128, ({ sd_id128_t _x = id; _x; }) +#define JSON_BUILD_BYTE_ARRAY(v, n) _JSON_BUILD_BYTE_ARRAY, ({ const void *_x = v; _x; }), ({ size_t _y = n; _y; }) + +int json_build(JsonVariant **ret, ...); +int json_buildv(JsonVariant **ret, va_list ap); + +/* A bitmask of flags used by the dispatch logic. Note that this is a combined bit mask, that is generated from the bit + * mask originally passed into json_dispatch(), the individual bitmask associated with the static JsonDispatch callout + * entry, as well the bitmask specified for json_log() calls */ +typedef enum JsonDispatchFlags { + /* The following three may be set in JsonDispatch's .flags field or the json_dispatch() flags parameter */ + JSON_PERMISSIVE = 1 << 0, /* Shall parsing errors be considered fatal for this property? */ + JSON_MANDATORY = 1 << 1, /* Should existence of this property be mandatory? */ + JSON_LOG = 1 << 2, /* Should the parser log about errors? */ + JSON_SAFE = 1 << 3, /* Don't accept "unsafe" strings in json_dispatch_string() + json_dispatch_string() */ + JSON_RELAX = 1 << 4, /* Use relaxed user name checking in json_dispatch_user_group_name */ + + /* The following two may be passed into log_json() in addition to the three above */ + JSON_DEBUG = 1 << 4, /* Indicates that this log message is a debug message */ + JSON_WARNING = 1 << 5, /* Indicates that this log message is a warning message */ +} JsonDispatchFlags; + +typedef int (*JsonDispatchCallback)(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); + +typedef struct JsonDispatch { + const char *name; + JsonVariantType type; + JsonDispatchCallback callback; + size_t offset; + JsonDispatchFlags flags; +} JsonDispatch; + +int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata); + +int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_const_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_uid_gid(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_user_group_name(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_id128(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_unsupported(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); + +assert_cc(sizeof(uintmax_t) == sizeof(uint64_t)); +#define json_dispatch_uint64 json_dispatch_unsigned + +assert_cc(sizeof(intmax_t) == sizeof(int64_t)); +#define json_dispatch_int64 json_dispatch_integer + +assert_cc(sizeof(uint32_t) == sizeof(unsigned)); +#define json_dispatch_uint json_dispatch_uint32 + +assert_cc(sizeof(int32_t) == sizeof(int)); +#define json_dispatch_int json_dispatch_int32 + +static inline int json_dispatch_level(JsonDispatchFlags flags) { + + /* Did the user request no logging? If so, then never log higher than LOG_DEBUG. Also, if this is marked as + * debug message, then also log at debug level. */ + + if (!(flags & JSON_LOG) || + (flags & JSON_DEBUG)) + return LOG_DEBUG; + + /* Are we invoked in permissive mode, or is this explicitly marked as warning message? Then this should be + * printed at LOG_WARNING */ + if (flags & (JSON_PERMISSIVE|JSON_WARNING)) + return LOG_WARNING; + + /* Otherwise it's an error. */ + return LOG_ERR; +} + +int json_log_internal(JsonVariant *variant, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8); + +#define json_log(variant, flags, error, ...) \ + ({ \ + int _level = json_dispatch_level(flags), _e = (error); \ + (log_get_max_level() >= LOG_PRI(_level)) \ + ? json_log_internal(variant, _level, _e, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \ + : -ERRNO_VALUE(_e); \ + }) + +#define json_log_oom(variant, flags) \ + json_log(variant, flags, SYNTHETIC_ERRNO(ENOMEM), "Out of memory.") + +#define JSON_VARIANT_STRING_CONST(x) _JSON_VARIANT_STRING_CONST(UNIQ, (x)) + +#define _JSON_VARIANT_STRING_CONST(xq, x) \ + ({ \ + _align_(2) static const char UNIQ_T(json_string_const, xq)[] = (x); \ + assert((((uintptr_t) UNIQ_T(json_string_const, xq)) & 1) == 0); \ + (JsonVariant*) ((uintptr_t) UNIQ_T(json_string_const, xq) + 1); \ + }) + +int json_variant_unbase64(JsonVariant *v, void **ret, size_t *ret_size); + +const char *json_variant_type_to_string(JsonVariantType t); +JsonVariantType json_variant_type_from_string(const char *s); diff --git a/src/shared/libcrypt-util.c b/src/shared/libcrypt-util.c new file mode 100644 index 0000000..5b31541 --- /dev/null +++ b/src/shared/libcrypt-util.c @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_CRYPT_H +/* libxcrypt is a replacement for glibc's libcrypt, and libcrypt might be + * removed from glibc at some point. As part of the removal, defines for + * crypt(3) are dropped from unistd.h, and we must include crypt.h instead. + * + * Newer versions of glibc (v2.0+) already ship crypt.h with a definition + * of crypt(3) as well, so we simply include it if it is present. MariaDB, + * MySQL, PostgreSQL, Perl and some other wide-spread packages do it the + * same way since ages without any problems. + */ +# include <crypt.h> +#else +# include <unistd.h> +#endif + +#include <errno.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "errno-util.h" +#include "libcrypt-util.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "missing_stdlib.h" +#include "random-util.h" +#include "string-util.h" +#include "strv.h" + +int make_salt(char **ret) { + +#if HAVE_CRYPT_GENSALT_RA + const char *e; + char *salt; + + /* If we have crypt_gensalt_ra() we default to the "preferred method" (i.e. usually yescrypt). + * crypt_gensalt_ra() is usually provided by libxcrypt. */ + + e = secure_getenv("SYSTEMD_CRYPT_PREFIX"); + if (!e) +#if HAVE_CRYPT_PREFERRED_METHOD + e = crypt_preferred_method(); +#else + e = "$6$"; +#endif + + log_debug("Generating salt for hash prefix: %s", e); + + salt = crypt_gensalt_ra(e, 0, NULL, 0); + if (!salt) + return -errno; + + *ret = salt; + return 0; +#else + /* If crypt_gensalt_ra() is not available, we use SHA512 and generate the salt on our own. */ + + static const char table[] = + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789" + "./"; + + uint8_t raw[16]; + char *salt, *j; + size_t i; + int r; + + /* This is a bit like crypt_gensalt_ra(), but doesn't require libcrypt, and doesn't do anything but + * SHA512, i.e. is legacy-free and minimizes our deps. */ + + assert_cc(sizeof(table) == 64U + 1U); + + log_debug("Generating fallback salt for hash prefix: $6$"); + + /* Insist on the best randomness by setting RANDOM_BLOCK, this is about keeping passwords secret after all. */ + r = genuine_random_bytes(raw, sizeof(raw), RANDOM_BLOCK); + if (r < 0) + return r; + + salt = new(char, 3+sizeof(raw)+1+1); + if (!salt) + return -ENOMEM; + + /* We only bother with SHA512 hashed passwords, the rest is legacy, and we don't do legacy. */ + j = stpcpy(salt, "$6$"); + for (i = 0; i < sizeof(raw); i++) + j[i] = table[raw[i] & 63]; + j[i++] = '$'; + j[i] = 0; + + *ret = salt; + return 0; +#endif +} + +#if HAVE_CRYPT_RA +# define CRYPT_RA_NAME "crypt_ra" +#else +# define CRYPT_RA_NAME "crypt_r" + +/* Provide a poor man's fallback that uses a fixed size buffer. */ + +static char* systemd_crypt_ra(const char *phrase, const char *setting, void **data, int *size) { + assert(data); + assert(size); + + /* We allocate the buffer because crypt(3) says: struct crypt_data may be quite large (32kB in this + * implementation of libcrypt; over 128kB in some other implementations). This is large enough that + * it may be unwise to allocate it on the stack. */ + + if (!*data) { + *data = new0(struct crypt_data, 1); + if (!*data) { + errno = -ENOMEM; + return NULL; + } + + *size = (int) (sizeof(struct crypt_data)); + } + + char *t = crypt_r(phrase, setting, *data); + if (!t) + return NULL; + + /* crypt_r may return a pointer to an invalid hashed password on error. Our callers expect NULL on + * error, so let's just return that. */ + if (t[0] == '*') + return NULL; + + return t; +} + +#define crypt_ra systemd_crypt_ra + +#endif + +int hash_password_full(const char *password, void **cd_data, int *cd_size, char **ret) { + _cleanup_free_ char *salt = NULL; + _cleanup_(erase_and_freep) void *_cd_data = NULL; + char *p; + int r, _cd_size = 0; + + assert(!!cd_data == !!cd_size); + + r = make_salt(&salt); + if (r < 0) + return log_debug_errno(r, "Failed to generate salt: %m"); + + errno = 0; + p = crypt_ra(password, salt, cd_data ?: &_cd_data, cd_size ?: &_cd_size); + if (!p) + return log_debug_errno(errno_or_else(SYNTHETIC_ERRNO(EINVAL)), + CRYPT_RA_NAME "() failed: %m"); + + p = strdup(p); + if (!p) + return -ENOMEM; + + *ret = p; + return 0; +} + +bool looks_like_hashed_password(const char *s) { + /* Returns false if the specified string is certainly not a hashed UNIX password. crypt(5) lists + * various hashing methods. We only reject (return false) strings which are documented to have + * different meanings. + * + * In particular, we allow locked passwords, i.e. strings starting with "!", including just "!", + * i.e. the locked empty password. See also fc58c0c7bf7e4f525b916e3e5be0de2307fef04e. + */ + if (!s) + return false; + + s += strspn(s, "!"); /* Skip (possibly duplicated) locking prefix */ + + return !STR_IN_SET(s, "x", "*"); +} + +int test_password_one(const char *hashed_password, const char *password) { + _cleanup_(erase_and_freep) void *cd_data = NULL; + int cd_size = 0; + const char *k; + + errno = 0; + k = crypt_ra(password, hashed_password, &cd_data, &cd_size); + if (!k) { + if (errno == ENOMEM) + return -ENOMEM; + /* Unknown or unavailable hashing method or string too short */ + return 0; + } + + return streq(k, hashed_password); +} + +int test_password_many(char **hashed_password, const char *password) { + char **hpw; + int r; + + STRV_FOREACH(hpw, hashed_password) { + r = test_password_one(*hpw, password); + if (r < 0) + return r; + if (r > 0) + return true; + } + + return false; +} diff --git a/src/shared/libcrypt-util.h b/src/shared/libcrypt-util.h new file mode 100644 index 0000000..5b9b945 --- /dev/null +++ b/src/shared/libcrypt-util.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +int make_salt(char **ret); +int hash_password_full(const char *password, void **cd_data, int *cd_size, char **ret); +static inline int hash_password(const char *password, char **ret) { + return hash_password_full(password, NULL, NULL, ret); +} +bool looks_like_hashed_password(const char *s); +int test_password_one(const char *hashed_password, const char *password); +int test_password_many(char **hashed_password, const char *password); diff --git a/src/shared/libmount-util.h b/src/shared/libmount-util.h new file mode 100644 index 0000000..db9728c --- /dev/null +++ b/src/shared/libmount-util.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +/* This needs to be after sys/mount.h */ +#include <libmount.h> + +#include "macro.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_table*, mnt_free_table); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_iter*, mnt_free_iter); + +static inline int libmount_parse( + const char *path, + FILE *source, + struct libmnt_table **ret_table, + struct libmnt_iter **ret_iter) { + + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; + int r; + + /* Older libmount seems to require this. */ + assert(!source || path); + + table = mnt_new_table(); + iter = mnt_new_iter(MNT_ITER_FORWARD); + if (!table || !iter) + return -ENOMEM; + + /* If source or path are specified, we use on the functions which ignore utab. + * Only if both are empty, we use mnt_table_parse_mtab(). */ + + if (source) + r = mnt_table_parse_stream(table, source, path); + else if (path) + r = mnt_table_parse_file(table, path); + else + r = mnt_table_parse_mtab(table, NULL); + if (r < 0) + return r; + + *ret_table = TAKE_PTR(table); + *ret_iter = TAKE_PTR(iter); + return 0; +} diff --git a/src/shared/libshared.sym b/src/shared/libshared.sym new file mode 100644 index 0000000..6a7495a --- /dev/null +++ b/src/shared/libshared.sym @@ -0,0 +1,3 @@ +SD_SHARED { + global: *; +}; diff --git a/src/shared/linux/README b/src/shared/linux/README new file mode 100644 index 0000000..46d5547 --- /dev/null +++ b/src/shared/linux/README @@ -0,0 +1,8 @@ +The files in this directory are copied from kernel-5.2, and the following modifications are applied: +- auto_dev-ioctl.h: set AUTOFS_DEV_IOCTL_VERSION_MINOR to 0 +- auto_dev-ioctl.h: define AUTOFS_IOCTL if not defined +- bpf_insn.h: This is imported from samples/bpf/bpf_insn.h +- bpf_insn.h: BPF_JMP_A() macro is also imported from include/linux/filter.h +- dm-ioctl.h: set DM_VERSION_MINOR to 27 +- ethtool.h: define __KERNEL_DIV_ROUND_UP if not defined +- ethtool.h: add casts in ethtool_cmd_speed() diff --git a/src/shared/linux/auto_dev-ioctl.h b/src/shared/linux/auto_dev-ioctl.h new file mode 100644 index 0000000..261546c --- /dev/null +++ b/src/shared/linux/auto_dev-ioctl.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright 2008 Red Hat, Inc. All rights reserved. + * Copyright 2008 Ian Kent <raven@themaw.net> + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + */ + +#ifndef _UAPI_LINUX_AUTO_DEV_IOCTL_H +#define _UAPI_LINUX_AUTO_DEV_IOCTL_H + +#include <linux/auto_fs.h> +#include <linux/string.h> + +#define AUTOFS_DEVICE_NAME "autofs" + +#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 +#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 + +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) + +/* + * An ioctl interface for autofs mount point control. + */ + +struct args_protover { + __u32 version; +}; + +struct args_protosubver { + __u32 sub_version; +}; + +struct args_openmount { + __u32 devid; +}; + +struct args_ready { + __u32 token; +}; + +struct args_fail { + __u32 token; + __s32 status; +}; + +struct args_setpipefd { + __s32 pipefd; +}; + +struct args_timeout { + __u64 timeout; +}; + +struct args_requester { + __u32 uid; + __u32 gid; +}; + +struct args_expire { + __u32 how; +}; + +struct args_askumount { + __u32 may_umount; +}; + +struct args_ismountpoint { + union { + struct args_in { + __u32 type; + } in; + struct args_out { + __u32 devid; + __u32 magic; + } out; + }; +}; + +/* + * All the ioctls use this structure. + * When sending a path size must account for the total length + * of the chunk of memory otherwise is is the size of the + * structure. + */ + +struct autofs_dev_ioctl { + __u32 ver_major; + __u32 ver_minor; + __u32 size; /* total size of data passed in + * including this struct */ + __s32 ioctlfd; /* automount command fd */ + + /* Command parameters */ + + union { + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; + }; + + char path[0]; +}; + +static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) +{ + memset(in, 0, AUTOFS_DEV_IOCTL_SIZE); + in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; + in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; + in->size = AUTOFS_DEV_IOCTL_SIZE; + in->ioctlfd = -1; +} + +enum { + /* Get various version info */ + AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71, + AUTOFS_DEV_IOCTL_PROTOVER_CMD, + AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, + + /* Open mount ioctl fd */ + AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, + + /* Close mount ioctl fd */ + AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, + + /* Mount/expire status returns */ + AUTOFS_DEV_IOCTL_READY_CMD, + AUTOFS_DEV_IOCTL_FAIL_CMD, + + /* Activate/deactivate autofs mount */ + AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, + AUTOFS_DEV_IOCTL_CATATONIC_CMD, + + /* Expiry timeout */ + AUTOFS_DEV_IOCTL_TIMEOUT_CMD, + + /* Get mount last requesting uid and gid */ + AUTOFS_DEV_IOCTL_REQUESTER_CMD, + + /* Check for eligible expire candidates */ + AUTOFS_DEV_IOCTL_EXPIRE_CMD, + + /* Request busy status */ + AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, + + /* Check if path is a mountpoint */ + AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, +}; + +#ifndef AUTOFS_IOCTL +#define AUTOFS_IOCTL 0x93 +#endif + +#define AUTOFS_DEV_IOCTL_VERSION \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_PROTOVER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_PROTOSUBVER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_OPENMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_READY \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_FAIL \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_SETPIPEFD \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_CATATONIC \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_TIMEOUT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_REQUESTER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_EXPIRE \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_ASKUMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl) + +#endif /* _UAPI_LINUX_AUTO_DEV_IOCTL_H */ diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h new file mode 100644 index 0000000..359fc37 --- /dev/null +++ b/src/shared/linux/bpf.h @@ -0,0 +1,3057 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include <linux/types.h> +#include <linux/bpf_common.h> + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word (64-bit) */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +/* jmp encodings */ +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +struct bpf_lpm_trie_key { + __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ + __u8 data[0]; /* Arbitrary size */ +}; + +struct bpf_cgroup_storage_key { + __u64 cgroup_inode_id; /* cgroup inode id */ + __u32 attach_type; /* program attach type */ +}; + +/* BPF syscall commands, see bpf(2) man-page for details. */ +enum bpf_cmd { + BPF_MAP_CREATE, + BPF_MAP_LOOKUP_ELEM, + BPF_MAP_UPDATE_ELEM, + BPF_MAP_DELETE_ELEM, + BPF_MAP_GET_NEXT_KEY, + BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, +}; + +enum bpf_map_type { + BPF_MAP_TYPE_UNSPEC, + BPF_MAP_TYPE_HASH, + BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, + BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_ARRAY_OF_MAPS, + BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, + BPF_MAP_TYPE_XSKMAP, + BPF_MAP_TYPE_SOCKHASH, + BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, +}; + +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ +enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, + BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_PROG_TYPE_LWT_SEG6LOCAL, + BPF_PROG_TYPE_LIRC_MODE2, + BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, +}; + +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_SOCK_OPS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, + BPF_SK_MSG_VERDICT, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + BPF_LIRC_MODE2, + BPF_FLOW_DISSECTOR, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) + +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ +#define BPF_PSEUDO_MAP_FD 1 + +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + +/* flags for BPF_MAP_UPDATE_ELEM command */ +#define BPF_ANY 0 /* create new element or update existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ + +/* flags for BPF_MAP_CREATE command */ +#define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) +/* Specify numa node during map creation */ +#define BPF_F_NUMA_NODE (1U << 2) + +#define BPF_OBJ_NAME_LEN 16U + +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + +/* Flag for stack_map, store build_id+offset instead of pointer */ +#define BPF_F_STACK_BUILD_ID (1U << 5) + +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + +enum bpf_stack_build_id_status { + /* user space need an empty entry to identify end of a trace */ + BPF_STACK_BUILD_ID_EMPTY = 0, + /* with valid build_id and offset */ + BPF_STACK_BUILD_ID_VALID = 1, + /* couldn't get build_id, fallback to ip */ + BPF_STACK_BUILD_ID_IP = 2, +}; + +#define BPF_BUILD_ID_SIZE 20 +struct bpf_stack_build_id { + __s32 status; + unsigned char build_id[BPF_BUILD_ID_SIZE]; + union { + __u64 offset; + __u64 ip; + }; +}; + +union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ + __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ + char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + __u64 flags; + }; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* not used */ + __u32 prog_flags; + char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_ifindex; /* ifindex of netdev to prep for */ + /* For some prog types expected attach type must be known at + * load time to verify attach type specific parts of prog + * (context accesses, allowed helpers, etc). + */ + __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; + __u32 file_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + __u32 attach_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + } test; + + struct { /* anonymous struct used by BPF_*_GET_*_ID */ + union { + __u32 start_id; + __u32 prog_id; + __u32 map_id; + __u32 btf_id; + }; + __u32 next_id; + __u32 open_flags; + }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ + __u32 bpf_fd; + __u32 info_len; + __aligned_u64 info; + } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; + + struct { + __u32 pid; /* input: pid */ + __u32 fd; /* input: fd */ + __u32 flags; /* input: flags */ + __u32 buf_len; /* input/output: buf len */ + __aligned_u64 buf; /* input/output: + * tp_name for tracepoint + * symbol for kprobe + * filename for uprobe + */ + __u32 prog_id; /* output: prod_id */ + __u32 fd_type; /* output: BPF_FD_TYPE_* */ + __u64 probe_offset; /* output: probe_offset */ + __u64 probe_addr; /* output: probe_addr */ + } task_fd_query; +} __attribute__((aligned(8))); + +/* The description below is an attempt at providing documentation to eBPF + * developers about the multiple available eBPF helper functions. It can be + * parsed and used to produce a manual page. The workflow is the following, + * and requires the rst2man utility: + * + * $ ./scripts/bpf_helpers_doc.py \ + * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst + * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 + * $ man /tmp/bpf-helpers.7 + * + * Note that in order to produce this external documentation, some RST + * formatting is used in the descriptions to get "bold" and "italics" in + * manual pages. Also note that the few trailing white spaces are + * intentional, removing them would break paragraphs for rst2man. + * + * Start of BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + * Description + * Perform a lookup in *map* for an entry associated to *key*. + * Return + * Map value associated to *key*, or **NULL** if no entry was + * found. + * + * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * Description + * Add or update the value of the entry associated to *key* in + * *map* with *value*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * Flag value **BPF_NOEXIST** cannot be used for maps of types + * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all + * elements always exist), the helper would return an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * Description + * Delete entry with *key* from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read(void *dst, u32 size, const void *src) + * Description + * For tracing programs, safely attempt to read *size* bytes from + * address *src* and store the data in *dst*. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_ktime_get_ns(void) + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Return + * Current *ktime*. + * + * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * Description + * This helper is a "printk()-like" facility for debugging. It + * prints a message defined by format *fmt* (of size *fmt_size*) + * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * available. It can take up to three additional **u64** + * arguments (as an eBPF helpers, the total number of arguments is + * limited to five). + * + * Each time the helper is called, it appends a line to the trace. + * The format of the trace is customizable, and the exact output + * one will get depends on the options set in + * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *README* file under the same directory). However, it usually + * defaults to something like: + * + * :: + * + * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg> + * + * In the above: + * + * * ``telnet`` is the name of the current task. + * * ``470`` is the PID of the current task. + * * ``001`` is the CPU number on which the task is + * running. + * * In ``.N..``, each character refers to a set of + * options (whether irqs are enabled, scheduling + * options, whether hard/softirqs are running, level of + * preempt_disabled respectively). **N** means that + * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** + * are set. + * * ``419421.045894`` is a timestamp. + * * ``0x00000001`` is a fake value used by BPF for the + * instruction pointer register. + * * ``<formatted msg>`` is the message formatted with + * *fmt*. + * + * The conversion specifiers supported by *fmt* are similar, but + * more limited than for printk(). They are **%d**, **%i**, + * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, + * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size + * of field, padding with zeroes, etc.) is available, and the + * helper will return **-EINVAL** (but print nothing) if it + * encounters an unknown specifier. + * + * Also, note that **bpf_trace_printk**\ () is slow, and should + * only be used for debugging purposes. For this reason, a notice + * bloc (spanning several lines) is printed to kernel logs and + * states that the helper should not be used "for production use" + * the first time this helper is used (or more precisely, when + * **trace_printk**\ () buffers are allocated). For passing values + * to user space, perf events should be preferred. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * u32 bpf_get_prandom_u32(void) + * Description + * Get a pseudo-random number. + * + * From a security point of view, this helper uses its own + * pseudo-random internal state, and cannot be used to infer the + * seed of other random functions in the kernel. However, it is + * essential to note that the generator used by the helper is not + * cryptographically secure. + * Return + * A random 32-bit unsigned value. + * + * u32 bpf_get_smp_processor_id(void) + * Description + * Get the SMP (symmetric multiprocessing) processor id. Note that + * all programs run with preemption disabled, which means that the + * SMP processor id is stable during all the execution of the + * program. + * Return + * The SMP id of the processor running the program. + * + * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. *flags* are a combination of + * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the + * checksum for the packet after storing the bytes) and + * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ + * **->swhash** and *skb*\ **->l4hash** to 0). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * Description + * Recompute the layer 3 (e.g. IP) checksum for the packet + * associated to *skb*. Computation is incremental, so the helper + * must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored in *size*. + * Alternatively, it is possible to store the difference between + * the previous and the new values of the header field in *to*, by + * setting *from* and *size* to 0. For both methods, *offset* + * indicates the location of the IP checksum within the packet. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * Description + * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the + * packet associated to *skb*. Computation is incremental, so the + * helper must know the former value of the header field that was + * modified (*from*), the new value of this field (*to*), and the + * number of bytes (2 or 4) for this field, stored on the lowest + * four bits of *flags*. Alternatively, it is possible to store + * the difference between the previous and the new values of the + * header field in *to*, by setting *from* and the four lowest + * bits of *flags* to 0. For both methods, *offset* indicates the + * location of the IP checksum within the packet. In addition to + * the size of the field, *flags* can be added (bitwise OR) actual + * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left + * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and + * for updates resulting in a null checksum the value is set to + * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates + * the checksum is to be computed against a pseudo-header. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more + * flexibility and can handle sizes larger than 2 or 4 for the + * checksum to update. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * Description + * This special helper is used to trigger a "tail call", or in + * other words, to jump into another eBPF program. The same stack + * frame is used (but values on stack and in registers for the + * caller are not accessible to the callee). This mechanism allows + * for program chaining, either for raising the maximum number of + * available eBPF instructions, or to execute given programs in + * conditional blocks. For security reasons, there is an upper + * limit to the number of successive tail calls that can be + * performed. + * + * Upon call of this helper, the program attempts to jump into a + * program referenced at index *index* in *prog_array_map*, a + * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes + * *ctx*, a pointer to the context. + * + * If the call succeeds, the kernel immediately runs the first + * instruction of the new program. This is not a function call, + * and it never returns to the previous program. If the call + * fails, then the helper has no effect, and the caller continues + * to run its subsequent instructions. A call can fail if the + * destination program for the jump does not exist (i.e. *index* + * is superior to the number of entries in *prog_array_map*), or + * if the maximum number of tail calls has been reached for this + * chain of programs. This limit is defined in the kernel by the + * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), + * which is currently set to 32. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * Description + * Clone and redirect the packet associated to *skb* to another + * net device of index *ifindex*. Both ingress and egress + * interfaces can be used for redirection. The **BPF_F_INGRESS** + * value in *flags* is used to make the distinction (ingress path + * is selected if the flag is present, egress path otherwise). + * This is the only flag supported for now. + * + * In comparison with **bpf_redirect**\ () helper, + * **bpf_clone_redirect**\ () has the associated cost of + * duplicating the packet buffer, but this can be executed out of + * the eBPF program. Conversely, **bpf_redirect**\ () is more + * efficient, but it is handled through an action code where the + * redirection happens only after the eBPF program has returned. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_get_current_pid_tgid(void) + * Return + * A 64-bit integer containing the current tgid and pid, and + * created as such: + * *current_task*\ **->tgid << 32 \|** + * *current_task*\ **->pid**. + * + * u64 bpf_get_current_uid_gid(void) + * Return + * A 64-bit integer containing the current GID and UID, and + * created as such: *current_gid* **<< 32 \|** *current_uid*. + * + * int bpf_get_current_comm(char *buf, u32 size_of_buf) + * Description + * Copy the **comm** attribute of the current task into *buf* of + * *size_of_buf*. The **comm** attribute contains the name of + * the executable (excluding the path) for the current task. The + * *size_of_buf* must be strictly positive. On success, the + * helper makes sure that the *buf* is NUL-terminated. On failure, + * it is filled with zeroes. + * Return + * 0 on success, or a negative error in case of failure. + * + * u32 bpf_get_cgroup_classid(struct sk_buff *skb) + * Description + * Retrieve the classid for the current task, i.e. for the net_cls + * cgroup to which *skb* belongs. + * + * This helper can be used on TC egress path, but not on ingress. + * + * The net_cls cgroup provides an interface to tag network packets + * based on a user-provided identifier for all traffic coming from + * the tasks belonging to the related cgroup. See also the related + * kernel documentation, available from the Linux sources in file + * *Documentation/cgroup-v1/net_cls.txt*. + * + * The Linux kernel has two versions for cgroups: there are + * cgroups v1 and cgroups v2. Both are available to users, who can + * use a mixture of them, but note that the net_cls cgroup is for + * cgroup v1 only. This makes it incompatible with BPF programs + * run on cgroups, which is a cgroup-v2-only feature (a socket can + * only hold data for one version of cgroups at a time). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to + * "**y**" or to "**m**". + * Return + * The classid, or 0 for the default unconfigured classid. + * + * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * Description + * Push a *vlan_tci* (VLAN tag control information) of protocol + * *vlan_proto* to the packet associated to *skb*, then update + * the checksum. Note that if *vlan_proto* is different from + * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to + * be **ETH_P_8021Q**. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_vlan_pop(struct sk_buff *skb) + * Description + * Pop a VLAN header from the packet associated to *skb*. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Get tunnel metadata. This helper takes a pointer *key* to an + * empty **struct bpf_tunnel_key** of **size**, that will be + * filled with tunnel metadata for the packet associated to *skb*. + * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which + * indicates that the tunnel is based on IPv6 protocol instead of + * IPv4. + * + * The **struct bpf_tunnel_key** is an object that generalizes the + * principal parameters used by various tunneling protocols into a + * single struct. This way, it can be used to easily make a + * decision based on the contents of the encapsulation header, + * "summarized" in this struct. In particular, it holds the IP + * address of the remote end (IPv4 or IPv6, depending on the case) + * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, + * this struct exposes the *key*\ **->tunnel_id**, which is + * generally mapped to a VNI (Virtual Network Identifier), making + * it programmable together with the **bpf_skb_set_tunnel_key**\ + * () helper. + * + * Let's imagine that the following code is part of a program + * attached to the TC ingress interface, on one end of a GRE + * tunnel, and is supposed to filter out all messages coming from + * remote ends with IPv4 address other than 10.0.0.1: + * + * :: + * + * int ret; + * struct bpf_tunnel_key key = {}; + * + * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + * if (ret < 0) + * return TC_ACT_SHOT; // drop packet + * + * if (key.remote_ipv4 != 0x0a000001) + * return TC_ACT_SHOT; // drop packet + * + * return TC_ACT_OK; // accept packet + * + * This interface can also be used with all encapsulation devices + * that can operate in "collect metadata" mode: instead of having + * one network device per specific configuration, the "collect + * metadata" mode only requires a single device where the + * configuration can be extracted from this helper. + * + * This can be used together with various tunnels such as VXLan, + * Geneve, GRE or IP in IP (IPIP). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * Description + * Populate tunnel metadata for packet associated to *skb.* The + * tunnel metadata is set to the contents of *key*, of *size*. The + * *flags* can be set to a combination of the following values: + * + * **BPF_F_TUNINFO_IPV6** + * Indicate that the tunnel is based on IPv6 protocol + * instead of IPv4. + * **BPF_F_ZERO_CSUM_TX** + * For IPv4 packets, add a flag to tunnel metadata + * indicating that checksum computation should be skipped + * and checksum set to zeroes. + * **BPF_F_DONT_FRAGMENT** + * Add a flag to tunnel metadata indicating that the + * packet should not be fragmented. + * **BPF_F_SEQ_NUMBER** + * Add a flag to tunnel metadata indicating that a + * sequence number should be added to tunnel header before + * sending the packet. This flag was added for GRE + * encapsulation, but might be used with other protocols + * as well in the future. + * + * Here is a typical usage on the transmit path: + * + * :: + * + * struct bpf_tunnel_key key; + * populate key ... + * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); + * + * See also the description of the **bpf_skb_get_tunnel_key**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) + * Description + * Read the value of a perf event counter. This helper relies on a + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of + * the perf event counter is selected when *map* is updated with + * perf event file descriptors. The *map* is an array whose size + * is the number of available CPUs, and each cell contains a value + * relative to one CPU. The value to retrieve is indicated by + * *flags*, that contains the index of the CPU to look up, masked + * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * Note that before Linux 4.13, only hardware perf event can be + * retrieved. + * + * Also, be aware that the newer helper + * **bpf_perf_event_read_value**\ () is recommended over + * **bpf_perf_event_read**\ () in general. The latter has some ABI + * quirks where error and counter value are used as a return code + * (which is wrong to do since ranges may overlap). This issue is + * fixed with **bpf_perf_event_read_value**\ (), which at the same + * time provides more features over the **bpf_perf_event_read**\ + * () interface. Please refer to the description of + * **bpf_perf_event_read_value**\ () for details. + * Return + * The value of the perf event counter read from the map, or a + * negative error code in case of failure. + * + * int bpf_redirect(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_clone_redirect**\ + * (), except that the packet is not cloned, which provides + * increased performance. + * + * Except for XDP, both ingress and egress interfaces can be used + * for redirection. The **BPF_F_INGRESS** value in *flags* is used + * to make the distinction (ingress path is selected if the flag + * is present, egress path otherwise). Currently, XDP only + * supports redirection to the egress interface, and accepts no + * flag at all. + * + * The same effect can be attained with the more generic + * **bpf_redirect_map**\ (), which requires specific maps to be + * used but offers better performance. + * Return + * For XDP, the helper returns **XDP_REDIRECT** on success or + * **XDP_ABORTED** on error. For other program types, the values + * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on + * error. + * + * u32 bpf_get_route_realm(struct sk_buff *skb) + * Description + * Retrieve the realm or the route, that is to say the + * **tclassid** field of the destination for the *skb*. The + * identifier retrieved is a user-provided tag, similar to the + * one used with the net_cls cgroup (see description for + * **bpf_get_cgroup_classid**\ () helper), but here this tag is + * held by a route (a destination entry), not by a task. + * + * Retrieving this identifier works with the clsact TC egress hook + * (see also **tc-bpf(8)**), or alternatively on conventional + * classful egress qdiscs, but not on TC ingress path. In case of + * clsact TC egress hook, this has the advantage that, internally, + * the destination entry has not been dropped yet in the transmit + * path. Therefore, the destination entry does not need to be + * artificially held via **netif_keep_dst**\ () for a classful + * qdisc until the *skb* is freed. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_IP_ROUTE_CLASSID** configuration option. + * Return + * The realm of the route for the packet associated to *skb*, or 0 + * if none was found. + * + * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * The context of the program *ctx* needs also be passed to the + * helper. + * + * On user space, a program willing to read the values needs to + * call **perf_event_open**\ () on the perf event (either for + * one or for all CPUs) and to store the file descriptor into the + * *map*. This must be done before the eBPF program can send data + * into it. An example is available in file + * *samples/bpf/trace_output_user.c* in the Linux kernel source + * tree (the eBPF program counterpart is in + * *samples/bpf/trace_output_kern.c*). + * + * **bpf_perf_event_output**\ () achieves better performance + * than **bpf_trace_printk**\ () for sharing data with user + * space, and is much better suitable for streaming data from eBPF + * programs. + * + * Note that this helper is not restricted to tracing use cases + * and can be used with programs attached to TC or XDP as well, + * where it allows for passing data to user space listeners. Data + * can be: + * + * * Only custom structs, + * * Only the packet payload, or + * * A combination of both. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) + * Description + * This helper was provided as an easy way to load data from a + * packet. It can be used to load *len* bytes from *offset* from + * the packet associated to *skb*, into the buffer pointed by + * *to*. + * + * Since Linux 4.7, usage of this helper has mostly been replaced + * by "direct packet access", enabling packet data to be + * manipulated with *skb*\ **->data** and *skb*\ **->data_end** + * pointing respectively to the first byte of packet data and to + * the byte after the last byte of packet data. However, it + * remains useful if one wishes to read large quantities of data + * at once from a packet into the eBPF stack. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags) + * Description + * Walk a user or a kernel stack and return its id. To achieve + * this, the helper needs *ctx*, which is a pointer to the context + * on which the tracing program is executed, and a pointer to a + * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * a combination of the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_FAST_STACK_CMP** + * Compare stacks by hash only. + * **BPF_F_REUSE_STACKID** + * If two different stacks hash into the same *stackid*, + * discard the old one. + * + * The stack id retrieved is a 32 bit long integer handle which + * can be further combined with other data (including other stack + * ids) and used as a key into maps. This can be useful for + * generating a variety of graphs (such as flame graphs or off-cpu + * graphs). + * + * For walking a stack, this helper is an improvement over + * **bpf_probe_read**\ (), which can be used with unrolled loops + * but is not efficient and consumes a lot of eBPF instructions. + * Instead, **bpf_get_stackid**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * The positive or null stack id on success, or a negative error + * in case of failure. + * + * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) + * Description + * Compute a checksum difference, from the raw buffer pointed by + * *from*, of length *from_size* (that must be a multiple of 4), + * towards the raw buffer pointed by *to*, of size *to_size* + * (same remark). An optional *seed* can be added to the value + * (this can be cascaded, the seed may come from a previous call + * to the helper). + * + * This is flexible enough to be used in several ways: + * + * * With *from_size* == 0, *to_size* > 0 and *seed* set to + * checksum, it can be used when pushing new data. + * * With *from_size* > 0, *to_size* == 0 and *seed* set to + * checksum, it can be used when removing data from a packet. + * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it + * can be used to compute a diff. Note that *from_size* and + * *to_size* do not need to be equal. + * + * This helper can be used in combination with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to + * which one can feed in the difference computed with + * **bpf_csum_diff**\ (). + * Return + * The checksum result, or a negative error code in case of + * failure. + * + * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * Description + * Retrieve tunnel options metadata for the packet associated to + * *skb*, and store the raw tunnel option data to the buffer *opt* + * of *size*. + * + * This helper can be used with encapsulation devices that can + * operate in "collect metadata" mode (please refer to the related + * note in the description of **bpf_skb_get_tunnel_key**\ () for + * more details). A particular example where this can be used is + * in combination with the Geneve encapsulation protocol, where it + * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) + * and retrieving arbitrary TLVs (Type-Length-Value headers) from + * the eBPF program. This allows for full customization of these + * headers. + * Return + * The size of the option data retrieved. + * + * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) + * Description + * Set tunnel options metadata for the packet associated to *skb* + * to the option data contained in the raw buffer *opt* of *size*. + * + * See also the description of the **bpf_skb_get_tunnel_opt**\ () + * helper for additional information. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * Description + * Change the protocol of the *skb* to *proto*. Currently + * supported are transition from IPv4 to IPv6, and from IPv6 to + * IPv4. The helper takes care of the groundwork for the + * transition, including resizing the socket buffer. The eBPF + * program is expected to fill the new headers, if any, via + * **skb_store_bytes**\ () and to recompute the checksums with + * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ + * (). The main case for this helper is to perform NAT64 + * operations out of an eBPF program. + * + * Internally, the GSO type is marked as dodgy so that headers are + * checked and segments are recalculated by the GSO/GRO engine. + * The size for GSO target is adapted as well. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * Description + * Change the packet type for the packet associated to *skb*. This + * comes down to setting *skb*\ **->pkt_type** to *type*, except + * the eBPF program does not have a write access to *skb*\ + * **->pkt_type** beside this helper. Using a helper here allows + * for graceful handling of errors. + * + * The major use case is to change incoming *skb*s to + * **PACKET_HOST** in a programmatic way instead of having to + * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for + * example. + * + * Note that *type* only allows certain values. At this time, they + * are: + * + * **PACKET_HOST** + * Packet is for us. + * **PACKET_BROADCAST** + * Send packet to all. + * **PACKET_MULTICAST** + * Send packet to group. + * **PACKET_OTHERHOST** + * Send packet to someone else. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * Description + * Check whether *skb* is a descendant of the cgroup2 held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* failed the cgroup2 descendant test. + * * 1, if the *skb* succeeded the cgroup2 descendant test. + * * A negative error code, if an error occurred. + * + * u32 bpf_get_hash_recalc(struct sk_buff *skb) + * Description + * Retrieve the hash of the packet, *skb*\ **->hash**. If it is + * not set, in particular if the hash was cleared due to mangling, + * recompute this hash. Later accesses to the hash can be done + * directly with *skb*\ **->hash**. + * + * Calling **bpf_set_hash_invalid**\ (), changing a packet + * prototype with **bpf_skb_change_proto**\ (), or calling + * **bpf_skb_store_bytes**\ () with the + * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear + * the hash and to trigger a new computation for the next call to + * **bpf_get_hash_recalc**\ (). + * Return + * The 32-bit hash. + * + * u64 bpf_get_current_task(void) + * Return + * A pointer to the current task struct. + * + * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * Description + * Attempt in a safe way to write *len* bytes from the buffer + * *src* to *dst* in memory. It only works for threads that are in + * user context, and *dst* must be a valid user space address. + * + * This helper should not be used to implement any kind of + * security mechanism because of TOC-TOU attacks, but rather to + * debug, divert, and manipulate execution of semi-cooperative + * processes. + * + * Keep in mind that this feature is meant for experiments, and it + * has a risk of crashing the system and running programs. + * Therefore, when an eBPF program using this helper is attached, + * a warning including PID and process name is printed to kernel + * logs. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * Description + * Check whether the probe is being run is the context of a given + * subset of the cgroup2 hierarchy. The cgroup2 to test is held by + * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. + * Return + * The return value depends on the result of the test, and can be: + * + * * 0, if the *skb* task belongs to the cgroup2. + * * 1, if the *skb* task does not belong to the cgroup2. + * * A negative error code, if an error occurred. + * + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Resize (trim or grow) the packet associated to *skb* to the + * new *len*. The *flags* are reserved for future usage, and must + * be left at zero. + * + * The basic idea is that the helper performs the needed work to + * change the size of the packet, then the eBPF program rewrites + * the rest via helpers like **bpf_skb_store_bytes**\ (), + * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () + * and others. This helper is a slow path utility intended for + * replies with control messages. And because it is targeted for + * slow path, the helper itself can afford to be slow: it + * implicitly linearizes, unclones and drops offloads from the + * *skb*. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * Description + * Pull in non-linear data in case the *skb* is non-linear and not + * all of *len* are part of the linear section. Make *len* bytes + * from *skb* readable and writable. If a zero value is passed for + * *len*, then the whole length of the *skb* is pulled. + * + * This helper is only needed for reading and writing with direct + * packet access. + * + * For direct packet access, testing that offsets to access + * are within packet boundaries (test on *skb*\ **->data_end**) is + * susceptible to fail if offsets are invalid, or if the requested + * data is in non-linear parts of the *skb*. On failure the + * program can just bail out, or in the case of a non-linear + * buffer, use a helper to make the data available. The + * **bpf_skb_load_bytes**\ () helper is a first solution to access + * the data. Another one consists in using **bpf_skb_pull_data** + * to pull in once the non-linear parts, then retesting and + * eventually access the data. + * + * At the same time, this also makes sure the *skb* is uncloned, + * which is a necessary condition for direct write. As this needs + * to be an invariant for the write part only, the verifier + * detects writes and adds a prologue that is calling + * **bpf_skb_pull_data()** to effectively unclone the *skb* from + * the very beginning in case it is indeed cloned. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) + * Description + * Add the checksum *csum* into *skb*\ **->csum** in case the + * driver has supplied a checksum for the entire packet into that + * field. Return an error otherwise. This helper is intended to be + * used in combination with **bpf_csum_diff**\ (), in particular + * when the checksum needs to be updated after data has been + * written into the packet through direct packet access. + * Return + * The checksum on success, or a negative error code in case of + * failure. + * + * void bpf_set_hash_invalid(struct sk_buff *skb) + * Description + * Invalidate the current *skb*\ **->hash**. It can be used after + * mangling on headers through direct packet access, in order to + * indicate that the hash is outdated and to trigger a + * recalculation the next time the kernel tries to access this + * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * + * int bpf_get_numa_node_id(void) + * Description + * Return the id of the current NUMA node. The primary use case + * for this helper is the selection of sockets for the local NUMA + * node, when the program is attached to sockets using the + * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), + * but the helper is also available to other eBPF program types, + * similarly to **bpf_get_smp_processor_id**\ (). + * Return + * The id of current NUMA node. + * + * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * Description + * Grows headroom of packet associated to *skb* and adjusts the + * offset of the MAC header accordingly, adding *len* bytes of + * space. It automatically extends and reallocates memory as + * required. + * + * This helper can be used on a layer 3 *skb* to push a MAC header + * for redirection into a layer 2 device. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that + * it is possible to use a negative value for *delta*. This helper + * can be used to prepare the packet for pushing or popping + * headers. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) + * Description + * Copy a NUL terminated string from an unsafe address + * *unsafe_ptr* to *dst*. The *size* should include the + * terminating NUL byte. In case the string length is smaller than + * *size*, the target is not padded with further NUL bytes. If the + * string length is larger than *size*, just *size*-1 bytes are + * copied and the last byte is set to NUL. + * + * On success, the length of the copied string is returned. This + * makes this helper useful in tracing programs for reading + * strings, and more importantly to get its length at runtime. See + * the following snippet: + * + * :: + * + * SEC("kprobe/sys_open") + * void bpf_sys_open(struct pt_regs *ctx) + * { + * char buf[PATHLEN]; // PATHLEN is defined to 256 + * int res = bpf_probe_read_str(buf, sizeof(buf), + * ctx->di); + * + * // Consume buf, for example push it to + * // userspace via bpf_perf_event_output(); we + * // can use res (the string length) as event + * // size, after checking its boundaries. + * } + * + * In comparison, using **bpf_probe_read()** helper here instead + * to read the string would require to estimate the length at + * compile time, and would often result in copying more memory + * than necessary. + * + * Another useful use case is when parsing individual process + * arguments or individual environment variables navigating + * *current*\ **->mm->arg_start** and *current*\ + * **->mm->env_start**: using this helper and the return value, + * one can quickly iterate at the right offset of the memory area. + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + * + * u64 bpf_get_socket_cookie(struct sk_buff *skb) + * Description + * If the **struct sk_buff** pointed by *skb* has a known socket, + * retrieve the cookie (generated by the kernel) of this socket. + * If no cookie has been set yet, generate a new cookie. Once + * generated, the socket cookie remains stable for the life of the + * socket. This helper can be useful for monitoring per socket + * networking traffic statistics as it provides a unique socket + * identifier per namespace. + * Return + * A 8-byte long non-decreasing number on success, or 0 if the + * socket field is missing inside *skb*. + * + * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * Return + * A 8-byte long non-decreasing number. + * + * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) + * Description + * Equivalent to bpf_get_socket_cookie() helper that accepts + * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * Return + * A 8-byte long non-decreasing number. + * + * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Return + * The owner UID of the socket associated to *skb*. If the socket + * is **NULL**, or if it is not a full socket (i.e. if it is a + * time-wait or a request socket instead), **overflowuid** value + * is returned (note that **overflowuid** might also be the actual + * UID value for the socket). + * + * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * Description + * Set the full hash for *skb* (set the field *skb*\ **->hash**) + * to value *hash*. + * Return + * 0 + * + * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * Description + * Emulate a call to **setsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **setsockopt(2)** for more information. + * The option value of length *optlen* is pointed by *optval*. + * + * This helper actually implements a subset of **setsockopt()**. + * It supports the following *level*\ s: + * + * * **SOL_SOCKET**, which supports the following *optname*\ s: + * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * * **IPPROTO_TCP**, which supports the following *optname*\ s: + * **TCP_CONGESTION**, **TCP_BPF_IW**, + * **TCP_BPF_SNDCWND_CLAMP**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * Description + * Grow or shrink the room for data in the packet associated to + * *skb* by *len_diff*, and according to the selected *mode*. + * + * There is a single supported mode at this time: + * + * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer + * (room space is added or removed below the layer 3 header). + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the endpoint referenced by *map* at + * index *key*. Depending on its type, this *map* can contain + * references to net devices (for forwarding packets through other + * ports), or to CPUs (for redirecting XDP frames to another CPU; + * but this is only implemented for native XDP (with driver + * support) as of this writing). + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * When used to redirect packets to net devices, this helper + * provides a high performance increase over **bpf_redirect**\ (). + * This is due to various implementation details of the underlying + * mechanisms, one of which is the fact that **bpf_redirect_map**\ + * () tries to send packet as a "bulk" to the device. + * Return + * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * + * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * Description + * Redirect the packet to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a *map* referencing sockets. The + * *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust the address pointed by *xdp_md*\ **->data_meta** by + * *delta* (which can be positive or negative). Note that this + * operation modifies the address stored in *xdp_md*\ **->data**, + * so the latter must be loaded only after the helper has been + * called. + * + * The use of *xdp_md*\ **->data_meta** is optional and programs + * are not required to use it. The rationale is that when the + * packet is processed with XDP (e.g. as DoS filter), it is + * possible to push further meta data along with it before passing + * to the stack, and to give the guarantee that an ingress eBPF + * program attached as a TC classifier on the same device can pick + * this up for further post-processing. Since TC works with socket + * buffers, it remains possible to set from XDP the **mark** or + * **priority** pointers, or other pointers for the socket buffer. + * Having this scratch space generic and programmable allows for + * more flexibility as the user is free to store whatever meta + * data they need. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * Read the value of a perf event counter, and store it into *buf* + * of size *buf_size*. This helper relies on a *map* of type + * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event + * counter is selected when *map* is updated with perf event file + * descriptors. The *map* is an array whose size is the number of + * available CPUs, and each cell contains a value relative to one + * CPU. The value to retrieve is indicated by *flags*, that + * contains the index of the CPU to look up, masked with + * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to + * **BPF_F_CURRENT_CPU** to indicate that the value for the + * current CPU should be retrieved. + * + * This helper behaves in a way close to + * **bpf_perf_event_read**\ () helper, save that instead of + * just returning the value observed, it fills the *buf* + * structure. This allows for additional data to be retrieved: in + * particular, the enabled and running times (in *buf*\ + * **->enabled** and *buf*\ **->running**, respectively) are + * copied. In general, **bpf_perf_event_read_value**\ () is + * recommended over **bpf_perf_event_read**\ (), which has some + * ABI issues and provides fewer functionalities. + * + * These values are interesting, because hardware PMU (Performance + * Monitoring Unit) counters are limited resources. When there are + * more PMU based perf events opened than available counters, + * kernel will multiplex these events so each event gets certain + * percentage (but not all) of the PMU time. In case that + * multiplexing happens, the number of samples or counter value + * will not reflect the case compared to when no multiplexing + * occurs. This makes comparison between different runs difficult. + * Typically, the counter value should be normalized before + * comparing to other experiments. The usual normalization is done + * as follows. + * + * :: + * + * normalized_counter = counter * t_enabled / t_running + * + * Where t_enabled is the time enabled for event and t_running is + * the time running for event since last normalization. The + * enabled and running times are accumulated since the perf event + * open. To achieve scaling factor between two invocations of an + * eBPF program, users can can use CPU id as the key (which is + * typical for perf array usage model) to remember the previous + * value and do the calculation inside the eBPF program. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * Description + * For en eBPF program attached to a perf event, retrieve the + * value of the event counter associated to *ctx* and store it in + * the structure pointed by *buf* and of size *buf_size*. Enabled + * and running times are also stored in the structure (see + * description of helper **bpf_perf_event_read_value**\ () for + * more details). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) + * Description + * Emulate a call to **getsockopt()** on the socket associated to + * *bpf_socket*, which must be a full socket. The *level* at + * which the option resides and the name *optname* of the option + * must be specified, see **getsockopt(2)** for more information. + * The retrieved value is stored in the structure pointed by + * *opval* and of length *optlen*. + * + * This helper actually implements a subset of **getsockopt()**. + * It supports the following *level*\ s: + * + * * **IPPROTO_TCP**, which supports *optname* + * **TCP_CONGESTION**. + * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. + * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_override_return(struct pt_reg *regs, u64 rc) + * Description + * Used for error injection, this helper uses kprobes to override + * the return value of the probed function, and to set it to *rc*. + * The first argument is the context *regs* on which the kprobe + * works. + * + * This helper works by setting setting the PC (program counter) + * to an override function which is run in place of the original + * probed function. This means the probed function is not run at + * all. The replacement function just returns with the required + * value. + * + * This helper has security implications, and thus is subject to + * restrictions. It is only available if the kernel was compiled + * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration + * option, and in this case it only works on functions tagged with + * **ALLOW_ERROR_INJECTION** in the kernel code. + * + * Also, the helper is only available for the architectures having + * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, + * x86 architecture is the only one to support this feature. + * Return + * 0 + * + * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * Description + * Attempt to set the value of the **bpf_sock_ops_cb_flags** field + * for the full TCP socket associated to *bpf_sock_ops* to + * *argval*. + * + * The primary use of this field is to determine if there should + * be calls to eBPF programs of type + * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP + * code. A program of the same type can change its value, per + * connection and as necessary, when the connection is + * established. This field is directly accessible for reading, but + * this helper must be used for updates in order to return an + * error if an eBPF program tries to set a callback that is not + * supported in the current kernel. + * + * The supported callback values that *argval* can combine are: + * + * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) + * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) + * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * + * Here are some examples of where one could call such eBPF + * program: + * + * * When RTO fires. + * * When a packet is retransmitted. + * * When the connection terminates. + * * When a packet is sent. + * * When a packet is received. + * Return + * Code **-EINVAL** if the socket is not a full TCP socket; + * otherwise, a positive number containing the bits that could not + * be set is returned (which comes down to 0 if all bits were set + * as required). + * + * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, apply the verdict of the eBPF program to + * the next *bytes* (number of bytes) of message *msg*. + * + * For example, this helper can be used in the following cases: + * + * * A single **sendmsg**\ () or **sendfile**\ () system call + * contains multiple logical messages that the eBPF program is + * supposed to read and for which it should apply a verdict. + * * An eBPF program only cares to read the first *bytes* of a + * *msg*. If the message has a large payload, then setting up + * and calling the eBPF program repeatedly for all bytes, even + * though the verdict is already known, would create unnecessary + * overhead. + * + * When called from within an eBPF program, the helper sets a + * counter internal to the BPF infrastructure, that is used to + * apply the last verdict to the next *bytes*. If *bytes* is + * smaller than the current data being processed from a + * **sendmsg**\ () or **sendfile**\ () system call, the first + * *bytes* will be sent and the eBPF program will be re-run with + * the pointer for start of data pointing to byte number *bytes* + * **+ 1**. If *bytes* is larger than the current data being + * processed, then the eBPF verdict will be applied to multiple + * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are + * consumed. + * + * Note that if a socket closes with the internal counter holding + * a non-zero value, this is not a problem because data is not + * being buffered for *bytes* and is sent as it is received. + * Return + * 0 + * + * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * Description + * For socket policies, prevent the execution of the verdict eBPF + * program for message *msg* until *bytes* (byte number) have been + * accumulated. + * + * This can be used when one needs a specific number of bytes + * before a verdict can be assigned, even if the data spans + * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme + * case would be a user calling **sendmsg**\ () repeatedly with + * 1-byte long message segments. Obviously, this is bad for + * performance, but it is still valid. If the eBPF program needs + * *bytes* bytes to validate a header, this helper can be used to + * prevent the eBPF program to be called again until *bytes* have + * been accumulated. + * Return + * 0 + * + * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * Description + * For socket policies, pull in non-linear data from user space + * for *msg* and set pointers *msg*\ **->data** and *msg*\ + * **->data_end** to *start* and *end* bytes offsets into *msg*, + * respectively. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it can only parse data that the (**data**, **data_end**) + * pointers have already consumed. For **sendmsg**\ () hooks this + * is likely the first scatterlist element. But for calls relying + * on the **sendpage** handler (e.g. **sendfile**\ ()) this will + * be the range (**0**, **0**) because the data is shared with + * user space and by default the objective is to avoid allowing + * user space to modify data while (or after) eBPF verdict is + * being decided. This helper can be used to pull in data and to + * set the start and end pointer to given values. Data will be + * copied if necessary (i.e. if data was not linear and if start + * and end pointers do not point to the same chunk). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * Description + * Bind the socket associated to *ctx* to the address pointed by + * *addr*, of length *addr_len*. This allows for making outgoing + * connection from the desired IP address, which can be useful for + * example when all processes inside a cgroup should use one + * single IP address on a host that has multiple IP configured. + * + * This helper works for IPv4 and IPv6, TCP and UDP sockets. The + * domain (*addr*\ **->sa_family**) must be **AF_INET** (or + * **AF_INET6**). Looking for a free port to bind to can be + * expensive, therefore binding to port is not permitted by the + * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) + * must be set to zero. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * Description + * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is + * only possible to shrink the packet as of this writing, + * therefore *delta* must be a negative integer. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * Description + * Retrieve the XFRM state (IP transform framework, see also + * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. + * + * The retrieved value is stored in the **struct bpf_xfrm_state** + * pointed by *xfrm_state* and of length *size*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_XFRM** configuration option. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *ctx*, which is a pointer + * to the context on which the tracing program is executed. + * To store the stacktrace, the bpf program provides *buf* with + * a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + * + * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) + * Description + * This helper is similar to **bpf_skb_load_bytes**\ () in that + * it provides an easy way to load *len* bytes from *offset* + * from the packet associated to *skb*, into the buffer pointed + * by *to*. The difference to **bpf_skb_load_bytes**\ () is that + * a fifth argument *start_header* exists in order to select a + * base offset to start from. *start_header* can be one of: + * + * **BPF_HDR_START_MAC** + * Base offset to load data from is *skb*'s mac header. + * **BPF_HDR_START_NET** + * Base offset to load data from is *skb*'s network header. + * + * In general, "direct packet access" is the preferred method to + * access packet data, however, this helper is in particular useful + * in socket filters where *skb*\ **->data** does not always point + * to the start of the mac header and where "direct packet access" + * is not available. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * Description + * Do FIB lookup in kernel tables using parameters in *params*. + * If lookup is successful and result shows packet is to be + * forwarded, the neighbor tables are searched for the nexthop. + * If successful (ie., FIB lookup shows forwarding and nexthop + * is resolved), the nexthop address is returned in ipv4_dst + * or ipv6_dst based on family, smac is set to mac address of + * egress device, dmac is set to nexthop mac address, rt_metric + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. + * + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_FIB_LOOKUP_DIRECT** + * Do a direct table lookup vs full lookup using FIB + * rules. + * **BPF_FIB_LOOKUP_OUTPUT** + * Perform lookup from an egress perspective (default is + * ingress). + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * packet is not forwarded or needs assist from full stack + * + * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) + * Description + * Add an entry to, or update a sockhash *map* referencing sockets. + * The *skops* is used as a new value for the entry associated to + * *key*. *flags* is one of: + * + * **BPF_NOEXIST** + * The entry for *key* must not exist in the map. + * **BPF_EXIST** + * The entry for *key* must already exist in the map. + * **BPF_ANY** + * No condition on the existence of the entry for *key*. + * + * If the *map* has eBPF programs (parser and verdict), those will + * be inherited by the socket being added. If the socket is + * already attached to eBPF programs, this results in an error. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * socket level. If the message *msg* is allowed to pass (i.e. if + * the verdict eBPF program returns **SK_PASS**), redirect it to + * the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress path otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * Description + * This helper is used in programs implementing policies at the + * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. + * if the verdeict eBPF program returns **SK_PASS**), redirect it + * to the socket referenced by *map* (of type + * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and + * egress interfaces can be used for redirection. The + * **BPF_F_INGRESS** value in *flags* is used to make the + * distinction (ingress path is selected if the flag is present, + * egress otherwise). This is the only flag supported for now. + * Return + * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * Description + * Encapsulate the packet associated to *skb* within a Layer 3 + * protocol header. This header is provided in the buffer at + * address *hdr*, with *len* its size in bytes. *type* indicates + * the protocol of the header and can be one of: + * + * **BPF_LWT_ENCAP_SEG6** + * IPv6 encapsulation with Segment Routing Header + * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, + * the IPv6 header is computed by the kernel. + * **BPF_LWT_ENCAP_SEG6_INLINE** + * Only works if *skb* contains an IPv6 packet. Insert a + * Segment Routing Header (**struct ipv6_sr_hdr**) inside + * the IPv6 header. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. Only the flags, tag and TLVs + * inside the outermost IPv6 Segment Routing Header can be + * modified through this helper. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * Description + * Adjust the size allocated to TLVs in the outermost IPv6 + * Segment Routing Header contained in the packet associated to + * *skb*, at position *offset* by *delta* bytes. Only offsets + * after the segments are accepted. *delta* can be as well + * positive (growing) as negative (shrinking). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * Description + * Apply an IPv6 Segment Routing action of type *action* to the + * packet associated to *skb*. Each action takes a parameter + * contained at address *param*, and of length *param_len* bytes. + * *action* can be one of: + * + * **SEG6_LOCAL_ACTION_END_X** + * End.X action: Endpoint with Layer-3 cross-connect. + * Type of *param*: **struct in6_addr**. + * **SEG6_LOCAL_ACTION_END_T** + * End.T action: Endpoint with specific IPv6 table lookup. + * Type of *param*: **int**. + * **SEG6_LOCAL_ACTION_END_B6** + * End.B6 action: Endpoint bound to an SRv6 policy. + * Type of param: **struct ipv6_sr_hdr**. + * **SEG6_LOCAL_ACTION_END_B6_ENCAP** + * End.B6.Encap action: Endpoint bound to an SRv6 + * encapsulation policy. + * Type of param: **struct ipv6_sr_hdr**. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). + * + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 + * + * int bpf_rc_repeat(void *ctx) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. + * + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 + * + * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * Description + * Return the cgroup v2 id of the socket associated with the *skb*. + * This is roughly similar to the **bpf_get_cgroup_classid**\ () + * helper for cgroup v1 by providing a tag resp. identifier that + * can be matched on or used for map lookups e.g. to implement + * policy. The cgroup v2 id of a given path in the hierarchy is + * exposed in user space through the f_handle API in order to get + * to the same 64-bit id. + * + * This helper can be used on TC egress path, but not on ingress, + * and is available only if the kernel was compiled with the + * **CONFIG_SOCK_CGROUP_DATA** configuration option. + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * u64 bpf_get_current_cgroup_id(void) + * Return + * A 64-bit integer containing the current cgroup id based + * on the cgroup within which the current task is running. + * + * void* get_local_storage(void *map, u64 flags) + * Description + * Get the pointer to the local storage area. + * The type and the size of the local storage is defined + * by the *map* argument. + * The *flags* meaning is specific for each map type, + * and has to be 0 for cgroup local storage. + * + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, + * running simultaneously. + * + * A user should care about the synchronization by himself. + * For example, by using the **BPF_STX_XADD** instruction to alter + * the shared data. + * Return + * A pointer to the local storage area. + * + * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * Description + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. + * Return + * 0 on success, or a negative error in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * int bpf_sk_release(struct bpf_sock *sock) + * Description + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * Description + * For socket policies, insert *len* bytes into *msg* at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the *msg*. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of *msg* + * payload and/or *pop* value being to large. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), \ + FN(skb_change_head), \ + FN(xdp_adjust_head), \ + FN(probe_read_str), \ + FN(get_socket_cookie), \ + FN(get_socket_uid), \ + FN(set_hash), \ + FN(setsockopt), \ + FN(skb_adjust_room), \ + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), \ + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), \ + FN(perf_prog_read_value), \ + FN(getsockopt), \ + FN(override_return), \ + FN(sock_ops_cb_flags_set), \ + FN(msg_redirect_map), \ + FN(msg_apply_bytes), \ + FN(msg_cork_bytes), \ + FN(msg_pull_data), \ + FN(bind), \ + FN(xdp_adjust_tail), \ + FN(skb_get_xfrm_state), \ + FN(get_stack), \ + FN(skb_load_bytes_relative), \ + FN(fib_lookup), \ + FN(sock_hash_update), \ + FN(msg_redirect_hash), \ + FN(sk_redirect_hash), \ + FN(lwt_push_encap), \ + FN(lwt_seg6_store_bytes), \ + FN(lwt_seg6_adjust_srh), \ + FN(lwt_seg6_action), \ + FN(rc_repeat), \ + FN(rc_keydown), \ + FN(skb_cgroup_id), \ + FN(get_current_cgroup_id), \ + FN(get_local_storage), \ + FN(sk_select_reuseport), \ + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), \ + FN(msg_push_data), \ + FN(msg_pop_data), \ + FN(rc_pointer_rel), + +/* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call + */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +enum bpf_func_id { + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + __BPF_FUNC_MAX_ID, +}; +#undef __BPF_ENUM_FN + +/* All flags used by eBPF helper functions, placed here. */ + +/* BPF_FUNC_skb_store_bytes flags. */ +#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) +#define BPF_F_INVALIDATE_HASH (1ULL << 1) + +/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. + * First 4 bits are for passing the header field size. + */ +#define BPF_F_HDR_FIELD_MASK 0xfULL + +/* BPF_FUNC_l4_csum_replace flags. */ +#define BPF_F_PSEUDO_HDR (1ULL << 4) +#define BPF_F_MARK_MANGLED_0 (1ULL << 5) +#define BPF_F_MARK_ENFORCE (1ULL << 6) + +/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ +#define BPF_F_INGRESS (1ULL << 0) + +/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ +#define BPF_F_TUNINFO_IPV6 (1ULL << 0) + +/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ +#define BPF_F_SKIP_FIELD_MASK 0xffULL +#define BPF_F_USER_STACK (1ULL << 8) +/* flags used by BPF_FUNC_get_stackid only. */ +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define BPF_F_REUSE_STACKID (1ULL << 10) +/* flags used by BPF_FUNC_get_stack only. */ +#define BPF_F_USER_BUILD_ID (1ULL << 11) + +/* BPF_FUNC_skb_set_tunnel_key flags. */ +#define BPF_F_ZERO_CSUM_TX (1ULL << 1) +#define BPF_F_DONT_FRAGMENT (1ULL << 2) +#define BPF_F_SEQ_NUMBER (1ULL << 3) + +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +/* BPF_FUNC_perf_event_output for sk_buff input context. */ +#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + +/* Mode for BPF_FUNC_skb_adjust_room helper. */ +enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET, +}; + +/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ +enum bpf_hdr_start_off { + BPF_HDR_START_MAC, + BPF_HDR_START_NET, +}; + +/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ +enum bpf_lwt_encap_mode { + BPF_LWT_ENCAP_SEG6, + BPF_LWT_ENCAP_SEG6_INLINE +}; + +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + +/* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +struct __sk_buff { + __u32 len; + __u32 pkt_type; + __u32 mark; + __u32 queue_mapping; + __u32 protocol; + __u32 vlan_present; + __u32 vlan_tci; + __u32 vlan_proto; + __u32 priority; + __u32 ingress_ifindex; + __u32 ifindex; + __u32 tc_index; + __u32 cb[5]; + __u32 hash; + __u32 tc_classid; + __u32 data; + __u32 data_end; + __u32 napi_id; + + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); + __u64 tstamp; + __u32 wire_len; +}; + +struct bpf_tunnel_key { + __u32 tunnel_id; + union { + __u32 remote_ipv4; + __u32 remote_ipv6[4]; + }; + __u8 tunnel_tos; + __u8 tunnel_ttl; + __u16 tunnel_ext; /* Padding, future use. */ + __u32 tunnel_label; +}; + +/* user accessible mirror of in-kernel xfrm_state. + * new fields can only be added to the end of this structure + */ +struct bpf_xfrm_state { + __u32 reqid; + __u32 spi; /* Stored in network byte order */ + __u16 family; + __u16 ext; /* Padding, future use. */ + union { + __u32 remote_ipv4; /* Stored in network byte order */ + __u32 remote_ipv6[4]; /* Stored in network byte order */ + }; +}; + +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes */ +}; + +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; + __u32 mark; + __u32 priority; + __u32 src_ip4; /* Allows 1,2,4-byte read. + * Stored in network byte order. + */ + __u32 src_ip6[4]; /* Allows 1,2,4-byte read. + * Stored in network byte order. + */ + __u32 src_port; /* Allows 4-byte read. + * Stored in host byte order + */ +}; + +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + +#define XDP_PACKET_HEADROOM 256 + +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). + */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, + XDP_TX, + XDP_REDIRECT, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; + __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ +}; + +enum sk_action { + SK_DROP = 0, + SK_PASS, +}; + +/* user accessible metadata for SK_MSG packet hook, new fields must + * be added to the end of this structure + */ +struct sk_msg_md { + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); + + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ +}; + +struct sk_reuseport_md { + /* + * Start of directly accessible data. It begins from + * the tcp/udp header. + */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); + /* + * Total length of packet (starting from the tcp/udp header). + * Note that the directly accessible bytes (data_end - data) + * could be less than this "len". Those bytes could be + * indirectly read by a helper "bpf_skb_load_bytes()". + */ + __u32 len; + /* + * Eth protocol in the mac header (network byte order). e.g. + * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD) + */ + __u32 eth_protocol; + __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ + __u32 bind_inany; /* Is sock bound to an INANY address? */ + __u32 hash; /* A hash of the packet 4 tuples */ +}; + +#define BPF_TAG_SIZE 8 + +struct bpf_prog_info { + __u32 type; + __u32 id; + __u8 tag[BPF_TAG_SIZE]; + __u32 jited_prog_len; + __u32 xlated_prog_len; + __aligned_u64 jited_prog_insns; + __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 gpl_compatible:1; + __u64 netns_dev; + __u64 netns_ino; + __u32 nr_jited_ksyms; + __u32 nr_jited_func_lens; + __aligned_u64 jited_ksyms; + __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 nr_func_info; + __u32 nr_line_info; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 nr_jited_line_info; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; +} __attribute__((aligned(8))); + +struct bpf_map_info { + __u32 type; + __u32 id; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 map_flags; + char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u32 :32; + __u64 netns_dev; + __u64 netns_ino; + __u32 btf_id; + __u32 btf_key_type_id; + __u32 btf_value_type_id; +} __attribute__((aligned(8))); + +struct bpf_btf_info { + __aligned_u64 btf; + __u32 btf_size; + __u32 id; +} __attribute__((aligned(8))); + +/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed + * by user and intended to be used by socket (e.g. to bind to, depends on + * attach attach type). + */ +struct bpf_sock_addr { + __u32 user_family; /* Allows 4-byte read, but no write. */ + __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. + * Stored in network byte order. + */ + __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ + __u32 user_port; /* Allows 4-byte read and write. + * Stored in network byte order + */ + __u32 family; /* Allows 4-byte read, but no write */ + __u32 type; /* Allows 4-byte read, but no write */ + __u32 protocol; /* Allows 4-byte read, but no write */ + __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ + __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write. + * Stored in network byte order. + */ +}; + +/* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. + * Some of this fields are in network (bigendian) byte order and may need + * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). + * New fields can only be added at the end of this structure + */ +struct bpf_sock_ops { + __u32 op; + union { + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ + }; + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; +}; + +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently + * supported cb flags + */ + +/* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +enum { + BPF_SOCK_OPS_VOID, + BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ + BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized + * window (in packets) or -1 if default + * value should be used + */ + BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an + * active connection is initialized + */ + BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an + * active connection is + * established + */ + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a + * passive connection is + * established + */ + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ + BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is + * based on the path and may be + * dependent on the congestion control + * algorithm. In general it indicates + * a congestion threshold. RTTs above + * this indicate congestion + */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ + BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after + * socket transition to LISTEN state. + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ +}; + +#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ + +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; + __u32 major; + __u32 minor; +}; + +struct bpf_raw_tracepoint_args { + __u64 args[0]; +}; + +/* DIRECT: Skip the FIB rules and go to FIB table associated with device + * OUTPUT: Do lookup from egress perspective; default is ingress + */ +#define BPF_FIB_LOOKUP_DIRECT BIT(0) +#define BPF_FIB_LOOKUP_OUTPUT BIT(1) + +enum { + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +}; + +struct bpf_fib_lookup { + /* input: network family for lookup (AF_INET, AF_INET6) + * output: network family of egress nexthop + */ + __u8 family; + + /* set if lookup is to consider L4 data - e.g., FIB rules */ + __u8 l4_protocol; + __be16 sport; + __be16 dport; + + /* total length of packet from network header - used for MTU check */ + __u16 tot_len; + + /* input: L3 device index for lookup + * output: device index from FIB lookup + */ + __u32 ifindex; + + union { + /* inputs to lookup */ + __u8 tos; /* AF_INET */ + __be32 flowinfo; /* AF_INET6, flow_label + priority */ + + /* output: metric of fib result (IPv4/IPv6 only) */ + __u32 rt_metric; + }; + + union { + __be32 ipv4_src; + __u32 ipv6_src[4]; /* in6_addr; network order */ + }; + + /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in + * network header. output: bpf_fib_lookup sets to gateway address + * if FIB lookup returns gateway route + */ + union { + __be32 ipv4_dst; + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ +}; + +enum bpf_task_fd_type { + BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_UPROBE, /* filename + offset */ + BPF_FD_TYPE_URETPROBE, /* filename + offset */ +}; + +struct bpf_flow_keys { + __u16 nhoff; + __u16 thoff; + __u16 addr_proto; /* ETH_P_* of valid addrs */ + __u8 is_frag; + __u8 is_first_frag; + __u8 is_encap; + __u8 ip_proto; + __be16 n_proto; + __be16 sport; + __be16 dport; + union { + struct { + __be32 ipv4_src; + __be32 ipv4_dst; + }; + struct { + __u32 ipv6_src[4]; /* in6_addr; network order */ + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + }; +}; + +struct bpf_func_info { + __u32 insn_off; + __u32 type_id; +}; + +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + +#endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h new file mode 100644 index 0000000..ee97668 --- /dev/null +++ b/src/shared/linux/bpf_common.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_BPF_COMMON_H__ +#define _UAPI__LINUX_BPF_COMMON_H__ + +/* Instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 /* 32-bit */ +#define BPF_H 0x08 /* 16-bit */ +#define BPF_B 0x10 /* 8-bit */ +/* eBPF BPF_DW 0x18 64-bit */ +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 + +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +#ifndef BPF_MAXINSNS +#define BPF_MAXINSNS 4096 +#endif + +#endif /* _UAPI__LINUX_BPF_COMMON_H__ */ diff --git a/src/shared/linux/bpf_insn.h b/src/shared/linux/bpf_insn.h new file mode 100644 index 0000000..d8d9fb2 --- /dev/null +++ b/src/shared/linux/bpf_insn.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* eBPF instruction mini library */ +#ifndef __BPF_INSN_H +#define __BPF_INSN_H + +struct bpf_insn; + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#endif diff --git a/src/shared/linux/dm-ioctl.h b/src/shared/linux/dm-ioctl.h new file mode 100644 index 0000000..b3aeec7 --- /dev/null +++ b/src/shared/linux/dm-ioctl.h @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited. + * Copyright (C) 2004 - 2009 Red Hat, Inc. All rights reserved. + * + * This file is released under the LGPL. + */ + +#ifndef _LINUX_DM_IOCTL_V4_H +#define _LINUX_DM_IOCTL_V4_H + +#include <linux/types.h> + +#define DM_DIR "mapper" /* Slashes not supported */ +#define DM_CONTROL_NODE "control" +#define DM_MAX_TYPE_NAME 16 +#define DM_NAME_LEN 128 +#define DM_UUID_LEN 129 + +/* + * A traditional ioctl interface for the device mapper. + * + * Each device can have two tables associated with it, an + * 'active' table which is the one currently used by io passing + * through the device, and an 'inactive' one which is a table + * that is being prepared as a replacement for the 'active' one. + * + * DM_VERSION: + * Just get the version information for the ioctl interface. + * + * DM_REMOVE_ALL: + * Remove all dm devices, destroy all tables. Only really used + * for debug. + * + * DM_LIST_DEVICES: + * Get a list of all the dm device names. + * + * DM_DEV_CREATE: + * Create a new device, neither the 'active' or 'inactive' table + * slots will be filled. The device will be in suspended state + * after creation, however any io to the device will get errored + * since it will be out-of-bounds. + * + * DM_DEV_REMOVE: + * Remove a device, destroy any tables. + * + * DM_DEV_RENAME: + * Rename a device or set its uuid if none was previously supplied. + * + * DM_SUSPEND: + * This performs both suspend and resume, depending which flag is + * passed in. + * Suspend: This command will not return until all pending io to + * the device has completed. Further io will be deferred until + * the device is resumed. + * Resume: It is no longer an error to issue this command on an + * unsuspended device. If a table is present in the 'inactive' + * slot, it will be moved to the active slot, then the old table + * from the active slot will be _destroyed_. Finally the device + * is resumed. + * + * DM_DEV_STATUS: + * Retrieves the status for the table in the 'active' slot. + * + * DM_DEV_WAIT: + * Wait for a significant event to occur to the device. This + * could either be caused by an event triggered by one of the + * targets of the table in the 'active' slot, or a table change. + * + * DM_TABLE_LOAD: + * Load a table into the 'inactive' slot for the device. The + * device does _not_ need to be suspended prior to this command. + * + * DM_TABLE_CLEAR: + * Destroy any table in the 'inactive' slot (ie. abort). + * + * DM_TABLE_DEPS: + * Return a set of device dependencies for the 'active' table. + * + * DM_TABLE_STATUS: + * Return the targets status for the 'active' table. + * + * DM_TARGET_MSG: + * Pass a message string to the target at a specific offset of a device. + * + * DM_DEV_SET_GEOMETRY: + * Set the geometry of a device by passing in a string in this format: + * + * "cylinders heads sectors_per_track start_sector" + * + * Beware that CHS geometry is nearly obsolete and only provided + * for compatibility with dm devices that can be booted by a PC + * BIOS. See struct hd_geometry for range limits. Also note that + * the geometry is erased if the device size changes. + */ + +/* + * All ioctl arguments consist of a single chunk of memory, with + * this structure at the start. If a uuid is specified any + * lookup (eg. for a DM_INFO) will be done on that, *not* the + * name. + */ +struct dm_ioctl { + /* + * The version number is made up of three parts: + * major - no backward or forward compatibility, + * minor - only backwards compatible, + * patch - both backwards and forwards compatible. + * + * All clients of the ioctl interface should fill in the + * version number of the interface that they were + * compiled with. + * + * All recognised ioctl commands (ie. those that don't + * return -ENOTTY) fill out this field, even if the + * command failed. + */ + __u32 version[3]; /* in/out */ + __u32 data_size; /* total size of data passed in + * including this struct */ + + __u32 data_start; /* offset to start of data + * relative to start of this struct */ + + __u32 target_count; /* in/out */ + __s32 open_count; /* out */ + __u32 flags; /* in/out */ + + /* + * event_nr holds either the event number (input and output) or the + * udev cookie value (input only). + * The DM_DEV_WAIT ioctl takes an event number as input. + * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls + * use the field as a cookie to return in the DM_COOKIE + * variable with the uevents they issue. + * For output, the ioctls return the event number, not the cookie. + */ + __u32 event_nr; /* in/out */ + __u32 padding; + + __u64 dev; /* in/out */ + + char name[DM_NAME_LEN]; /* device name */ + char uuid[DM_UUID_LEN]; /* unique identifier for + * the block device */ + char data[7]; /* padding or data */ +}; + +/* + * Used to specify tables. These structures appear after the + * dm_ioctl. + */ +struct dm_target_spec { + __u64 sector_start; + __u64 length; + __s32 status; /* used when reading from kernel only */ + + /* + * Location of the next dm_target_spec. + * - When specifying targets on a DM_TABLE_LOAD command, this value is + * the number of bytes from the start of the "current" dm_target_spec + * to the start of the "next" dm_target_spec. + * - When retrieving targets on a DM_TABLE_STATUS command, this value + * is the number of bytes from the start of the first dm_target_spec + * (that follows the dm_ioctl struct) to the start of the "next" + * dm_target_spec. + */ + __u32 next; + + char target_type[DM_MAX_TYPE_NAME]; + + /* + * Parameter string starts immediately after this object. + * Be careful to add padding after string to ensure correct + * alignment of subsequent dm_target_spec. + */ +}; + +/* + * Used to retrieve the target dependencies. + */ +struct dm_target_deps { + __u32 count; /* Array size */ + __u32 padding; /* unused */ + __u64 dev[0]; /* out */ +}; + +/* + * Used to get a list of all dm devices. + */ +struct dm_name_list { + __u64 dev; + __u32 next; /* offset to the next record from + the _start_ of this */ + char name[0]; +}; + +/* + * Used to retrieve the target versions + */ +struct dm_target_versions { + __u32 next; + __u32 version[3]; + + char name[0]; +}; + +/* + * Used to pass message to a target + */ +struct dm_target_msg { + __u64 sector; /* Device sector */ + + char message[0]; +}; + +/* + * If you change this make sure you make the corresponding change + * to dm-ioctl.c:lookup_ioctl() + */ +enum { + /* Top level cmds */ + DM_VERSION_CMD = 0, + DM_REMOVE_ALL_CMD, + DM_LIST_DEVICES_CMD, + + /* device level cmds */ + DM_DEV_CREATE_CMD, + DM_DEV_REMOVE_CMD, + DM_DEV_RENAME_CMD, + DM_DEV_SUSPEND_CMD, + DM_DEV_STATUS_CMD, + DM_DEV_WAIT_CMD, + + /* Table level cmds */ + DM_TABLE_LOAD_CMD, + DM_TABLE_CLEAR_CMD, + DM_TABLE_DEPS_CMD, + DM_TABLE_STATUS_CMD, + + /* Added later */ + DM_LIST_VERSIONS_CMD, + DM_TARGET_MSG_CMD, + DM_DEV_SET_GEOMETRY_CMD, + DM_DEV_ARM_POLL_CMD, +}; + +#define DM_IOCTL 0xfd + +#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) +#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) +#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl) + +#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) +#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) +#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) +#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) +#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) +#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl) +#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl) + +#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl) +#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl) +#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl) +#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl) + +#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl) + +#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl) +#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) + +#define DM_VERSION_MAJOR 4 +#define DM_VERSION_MINOR 27 +#define DM_VERSION_PATCHLEVEL 0 +#define DM_VERSION_EXTRA "-ioctl (2019-01-18)" + +/* Status bits */ +#define DM_READONLY_FLAG (1 << 0) /* In/Out */ +#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */ +#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */ + +/* + * Flag passed into ioctl STATUS command to get table information + * rather than current status. + */ +#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */ + +/* + * Flags that indicate whether a table is present in either of + * the two table slots that a device has. + */ +#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */ +#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */ + +/* + * Indicates that the buffer passed in wasn't big enough for the + * results. + */ +#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */ + +/* + * This flag is now ignored. + */ +#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */ + +/* + * Set this to avoid attempting to freeze any filesystem when suspending. + */ +#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */ + +/* + * Set this to suspend without flushing queued ios. + * Also disables flushing uncommitted changes in the thin target before + * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT. + */ +#define DM_NOFLUSH_FLAG (1 << 11) /* In */ + +/* + * If set, any table information returned will relate to the inactive + * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG + * is set before using the data returned. + */ +#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */ + +/* + * If set, a uevent was generated for which the caller may need to wait. + */ +#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */ + +/* + * If set, rename changes the uuid not the name. Only permitted + * if no uuid was previously supplied: an existing uuid cannot be changed. + */ +#define DM_UUID_FLAG (1 << 14) /* In */ + +/* + * If set, all buffers are wiped after use. Use when sending + * or requesting sensitive data such as an encryption key. + */ +#define DM_SECURE_DATA_FLAG (1 << 15) /* In */ + +/* + * If set, a message generated output data. + */ +#define DM_DATA_OUT_FLAG (1 << 16) /* Out */ + +/* + * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if + * the device cannot be removed immediately because it is still in use + * it should instead be scheduled for removal when it gets closed. + * + * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this + * flag indicates that the device is scheduled to be removed when it + * gets closed. + */ +#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */ + +/* + * If set, the device is suspended internally. + */ +#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */ + +#endif /* _LINUX_DM_IOCTL_H */ diff --git a/src/shared/linux/ethtool.h b/src/shared/linux/ethtool.h new file mode 100644 index 0000000..b06c630 --- /dev/null +++ b/src/shared/linux/ethtool.h @@ -0,0 +1,2021 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * ethtool.h: Defines for Linux ethtool. + * + * Copyright (C) 1998 David S. Miller (davem@redhat.com) + * Copyright 2001 Jeff Garzik <jgarzik@pobox.com> + * Portions Copyright 2001 Sun Microsystems (thockin@sun.com) + * Portions Copyright 2002 Intel (eli.kupermann@intel.com, + * christopher.leech@intel.com, + * scott.feldman@intel.com) + * Portions Copyright (C) Sun Microsystems 2008 + */ + +#ifndef _UAPI_LINUX_ETHTOOL_H +#define _UAPI_LINUX_ETHTOOL_H + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/if_ether.h> + +#ifndef __KERNEL__ +#include <limits.h> /* for INT_MAX */ +#endif + +#ifndef __KERNEL_DIV_ROUND_UP +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#endif + +/* All structures exposed to userland should be defined such that they + * have the same layout for 32-bit and 64-bit userland. + */ + +/** + * struct ethtool_cmd - DEPRECATED, link control and status + * This structure is DEPRECATED, please use struct ethtool_link_settings. + * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET + * @supported: Bitmask of %SUPPORTED_* flags for the link modes, + * physical connectors and other link features for which the + * interface supports autonegotiation or auto-detection. + * Read-only. + * @advertising: Bitmask of %ADVERTISED_* flags for the link modes, + * physical connectors and other link features that are + * advertised through autonegotiation or enabled for + * auto-detection. + * @speed: Low bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN + * @duplex: Duplex mode; one of %DUPLEX_* + * @port: Physical connector type; one of %PORT_* + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not + * applicable. For clause 45 PHYs this is the PRTAD. + * @transceiver: Historically used to distinguish different possible + * PHY types, but not in a consistent way. Deprecated. + * @autoneg: Enable/disable autonegotiation and auto-detection; + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO + * protocols supported by the interface; 0 if unknown. + * Read-only. + * @maxtxpkt: Historically used to report TX IRQ coalescing; now + * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. + * @maxrxpkt: Historically used to report RX IRQ coalescing; now + * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. + * @speed_hi: High bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the + * value will be %ETH_TP_MDI_INVALID. Read-only. + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. + * When written successfully, the link should be renegotiated if + * necessary. + * @lp_advertising: Bitmask of %ADVERTISED_* flags for the link modes + * and other link features that the link partner advertised + * through autonegotiation; 0 if unknown or not applicable. + * Read-only. + * + * The link speed in Mbps is split between @speed and @speed_hi. Use + * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to + * access it. + * + * If autonegotiation is disabled, the speed and @duplex represent the + * fixed link mode and are writable if the driver supports multiple + * link modes. If it is enabled then they are read-only; if the link + * is up they represent the negotiated link mode; if the link is down, + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. + * + * Some hardware interfaces may have multiple PHYs and/or physical + * connectors fitted or do not allow the driver to detect which are + * fitted. For these interfaces @port and/or @phy_address may be + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. + * Otherwise, attempts to write different values may be ignored or + * rejected. + * + * Users should assume that all fields not marked read-only are + * writable and subject to validation by the driver. They should use + * %ETHTOOL_GSET to get the current values before making specific + * changes and then applying them with %ETHTOOL_SSET. + * + * Deprecated fields should be ignored by both users and drivers. + */ +struct ethtool_cmd { + __u32 cmd; + __u32 supported; + __u32 advertising; + __u16 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 transceiver; + __u8 autoneg; + __u8 mdio_support; + __u32 maxtxpkt; + __u32 maxrxpkt; + __u16 speed_hi; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __u32 lp_advertising; + __u32 reserved[2]; +}; + +static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, + __u32 speed) +{ + ep->speed = (__u16)(speed & 0xFFFF); + ep->speed_hi = (__u16)(speed >> 16); +} + +static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) +{ + return ((__u32) ep->speed_hi << 16) | (__u32) ep->speed; +} + +/* Device supports clause 22 register access to PHY or peripherals + * using the interface defined in <linux/mii.h>. This should not be + * set if there are known to be no such peripherals present or if + * the driver only emulates clause 22 registers for compatibility. + */ +#define ETH_MDIO_SUPPORTS_C22 1 + +/* Device supports clause 45 register access to PHY or peripherals + * using the interface defined in <linux/mii.h> and <linux/mdio.h>. + * This should not be set if there are known to be no such peripherals + * present. + */ +#define ETH_MDIO_SUPPORTS_C45 2 + +#define ETHTOOL_FWVERS_LEN 32 +#define ETHTOOL_BUSINFO_LEN 32 +#define ETHTOOL_EROMVERS_LEN 32 + +/** + * struct ethtool_drvinfo - general driver and device information + * @cmd: Command number = %ETHTOOL_GDRVINFO + * @driver: Driver short name. This should normally match the name + * in its bus driver structure (e.g. pci_driver::name). Must + * not be an empty string. + * @version: Driver version string; may be an empty string + * @fw_version: Firmware version string; may be an empty string + * @erom_version: Expansion ROM version string; may be an empty string + * @bus_info: Device bus address. This should match the dev_name() + * string for the underlying bus device, if there is one. May be + * an empty string. + * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and + * %ETHTOOL_SPFLAGS commands; also the number of strings in the + * %ETH_SS_PRIV_FLAGS set + * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS + * command; also the number of strings in the %ETH_SS_STATS set + * @testinfo_len: Number of results returned by the %ETHTOOL_TEST + * command; also the number of strings in the %ETH_SS_TEST set + * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM + * and %ETHTOOL_SEEPROM commands, in bytes + * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS + * command, in bytes + * + * Users can use the %ETHTOOL_GSSET_INFO command to get the number of + * strings in any string set (from Linux 2.6.34). + * + * Drivers should set at most @driver, @version, @fw_version and + * @bus_info in their get_drvinfo() implementation. The ethtool + * core fills in the other fields using other driver operations. + */ +struct ethtool_drvinfo { + __u32 cmd; + char driver[32]; + char version[32]; + char fw_version[ETHTOOL_FWVERS_LEN]; + char bus_info[ETHTOOL_BUSINFO_LEN]; + char erom_version[ETHTOOL_EROMVERS_LEN]; + char reserved2[12]; + __u32 n_priv_flags; + __u32 n_stats; + __u32 testinfo_len; + __u32 eedump_len; + __u32 regdump_len; +}; + +#define SOPASS_MAX 6 + +/** + * struct ethtool_wolinfo - Wake-On-Lan configuration + * @cmd: Command number = %ETHTOOL_GWOL or %ETHTOOL_SWOL + * @supported: Bitmask of %WAKE_* flags for supported Wake-On-Lan modes. + * Read-only. + * @wolopts: Bitmask of %WAKE_* flags for enabled Wake-On-Lan modes. + * @sopass: SecureOn(tm) password; meaningful only if %WAKE_MAGICSECURE + * is set in @wolopts. + */ +struct ethtool_wolinfo { + __u32 cmd; + __u32 supported; + __u32 wolopts; + __u8 sopass[SOPASS_MAX]; +}; + +/* for passing single values */ +struct ethtool_value { + __u32 cmd; + __u32 data; +}; + +#define PFC_STORM_PREVENTION_AUTO 0xffff +#define PFC_STORM_PREVENTION_DISABLE 0 + +enum tunable_id { + ETHTOOL_ID_UNSPEC, + ETHTOOL_RX_COPYBREAK, + ETHTOOL_TX_COPYBREAK, + ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ + /* + * Add your fresh new tunable attribute above and remember to update + * tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_TUNABLE_COUNT, +}; + +enum tunable_type_id { + ETHTOOL_TUNABLE_UNSPEC, + ETHTOOL_TUNABLE_U8, + ETHTOOL_TUNABLE_U16, + ETHTOOL_TUNABLE_U32, + ETHTOOL_TUNABLE_U64, + ETHTOOL_TUNABLE_STRING, + ETHTOOL_TUNABLE_S8, + ETHTOOL_TUNABLE_S16, + ETHTOOL_TUNABLE_S32, + ETHTOOL_TUNABLE_S64, +}; + +struct ethtool_tunable { + __u32 cmd; + __u32 id; + __u32 type_id; + __u32 len; + void *data[0]; +}; + +#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff +#define DOWNSHIFT_DEV_DISABLE 0 + +/* Time in msecs after which link is reported as down + * 0 = lowest time supported by the PHY + * 0xff = off, link down detection according to standard + */ +#define ETHTOOL_PHY_FAST_LINK_DOWN_ON 0 +#define ETHTOOL_PHY_FAST_LINK_DOWN_OFF 0xff + +/* Energy Detect Power Down (EDPD) is a feature supported by some PHYs, where + * the PHY's RX & TX blocks are put into a low-power mode when there is no + * link detected (typically cable is un-plugged). For RX, only a minimal + * link-detection is available, and for TX the PHY wakes up to send link pulses + * to avoid any lock-ups in case the peer PHY may also be running in EDPD mode. + * + * Some PHYs may support configuration of the wake-up interval for TX pulses, + * and some PHYs may support only disabling TX pulses entirely. For the latter + * a special value is required (ETHTOOL_PHY_EDPD_NO_TX) so that this can be + * configured from userspace (should the user want it). + * + * The interval units for TX wake-up are in milliseconds, since this should + * cover a reasonable range of intervals: + * - from 1 millisecond, which does not sound like much of a power-saver + * - to ~65 seconds which is quite a lot to wait for a link to come up when + * plugging a cable + */ +#define ETHTOOL_PHY_EDPD_DFLT_TX_MSECS 0xffff +#define ETHTOOL_PHY_EDPD_NO_TX 0xfffe +#define ETHTOOL_PHY_EDPD_DISABLE 0 + +enum phy_tunable_id { + ETHTOOL_PHY_ID_UNSPEC, + ETHTOOL_PHY_DOWNSHIFT, + ETHTOOL_PHY_FAST_LINK_DOWN, + ETHTOOL_PHY_EDPD, + /* + * Add your fresh new phy tunable attribute above and remember to update + * phy_tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_PHY_TUNABLE_COUNT, +}; + +/** + * struct ethtool_regs - hardware register dump + * @cmd: Command number = %ETHTOOL_GREGS + * @version: Dump format version. This is driver-specific and may + * distinguish different chips/revisions. Drivers must use new + * version numbers whenever the dump format changes in an + * incompatible way. + * @len: On entry, the real length of @data. On return, the number of + * bytes used. + * @data: Buffer for the register dump + * + * Users should use %ETHTOOL_GDRVINFO to find the maximum length of + * a register dump for the interface. They must allocate the buffer + * immediately following this structure. + */ +struct ethtool_regs { + __u32 cmd; + __u32 version; + __u32 len; + __u8 data[0]; +}; + +/** + * struct ethtool_eeprom - EEPROM dump + * @cmd: Command number = %ETHTOOL_GEEPROM, %ETHTOOL_GMODULEEEPROM or + * %ETHTOOL_SEEPROM + * @magic: A 'magic cookie' value to guard against accidental changes. + * The value passed in to %ETHTOOL_SEEPROM must match the value + * returned by %ETHTOOL_GEEPROM for the same device. This is + * unused when @cmd is %ETHTOOL_GMODULEEEPROM. + * @offset: Offset within the EEPROM to begin reading/writing, in bytes + * @len: On entry, number of bytes to read/write. On successful + * return, number of bytes actually read/written. In case of + * error, this may indicate at what point the error occurred. + * @data: Buffer to read/write from + * + * Users may use %ETHTOOL_GDRVINFO or %ETHTOOL_GMODULEINFO to find + * the length of an on-board or module EEPROM, respectively. They + * must allocate the buffer immediately following this structure. + */ +struct ethtool_eeprom { + __u32 cmd; + __u32 magic; + __u32 offset; + __u32 len; + __u8 data[0]; +}; + +/** + * struct ethtool_eee - Energy Efficient Ethernet information + * @cmd: ETHTOOL_{G,S}EEE + * @supported: Mask of %SUPPORTED_* flags for the speed/duplex combinations + * for which there is EEE support. + * @advertised: Mask of %ADVERTISED_* flags for the speed/duplex combinations + * advertised as eee capable. + * @lp_advertised: Mask of %ADVERTISED_* flags for the speed/duplex + * combinations advertised by the link partner as eee capable. + * @eee_active: Result of the eee auto negotiation. + * @eee_enabled: EEE configured mode (enabled/disabled). + * @tx_lpi_enabled: Whether the interface should assert its tx lpi, given + * that eee was negotiated. + * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting + * its tx lpi (after reaching 'idle' state). Effective only when eee + * was negotiated and tx_lpi_enabled was set. + */ +struct ethtool_eee { + __u32 cmd; + __u32 supported; + __u32 advertised; + __u32 lp_advertised; + __u32 eee_active; + __u32 eee_enabled; + __u32 tx_lpi_enabled; + __u32 tx_lpi_timer; + __u32 reserved[2]; +}; + +/** + * struct ethtool_modinfo - plugin module eeprom information + * @cmd: %ETHTOOL_GMODULEINFO + * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx + * @eeprom_len: Length of the eeprom + * + * This structure is used to return the information to + * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. + * The type code indicates the eeprom data format + */ +struct ethtool_modinfo { + __u32 cmd; + __u32 type; + __u32 eeprom_len; + __u32 reserved[8]; +}; + +/** + * struct ethtool_coalesce - coalescing parameters for IRQs and stats updates + * @cmd: ETHTOOL_{G,S}COALESCE + * @rx_coalesce_usecs: How many usecs to delay an RX interrupt after + * a packet arrives. + * @rx_max_coalesced_frames: Maximum number of packets to receive + * before an RX interrupt. + * @rx_coalesce_usecs_irq: Same as @rx_coalesce_usecs, except that + * this value applies while an IRQ is being serviced by the host. + * @rx_max_coalesced_frames_irq: Same as @rx_max_coalesced_frames, + * except that this value applies while an IRQ is being serviced + * by the host. + * @tx_coalesce_usecs: How many usecs to delay a TX interrupt after + * a packet is sent. + * @tx_max_coalesced_frames: Maximum number of packets to be sent + * before a TX interrupt. + * @tx_coalesce_usecs_irq: Same as @tx_coalesce_usecs, except that + * this value applies while an IRQ is being serviced by the host. + * @tx_max_coalesced_frames_irq: Same as @tx_max_coalesced_frames, + * except that this value applies while an IRQ is being serviced + * by the host. + * @stats_block_coalesce_usecs: How many usecs to delay in-memory + * statistics block updates. Some drivers do not have an + * in-memory statistic block, and in such cases this value is + * ignored. This value must not be zero. + * @use_adaptive_rx_coalesce: Enable adaptive RX coalescing. + * @use_adaptive_tx_coalesce: Enable adaptive TX coalescing. + * @pkt_rate_low: Threshold for low packet rate (packets per second). + * @rx_coalesce_usecs_low: How many usecs to delay an RX interrupt after + * a packet arrives, when the packet rate is below @pkt_rate_low. + * @rx_max_coalesced_frames_low: Maximum number of packets to be received + * before an RX interrupt, when the packet rate is below @pkt_rate_low. + * @tx_coalesce_usecs_low: How many usecs to delay a TX interrupt after + * a packet is sent, when the packet rate is below @pkt_rate_low. + * @tx_max_coalesced_frames_low: Maximum nuumber of packets to be sent before + * a TX interrupt, when the packet rate is below @pkt_rate_low. + * @pkt_rate_high: Threshold for high packet rate (packets per second). + * @rx_coalesce_usecs_high: How many usecs to delay an RX interrupt after + * a packet arrives, when the packet rate is above @pkt_rate_high. + * @rx_max_coalesced_frames_high: Maximum number of packets to be received + * before an RX interrupt, when the packet rate is above @pkt_rate_high. + * @tx_coalesce_usecs_high: How many usecs to delay a TX interrupt after + * a packet is sent, when the packet rate is above @pkt_rate_high. + * @tx_max_coalesced_frames_high: Maximum number of packets to be sent before + * a TX interrupt, when the packet rate is above @pkt_rate_high. + * @rate_sample_interval: How often to do adaptive coalescing packet rate + * sampling, measured in seconds. Must not be zero. + * + * Each pair of (usecs, max_frames) fields specifies that interrupts + * should be coalesced until + * (usecs > 0 && time_since_first_completion >= usecs) || + * (max_frames > 0 && completed_frames >= max_frames) + * + * It is illegal to set both usecs and max_frames to zero as this + * would cause interrupts to never be generated. To disable + * coalescing, set usecs = 0 and max_frames = 1. + * + * Some implementations ignore the value of max_frames and use the + * condition time_since_first_completion >= usecs + * + * This is deprecated. Drivers for hardware that does not support + * counting completions should validate that max_frames == !rx_usecs. + * + * Adaptive RX/TX coalescing is an algorithm implemented by some + * drivers to improve latency under low packet rates and improve + * throughput under high packet rates. Some drivers only implement + * one of RX or TX adaptive coalescing. Anything not implemented by + * the driver causes these values to be silently ignored. + * + * When the packet rate is below @pkt_rate_high but above + * @pkt_rate_low (both measured in packets per second) the + * normal {rx,tx}_* coalescing parameters are used. + */ +struct ethtool_coalesce { + __u32 cmd; + __u32 rx_coalesce_usecs; + __u32 rx_max_coalesced_frames; + __u32 rx_coalesce_usecs_irq; + __u32 rx_max_coalesced_frames_irq; + __u32 tx_coalesce_usecs; + __u32 tx_max_coalesced_frames; + __u32 tx_coalesce_usecs_irq; + __u32 tx_max_coalesced_frames_irq; + __u32 stats_block_coalesce_usecs; + __u32 use_adaptive_rx_coalesce; + __u32 use_adaptive_tx_coalesce; + __u32 pkt_rate_low; + __u32 rx_coalesce_usecs_low; + __u32 rx_max_coalesced_frames_low; + __u32 tx_coalesce_usecs_low; + __u32 tx_max_coalesced_frames_low; + __u32 pkt_rate_high; + __u32 rx_coalesce_usecs_high; + __u32 rx_max_coalesced_frames_high; + __u32 tx_coalesce_usecs_high; + __u32 tx_max_coalesced_frames_high; + __u32 rate_sample_interval; +}; + +/** + * struct ethtool_ringparam - RX/TX ring parameters + * @cmd: Command number = %ETHTOOL_GRINGPARAM or %ETHTOOL_SRINGPARAM + * @rx_max_pending: Maximum supported number of pending entries per + * RX ring. Read-only. + * @rx_mini_max_pending: Maximum supported number of pending entries + * per RX mini ring. Read-only. + * @rx_jumbo_max_pending: Maximum supported number of pending entries + * per RX jumbo ring. Read-only. + * @tx_max_pending: Maximum supported number of pending entries per + * TX ring. Read-only. + * @rx_pending: Current maximum number of pending entries per RX ring + * @rx_mini_pending: Current maximum number of pending entries per RX + * mini ring + * @rx_jumbo_pending: Current maximum number of pending entries per RX + * jumbo ring + * @tx_pending: Current maximum supported number of pending entries + * per TX ring + * + * If the interface does not have separate RX mini and/or jumbo rings, + * @rx_mini_max_pending and/or @rx_jumbo_max_pending will be 0. + * + * There may also be driver-dependent minimum values for the number + * of entries per ring. + */ +struct ethtool_ringparam { + __u32 cmd; + __u32 rx_max_pending; + __u32 rx_mini_max_pending; + __u32 rx_jumbo_max_pending; + __u32 tx_max_pending; + __u32 rx_pending; + __u32 rx_mini_pending; + __u32 rx_jumbo_pending; + __u32 tx_pending; +}; + +/** + * struct ethtool_channels - configuring number of network channel + * @cmd: ETHTOOL_{G,S}CHANNELS + * @max_rx: Read only. Maximum number of receive channel the driver support. + * @max_tx: Read only. Maximum number of transmit channel the driver support. + * @max_other: Read only. Maximum number of other channel the driver support. + * @max_combined: Read only. Maximum number of combined channel the driver + * support. Set of queues RX, TX or other. + * @rx_count: Valid values are in the range 1 to the max_rx. + * @tx_count: Valid values are in the range 1 to the max_tx. + * @other_count: Valid values are in the range 1 to the max_other. + * @combined_count: Valid values are in the range 1 to the max_combined. + * + * This can be used to configure RX, TX and other channels. + */ + +struct ethtool_channels { + __u32 cmd; + __u32 max_rx; + __u32 max_tx; + __u32 max_other; + __u32 max_combined; + __u32 rx_count; + __u32 tx_count; + __u32 other_count; + __u32 combined_count; +}; + +/** + * struct ethtool_pauseparam - Ethernet pause (flow control) parameters + * @cmd: Command number = %ETHTOOL_GPAUSEPARAM or %ETHTOOL_SPAUSEPARAM + * @autoneg: Flag to enable autonegotiation of pause frame use + * @rx_pause: Flag to enable reception of pause frames + * @tx_pause: Flag to enable transmission of pause frames + * + * Drivers should reject a non-zero setting of @autoneg when + * autoneogotiation is disabled (or not supported) for the link. + * + * If the link is autonegotiated, drivers should use + * mii_advertise_flowctrl() or similar code to set the advertised + * pause frame capabilities based on the @rx_pause and @tx_pause flags, + * even if @autoneg is zero. They should also allow the advertised + * pause frame capabilities to be controlled directly through the + * advertising field of &struct ethtool_cmd. + * + * If @autoneg is non-zero, the MAC is configured to send and/or + * receive pause frames according to the result of autonegotiation. + * Otherwise, it is configured directly based on the @rx_pause and + * @tx_pause flags. + */ +struct ethtool_pauseparam { + __u32 cmd; + __u32 autoneg; + __u32 rx_pause; + __u32 tx_pause; +}; + +/** + * enum ethtool_link_ext_state - link extended state + */ +enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_STATE_NO_CABLE, + ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, + ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, + ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, + ETHTOOL_LINK_EXT_STATE_OVERHEAT, +}; + +/** + * enum ethtool_link_ext_substate_autoneg - more information in addition to + * ETHTOOL_LINK_EXT_STATE_AUTONEG. + */ +enum ethtool_link_ext_substate_autoneg { + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, +}; + +/** + * enum ethtool_link_ext_substate_link_training - more information in addition to + * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. + */ +enum ethtool_link_ext_substate_link_training { + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, +}; + +/** + * enum ethtool_link_ext_substate_logical_mismatch - more information in addition + * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. + */ +enum ethtool_link_ext_substate_link_logical_mismatch { + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, +}; + +/** + * enum ethtool_link_ext_substate_bad_signal_integrity - more information in + * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. + */ +enum ethtool_link_ext_substate_bad_signal_integrity { + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, +}; + +/** + * enum ethtool_link_ext_substate_cable_issue - more information in + * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. + */ +enum ethtool_link_ext_substate_cable_issue { + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, + ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, +}; + +#define ETH_GSTRING_LEN 32 + +/** + * enum ethtool_stringset - string set ID + * @ETH_SS_TEST: Self-test result names, for use with %ETHTOOL_TEST + * @ETH_SS_STATS: Statistic names, for use with %ETHTOOL_GSTATS + * @ETH_SS_PRIV_FLAGS: Driver private flag names, for use with + * %ETHTOOL_GPFLAGS and %ETHTOOL_SPFLAGS + * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE; + * now deprecated + * @ETH_SS_FEATURES: Device feature names + * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS + * @ETH_SS_PHY_TUNABLES: PHY tunable names + * @ETH_SS_LINK_MODES: link mode names + * @ETH_SS_MSG_CLASSES: debug message class names + * @ETH_SS_WOL_MODES: wake-on-lan modes + * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags + * @ETH_SS_TS_TX_TYPES: timestamping Tx types + * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters + * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types + */ +enum ethtool_stringset { + ETH_SS_TEST = 0, + ETH_SS_STATS, + ETH_SS_PRIV_FLAGS, + ETH_SS_NTUPLE_FILTERS, + ETH_SS_FEATURES, + ETH_SS_RSS_HASH_FUNCS, + ETH_SS_TUNABLES, + ETH_SS_PHY_STATS, + ETH_SS_PHY_TUNABLES, + ETH_SS_LINK_MODES, + ETH_SS_MSG_CLASSES, + ETH_SS_WOL_MODES, + ETH_SS_SOF_TIMESTAMPING, + ETH_SS_TS_TX_TYPES, + ETH_SS_TS_RX_FILTERS, + ETH_SS_UDP_TUNNEL_TYPES, + + /* add new constants above here */ + ETH_SS_COUNT +}; + +/** + * struct ethtool_gstrings - string set for data tagging + * @cmd: Command number = %ETHTOOL_GSTRINGS + * @string_set: String set ID; one of &enum ethtool_stringset + * @len: On return, the number of strings in the string set + * @data: Buffer for strings. Each string is null-padded to a size of + * %ETH_GSTRING_LEN. + * + * Users must use %ETHTOOL_GSSET_INFO to find the number of strings in + * the string set. They must allocate a buffer of the appropriate + * size immediately following this structure. + */ +struct ethtool_gstrings { + __u32 cmd; + __u32 string_set; + __u32 len; + __u8 data[0]; +}; + +/** + * struct ethtool_sset_info - string set information + * @cmd: Command number = %ETHTOOL_GSSET_INFO + * @sset_mask: On entry, a bitmask of string sets to query, with bits + * numbered according to &enum ethtool_stringset. On return, a + * bitmask of those string sets queried that are supported. + * @data: Buffer for string set sizes. On return, this contains the + * size of each string set that was queried and supported, in + * order of ID. + * + * Example: The user passes in @sset_mask = 0x7 (sets 0, 1, 2) and on + * return @sset_mask == 0x6 (sets 1, 2). Then @data[0] contains the + * size of set 1 and @data[1] contains the size of set 2. + * + * Users must allocate a buffer of the appropriate size (4 * number of + * sets queried) immediately following this structure. + */ +struct ethtool_sset_info { + __u32 cmd; + __u32 reserved; + __u64 sset_mask; + __u32 data[0]; +}; + +/** + * enum ethtool_test_flags - flags definition of ethtool_test + * @ETH_TEST_FL_OFFLINE: if set perform online and offline tests, otherwise + * only online tests. + * @ETH_TEST_FL_FAILED: Driver set this flag if test fails. + * @ETH_TEST_FL_EXTERNAL_LB: Application request to perform external loopback + * test. + * @ETH_TEST_FL_EXTERNAL_LB_DONE: Driver performed the external loopback test + */ + +enum ethtool_test_flags { + ETH_TEST_FL_OFFLINE = (1 << 0), + ETH_TEST_FL_FAILED = (1 << 1), + ETH_TEST_FL_EXTERNAL_LB = (1 << 2), + ETH_TEST_FL_EXTERNAL_LB_DONE = (1 << 3), +}; + +/** + * struct ethtool_test - device self-test invocation + * @cmd: Command number = %ETHTOOL_TEST + * @flags: A bitmask of flags from &enum ethtool_test_flags. Some + * flags may be set by the user on entry; others may be set by + * the driver on return. + * @len: On return, the number of test results + * @data: Array of test results + * + * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the + * number of test results that will be returned. They must allocate a + * buffer of the appropriate size (8 * number of results) immediately + * following this structure. + */ +struct ethtool_test { + __u32 cmd; + __u32 flags; + __u32 reserved; + __u32 len; + __u64 data[0]; +}; + +/** + * struct ethtool_stats - device-specific statistics + * @cmd: Command number = %ETHTOOL_GSTATS + * @n_stats: On return, the number of statistics + * @data: Array of statistics + * + * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the + * number of statistics that will be returned. They must allocate a + * buffer of the appropriate size (8 * number of statistics) + * immediately following this structure. + */ +struct ethtool_stats { + __u32 cmd; + __u32 n_stats; + __u64 data[0]; +}; + +/** + * struct ethtool_perm_addr - permanent hardware address + * @cmd: Command number = %ETHTOOL_GPERMADDR + * @size: On entry, the size of the buffer. On return, the size of the + * address. The command fails if the buffer is too small. + * @data: Buffer for the address + * + * Users must allocate the buffer immediately following this structure. + * A buffer size of %MAX_ADDR_LEN should be sufficient for any address + * type. + */ +struct ethtool_perm_addr { + __u32 cmd; + __u32 size; + __u8 data[0]; +}; + +/* boolean flags controlling per-interface behavior characteristics. + * When reading, the flag indicates whether or not a certain behavior + * is enabled/present. When writing, the flag indicates whether + * or not the driver should turn on (set) or off (clear) a behavior. + * + * Some behaviors may read-only (unconditionally absent or present). + * If such is the case, return EINVAL in the set-flags operation if the + * flag differs from the read-only value. + */ +enum ethtool_flags { + ETH_FLAG_TXVLAN = (1 << 7), /* TX VLAN offload enabled */ + ETH_FLAG_RXVLAN = (1 << 8), /* RX VLAN offload enabled */ + ETH_FLAG_LRO = (1 << 15), /* LRO is enabled */ + ETH_FLAG_NTUPLE = (1 << 27), /* N-tuple filters enabled */ + ETH_FLAG_RXHASH = (1 << 28), +}; + +/* The following structures are for supporting RX network flow + * classification and RX n-tuple configuration. Note, all multibyte + * fields, e.g., ip4src, ip4dst, psrc, pdst, spi, etc. are expected to + * be in network byte order. + */ + +/** + * struct ethtool_tcpip4_spec - flow specification for TCP/IPv4 etc. + * @ip4src: Source host + * @ip4dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tos: Type-of-service + * + * This can be used to specify a TCP/IPv4, UDP/IPv4 or SCTP/IPv4 flow. + */ +struct ethtool_tcpip4_spec { + __be32 ip4src; + __be32 ip4dst; + __be16 psrc; + __be16 pdst; + __u8 tos; +}; + +/** + * struct ethtool_ah_espip4_spec - flow specification for IPsec/IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @spi: Security parameters index + * @tos: Type-of-service + * + * This can be used to specify an IPsec transport or tunnel over IPv4. + */ +struct ethtool_ah_espip4_spec { + __be32 ip4src; + __be32 ip4dst; + __be32 spi; + __u8 tos; +}; + +#define ETH_RX_NFC_IP4 1 + +/** + * struct ethtool_usrip4_spec - general flow specification for IPv4 + * @ip4src: Source host + * @ip4dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tos: Type-of-service + * @ip_ver: Value must be %ETH_RX_NFC_IP4; mask must be 0 + * @proto: Transport protocol number; mask must be 0 + */ +struct ethtool_usrip4_spec { + __be32 ip4src; + __be32 ip4dst; + __be32 l4_4_bytes; + __u8 tos; + __u8 ip_ver; + __u8 proto; +}; + +/** + * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc. + * @ip6src: Source host + * @ip6dst: Destination host + * @psrc: Source port + * @pdst: Destination port + * @tclass: Traffic Class + * + * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow. + */ +struct ethtool_tcpip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be16 psrc; + __be16 pdst; + __u8 tclass; +}; + +/** + * struct ethtool_ah_espip6_spec - flow specification for IPsec/IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @spi: Security parameters index + * @tclass: Traffic Class + * + * This can be used to specify an IPsec transport or tunnel over IPv6. + */ +struct ethtool_ah_espip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be32 spi; + __u8 tclass; +}; + +/** + * struct ethtool_usrip6_spec - general flow specification for IPv6 + * @ip6src: Source host + * @ip6dst: Destination host + * @l4_4_bytes: First 4 bytes of transport (layer 4) header + * @tclass: Traffic Class + * @l4_proto: Transport protocol number (nexthdr after any Extension Headers) + */ +struct ethtool_usrip6_spec { + __be32 ip6src[4]; + __be32 ip6dst[4]; + __be32 l4_4_bytes; + __u8 tclass; + __u8 l4_proto; +}; + +union ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethtool_tcpip6_spec tcp_ip6_spec; + struct ethtool_tcpip6_spec udp_ip6_spec; + struct ethtool_tcpip6_spec sctp_ip6_spec; + struct ethtool_ah_espip6_spec ah_ip6_spec; + struct ethtool_ah_espip6_spec esp_ip6_spec; + struct ethtool_usrip6_spec usr_ip6_spec; + struct ethhdr ether_spec; + __u8 hdata[52]; +}; + +/** + * struct ethtool_flow_ext - additional RX flow fields + * @h_dest: destination MAC address + * @vlan_etype: VLAN EtherType + * @vlan_tci: VLAN tag control information + * @data: user defined data + * + * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT + * is set in &struct ethtool_rx_flow_spec @flow_type. + * @h_dest is valid if %FLOW_MAC_EXT is set. + */ +struct ethtool_flow_ext { + __u8 padding[2]; + unsigned char h_dest[ETH_ALEN]; + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; + +/** + * struct ethtool_rx_flow_spec - classification rule for RX flows + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW + * @h_u: Flow fields to match (dependent on @flow_type) + * @h_ext: Additional fields to match + * @m_u: Masks for flow field bits to be matched + * @m_ext: Masks for additional field bits to be matched + * Note, all additional fields must be ignored unless @flow_type + * includes the %FLOW_EXT or %FLOW_MAC_EXT flag + * (see &struct ethtool_flow_ext description). + * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC + * if packets should be discarded, or %RX_CLS_FLOW_WAKE if the + * packets should be used for Wake-on-LAN with %WAKE_FILTER + * @location: Location of rule in the table. Locations must be + * numbered such that a flow matching multiple rules will be + * classified according to the first (lowest numbered) rule. + */ +struct ethtool_rx_flow_spec { + __u32 flow_type; + union ethtool_flow_union h_u; + struct ethtool_flow_ext h_ext; + union ethtool_flow_union m_u; + struct ethtool_flow_ext m_ext; + __u64 ring_cookie; + __u32 location; +}; + +/* How rings are laid out when accessing virtual functions or + * offloaded queues is device specific. To allow users to do flow + * steering and specify these queues the ring cookie is partitioned + * into a 32bit queue index with an 8 bit virtual function id. + * This also leaves the 3bytes for further specifiers. It is possible + * future devices may support more than 256 virtual functions if + * devices start supporting PCIe w/ARI. However at the moment I + * do not know of any devices that support this so I do not reserve + * space for this at this time. If a future patch consumes the next + * byte it should be aware of this possibility. + */ +#define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32 +static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie) +{ + return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie; +} + +static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie) +{ + return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >> + ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF; +} + +/** + * struct ethtool_rxnfc - command to get or set RX flow classification rules + * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH, + * %ETHTOOL_GRXRINGS, %ETHTOOL_GRXCLSRLCNT, %ETHTOOL_GRXCLSRULE, + * %ETHTOOL_GRXCLSRLALL, %ETHTOOL_SRXCLSRLDEL or %ETHTOOL_SRXCLSRLINS + * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW + * @data: Command-dependent value + * @fs: Flow classification rule + * @rss_context: RSS context to be affected + * @rule_cnt: Number of rules to be affected + * @rule_locs: Array of used rule locations + * + * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating + * the fields included in the flow hash, e.g. %RXH_IP_SRC. The following + * structure fields must not be used, except that if @flow_type includes + * the %FLOW_RSS flag, then @rss_context determines which RSS context to + * act on. + * + * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues + * on return. + * + * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined + * rules on return. If @data is non-zero on return then it is the + * size of the rule table, plus the flag %RX_CLS_LOC_SPECIAL if the + * driver supports any special location values. If that flag is not + * set in @data then special location values should not be used. + * + * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an + * existing rule on entry and @fs contains the rule on return; if + * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is + * filled with the RSS context ID associated with the rule. + * + * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the + * user buffer for @rule_locs on entry. On return, @data is the size + * of the rule table, @rule_cnt is the number of defined rules, and + * @rule_locs contains the locations of the defined rules. Drivers + * must use the second parameter to get_rxnfc() instead of @rule_locs. + * + * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update. + * @fs.@location either specifies the location to use or is a special + * location value with %RX_CLS_LOC_SPECIAL flag set. On return, + * @fs.@location is the actual rule location. If @fs.@flow_type + * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to + * use for flow spreading traffic which matches this rule. The value + * from the rxfh indirection table will be added to @fs.@ring_cookie + * to choose which ring to deliver to. + * + * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an + * existing rule on entry. + * + * A driver supporting the special location values for + * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused + * location, and may remove a rule at a later location (lower + * priority) that matches exactly the same set of flows. The special + * values are %RX_CLS_LOC_ANY, selecting any location; + * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum + * priority); and %RX_CLS_LOC_LAST, selecting the last suitable + * location (minimum priority). Additional special values may be + * defined in future and drivers must return -%EINVAL for any + * unrecognised value. + */ +struct ethtool_rxnfc { + __u32 cmd; + __u32 flow_type; + __u64 data; + struct ethtool_rx_flow_spec fs; + union { + __u32 rule_cnt; + __u32 rss_context; + }; + __u32 rule_locs[0]; +}; + + +/** + * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection + * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR + * @size: On entry, the array size of the user buffer, which may be zero. + * On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware + * indirection table. + * @ring_index: RX ring/queue index for each hash value + * + * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size + * should be returned. For %ETHTOOL_SRXFHINDIR, a @size of zero means + * the table should be reset to default values. This last feature + * is not supported by the original implementations. + */ +struct ethtool_rxfh_indir { + __u32 cmd; + __u32 size; + __u32 ring_index[0]; +}; + +/** + * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. + * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH + * @rss_context: RSS context identifier. Context 0 is the default for normal + * traffic; other contexts can be referenced as the destination for RX flow + * classification rules. %ETH_RXFH_CONTEXT_ALLOC is used with command + * %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will + * contain the ID of the newly allocated context. + * @indir_size: On entry, the array size of the user buffer for the + * indirection table, which may be zero, or (for %ETHTOOL_SRSSH), + * %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH, + * the array size of the hardware indirection table. + * @key_size: On entry, the array size of the user buffer for the hash key, + * which may be zero. On return from %ETHTOOL_GRSSH, the size of the + * hardware hash key. + * @hfunc: Defines the current RSS hash function used by HW (or to be set to). + * Valid values are one of the %ETH_RSS_HASH_*. + * @rsvd: Reserved for future extensions. + * @rss_config: RX ring/queue index for each hash value i.e., indirection table + * of @indir_size __u32 elements, followed by hash key of @key_size + * bytes. + * + * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the + * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of + * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested + * and a @indir_size of zero means the indir table should be reset to default + * values (if @rss_context == 0) or that the RSS context should be deleted. + * An hfunc of zero means that hash function setting is not requested. + */ +struct ethtool_rxfh { + __u32 cmd; + __u32 rss_context; + __u32 indir_size; + __u32 key_size; + __u8 hfunc; + __u8 rsvd8[3]; + __u32 rsvd32; + __u32 rss_config[0]; +}; +#define ETH_RXFH_CONTEXT_ALLOC 0xffffffff +#define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff + +/** + * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW + * @h_u: Flow field values to match (dependent on @flow_type) + * @m_u: Masks for flow field value bits to be ignored + * @vlan_tag: VLAN tag to match + * @vlan_tag_mask: Mask for VLAN tag bits to be ignored + * @data: Driver-dependent data to match + * @data_mask: Mask for driver-dependent data bits to be ignored + * @action: RX ring/queue index to deliver to (non-negative) or other action + * (negative, e.g. %ETHTOOL_RXNTUPLE_ACTION_DROP) + * + * For flow types %TCP_V4_FLOW, %UDP_V4_FLOW and %SCTP_V4_FLOW, where + * a field value and mask are both zero this is treated as if all mask + * bits are set i.e. the field is ignored. + */ +struct ethtool_rx_ntuple_flow_spec { + __u32 flow_type; + union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; + __u8 hdata[72]; + } h_u, m_u; + + __u16 vlan_tag; + __u16 vlan_tag_mask; + __u64 data; + __u64 data_mask; + + __s32 action; +#define ETHTOOL_RXNTUPLE_ACTION_DROP (-1) /* drop packet */ +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) /* clear filter */ +}; + +/** + * struct ethtool_rx_ntuple - command to set or clear RX flow filter + * @cmd: Command number - %ETHTOOL_SRXNTUPLE + * @fs: Flow filter specification + */ +struct ethtool_rx_ntuple { + __u32 cmd; + struct ethtool_rx_ntuple_flow_spec fs; +}; + +#define ETHTOOL_FLASH_MAX_FILENAME 128 +enum ethtool_flash_op_type { + ETHTOOL_FLASH_ALL_REGIONS = 0, +}; + +/* for passing firmware flashing related parameters */ +struct ethtool_flash { + __u32 cmd; + __u32 region; + char data[ETHTOOL_FLASH_MAX_FILENAME]; +}; + +/** + * struct ethtool_dump - used for retrieving, setting device dump + * @cmd: Command number - %ETHTOOL_GET_DUMP_FLAG, %ETHTOOL_GET_DUMP_DATA, or + * %ETHTOOL_SET_DUMP + * @version: FW version of the dump, filled in by driver + * @flag: driver dependent flag for dump setting, filled in by driver during + * get and filled in by ethtool for set operation. + * flag must be initialized by macro ETH_FW_DUMP_DISABLE value when + * firmware dump is disabled. + * @len: length of dump data, used as the length of the user buffer on entry to + * %ETHTOOL_GET_DUMP_DATA and this is returned as dump length by driver + * for %ETHTOOL_GET_DUMP_FLAG command + * @data: data collected for get dump data operation + */ +struct ethtool_dump { + __u32 cmd; + __u32 version; + __u32 flag; + __u32 len; + __u8 data[0]; +}; + +#define ETH_FW_DUMP_DISABLE 0 + +/* for returning and changing feature sets */ + +/** + * struct ethtool_get_features_block - block with state of 32 features + * @available: mask of changeable features + * @requested: mask of features requested to be enabled if possible + * @active: mask of currently enabled features + * @never_changed: mask of features not changeable for any device + */ +struct ethtool_get_features_block { + __u32 available; + __u32 requested; + __u32 active; + __u32 never_changed; +}; + +/** + * struct ethtool_gfeatures - command to get state of device's features + * @cmd: command number = %ETHTOOL_GFEATURES + * @size: On entry, the number of elements in the features[] array; + * on return, the number of elements in features[] needed to hold + * all features + * @features: state of features + */ +struct ethtool_gfeatures { + __u32 cmd; + __u32 size; + struct ethtool_get_features_block features[0]; +}; + +/** + * struct ethtool_set_features_block - block with request for 32 features + * @valid: mask of features to be changed + * @requested: values of features to be changed + */ +struct ethtool_set_features_block { + __u32 valid; + __u32 requested; +}; + +/** + * struct ethtool_sfeatures - command to request change in device's features + * @cmd: command number = %ETHTOOL_SFEATURES + * @size: array size of the features[] array + * @features: feature change masks + */ +struct ethtool_sfeatures { + __u32 cmd; + __u32 size; + struct ethtool_set_features_block features[0]; +}; + +/** + * struct ethtool_ts_info - holds a device's timestamping and PHC association + * @cmd: command number = %ETHTOOL_GET_TS_INFO + * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags + * @phc_index: device index of the associated PHC, or -1 if there is none + * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + * + * The bits in the 'tx_types' and 'rx_filters' fields correspond to + * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, + * respectively. For example, if the device supports HWTSTAMP_TX_ON, + * then (1 << HWTSTAMP_TX_ON) in 'tx_types' will be set. + * + * Drivers should only report the filters they actually support without + * upscaling in the SIOCSHWTSTAMP ioctl. If the SIOCSHWSTAMP request for + * HWTSTAMP_FILTER_V1_SYNC is supported by HWTSTAMP_FILTER_V1_EVENT, then the + * driver should only report HWTSTAMP_FILTER_V1_EVENT in this op. + */ +struct ethtool_ts_info { + __u32 cmd; + __u32 so_timestamping; + __s32 phc_index; + __u32 tx_types; + __u32 tx_reserved[3]; + __u32 rx_filters; + __u32 rx_reserved[3]; +}; + +/* + * %ETHTOOL_SFEATURES changes features present in features[].valid to the + * values of corresponding bits in features[].requested. Bits in .requested + * not set in .valid or not changeable are ignored. + * + * Returns %EINVAL when .valid contains undefined or never-changeable bits + * or size is not equal to required number of features words (32-bit blocks). + * Returns >= 0 if request was completed; bits set in the value mean: + * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not + * changeable (not present in %ETHTOOL_GFEATURES' features[].available) + * those bits were ignored. + * %ETHTOOL_F_WISH - some or all changes requested were recorded but the + * resulting state of bits masked by .valid is not equal to .requested. + * Probably there are other device-specific constraints on some features + * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered + * here as though ignored bits were cleared. + * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling + * compatibility functions. Requested offload state cannot be properly + * managed by kernel. + * + * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of + * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands + * for ETH_SS_FEATURES string set. First entry in the table corresponds to least + * significant bit in features[0] fields. Empty strings mark undefined features. + */ +enum ethtool_sfeatures_retval_bits { + ETHTOOL_F_UNSUPPORTED__BIT, + ETHTOOL_F_WISH__BIT, + ETHTOOL_F_COMPAT__BIT, +}; + +#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) +#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) +#define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) + +#define MAX_NUM_QUEUE 4096 + +/** + * struct ethtool_per_queue_op - apply sub command to the queues in mask. + * @cmd: ETHTOOL_PERQUEUE + * @sub_command: the sub command which apply to each queues + * @queue_mask: Bitmap of the queues which sub command apply to + * @data: A complete command structure following for each of the queues addressed + */ +struct ethtool_per_queue_op { + __u32 cmd; + __u32 sub_command; + __u32 queue_mask[__KERNEL_DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; + char data[]; +}; + +/** + * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM + * @active_fec: FEC mode which is active on porte + * @fec: Bitmask of supported/configured FEC modes + * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * + * Drivers should reject a non-zero setting of @autoneg when + * autoneogotiation is disabled (or not supported) for the link. + * + */ +struct ethtool_fecparam { + __u32 cmd; + /* bitmask of FEC modes */ + __u32 active_fec; + __u32 fec; + __u32 reserved; +}; + +/** + * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration + * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported + * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver + * @ETHTOOL_FEC_OFF: No FEC Mode + * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + */ +enum ethtool_fec_config_bits { + ETHTOOL_FEC_NONE_BIT, + ETHTOOL_FEC_AUTO_BIT, + ETHTOOL_FEC_OFF_BIT, + ETHTOOL_FEC_RS_BIT, + ETHTOOL_FEC_BASER_BIT, + ETHTOOL_FEC_LLRS_BIT, +}; + +#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) +#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT) +#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) +#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) +#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) +#define ETHTOOL_FEC_LLRS (1 << ETHTOOL_FEC_LLRS_BIT) + +/* CMDs currently supported */ +#define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. + * Please use ETHTOOL_GLINKSETTINGS + */ +#define ETHTOOL_SSET 0x00000002 /* DEPRECATED, Set settings. + * Please use ETHTOOL_SLINKSETTINGS + */ +#define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */ +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */ +#define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */ +#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options. */ +#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */ +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */ +/* Get link status for host, i.e. whether the interface *and* the + * physical port (if there is one) are up (ethtool_value). */ +#define ETHTOOL_GLINK 0x0000000a +#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */ +#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ +#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ +#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ +#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters. */ +#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ +#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ +#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ +#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ +#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ +#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ +#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable + * (ethtool_value) */ +#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable + * (ethtool_value). */ +#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test. */ +#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ +#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ +#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ +#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ +#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ +#define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ +#define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ +#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ +#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ +#define ETHTOOL_GFLAGS 0x00000025 /* Get flags bitmap(ethtool_value) */ +#define ETHTOOL_SFLAGS 0x00000026 /* Set flags bitmap(ethtool_value) */ +#define ETHTOOL_GPFLAGS 0x00000027 /* Get driver-private flags bitmap */ +#define ETHTOOL_SPFLAGS 0x00000028 /* Set driver-private flags bitmap */ + +#define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */ +#define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */ +#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */ +#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */ +#define ETHTOOL_GRXRINGS 0x0000002d /* Get RX rings available for LB */ +#define ETHTOOL_GRXCLSRLCNT 0x0000002e /* Get RX class rule count */ +#define ETHTOOL_GRXCLSRULE 0x0000002f /* Get RX classification rule */ +#define ETHTOOL_GRXCLSRLALL 0x00000030 /* Get all RX classification rule */ +#define ETHTOOL_SRXCLSRLDEL 0x00000031 /* Delete RX classification rule */ +#define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */ +#define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */ +#define ETHTOOL_RESET 0x00000034 /* Reset hardware */ +#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ +#define ETHTOOL_GRXNTUPLE 0x00000036 /* deprecated */ +#define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */ +#define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */ +#define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */ + +#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */ +#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ +#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ +#define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */ +#define ETHTOOL_SET_DUMP 0x0000003e /* Set dump settings */ +#define ETHTOOL_GET_DUMP_FLAG 0x0000003f /* Get dump settings */ +#define ETHTOOL_GET_DUMP_DATA 0x00000040 /* Get dump data */ +#define ETHTOOL_GET_TS_INFO 0x00000041 /* Get time stamping and PHC info */ +#define ETHTOOL_GMODULEINFO 0x00000042 /* Get plug-in module information */ +#define ETHTOOL_GMODULEEEPROM 0x00000043 /* Get plug-in module eeprom */ +#define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */ +#define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */ + +#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ +#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ +#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ +#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ +#define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ + +#define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ + +#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ +#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ +#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ +#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ +#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */ +#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */ + +/* compatibility with older code */ +#define SPARC_ETH_GSET ETHTOOL_GSET +#define SPARC_ETH_SSET ETHTOOL_SSET + +/* Link mode bit indices */ +enum ethtool_link_mode_bit_indices { + ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0, + ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1, + ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2, + ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5, + ETHTOOL_LINK_MODE_Autoneg_BIT = 6, + ETHTOOL_LINK_MODE_TP_BIT = 7, + ETHTOOL_LINK_MODE_AUI_BIT = 8, + ETHTOOL_LINK_MODE_MII_BIT = 9, + ETHTOOL_LINK_MODE_FIBRE_BIT = 10, + ETHTOOL_LINK_MODE_BNC_BIT = 11, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12, + ETHTOOL_LINK_MODE_Pause_BIT = 13, + ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14, + ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15, + ETHTOOL_LINK_MODE_Backplane_BIT = 16, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20, + ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27, + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31, + + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit + * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* + * macro for bits > 31. The only way to use indices > 31 is to + * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. + */ + + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44, + ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46, + ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, + + ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, + ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, + ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT = 52, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT = 53, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT = 54, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT = 55, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT = 56, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT = 57, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT = 58, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT = 59, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT = 60, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT = 61, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT = 62, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT = 63, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66, + ETHTOOL_LINK_MODE_100baseT1_Full_BIT = 67, + ETHTOOL_LINK_MODE_1000baseT1_Full_BIT = 68, + ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT = 69, + ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT = 70, + ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71, + ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, + ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, + ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74, + ETHTOOL_LINK_MODE_100000baseKR_Full_BIT = 75, + ETHTOOL_LINK_MODE_100000baseSR_Full_BIT = 76, + ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT = 77, + ETHTOOL_LINK_MODE_100000baseCR_Full_BIT = 78, + ETHTOOL_LINK_MODE_100000baseDR_Full_BIT = 79, + ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT = 80, + ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT = 81, + ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT = 82, + ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT = 83, + ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT = 84, + ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT = 85, + ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT = 86, + ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87, + ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88, + ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, + /* must be last entry */ + __ETHTOOL_LINK_MODE_MASK_NBITS +}; + +#define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ + (1UL << (ETHTOOL_LINK_MODE_ ## base_name ## _BIT)) + +/* DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new SUPPORTED_* macro for bits > 31. + */ +#define SUPPORTED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define SUPPORTED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define SUPPORTED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define SUPPORTED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define SUPPORTED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define SUPPORTED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define SUPPORTED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define SUPPORTED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define SUPPORTED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define SUPPORTED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define SUPPORTED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define SUPPORTED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define SUPPORTED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define SUPPORTED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define SUPPORTED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define SUPPORTED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define SUPPORTED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define SUPPORTED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define SUPPORTED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define SUPPORTED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define SUPPORTED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define SUPPORTED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define SUPPORTED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define SUPPORTED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define SUPPORTED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define SUPPORTED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define SUPPORTED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define SUPPORTED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define SUPPORTED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define SUPPORTED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define SUPPORTED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new SUPPORTED_* macro for bits > 31, see + * notice above. + */ + +/* + * DEPRECATED macros. Please migrate to + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT + * define any new ADERTISE_* macro for bits > 31. + */ +#define ADVERTISED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) +#define ADVERTISED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) +#define ADVERTISED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) +#define ADVERTISED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) +#define ADVERTISED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) +#define ADVERTISED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) +#define ADVERTISED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) +#define ADVERTISED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) +#define ADVERTISED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) +#define ADVERTISED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) +#define ADVERTISED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) +#define ADVERTISED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) +#define ADVERTISED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) +#define ADVERTISED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) +#define ADVERTISED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) +#define ADVERTISED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) +#define ADVERTISED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) +#define ADVERTISED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) +#define ADVERTISED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) +#define ADVERTISED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) +#define ADVERTISED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) +#define ADVERTISED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) +#define ADVERTISED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) +#define ADVERTISED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) +#define ADVERTISED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) +#define ADVERTISED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) +#define ADVERTISED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) +#define ADVERTISED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) +#define ADVERTISED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) +#define ADVERTISED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) +#define ADVERTISED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) +/* Please do not define any new ADVERTISED_* macro for bits > 31, see + * notice above. + */ + +/* The following are all involved in forcing a particular link + * mode for the device for setting things. When getting the + * devices settings, these indicate the current mode and whether + * it was forced up into this mode or autonegotiated. + */ + +/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Update drivers/net/phy/phy.c:phy_speed_to_str() and + * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values. + */ +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_2500 2500 +#define SPEED_5000 5000 +#define SPEED_10000 10000 +#define SPEED_14000 14000 +#define SPEED_20000 20000 +#define SPEED_25000 25000 +#define SPEED_40000 40000 +#define SPEED_50000 50000 +#define SPEED_56000 56000 +#define SPEED_100000 100000 +#define SPEED_200000 200000 +#define SPEED_400000 400000 + +#define SPEED_UNKNOWN -1 + +static inline int ethtool_validate_speed(__u32 speed) +{ + return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN; +} + +/* Duplex, half or full. */ +#define DUPLEX_HALF 0x00 +#define DUPLEX_FULL 0x01 +#define DUPLEX_UNKNOWN 0xff + +static inline int ethtool_validate_duplex(__u8 duplex) +{ + switch (duplex) { + case DUPLEX_HALF: + case DUPLEX_FULL: + case DUPLEX_UNKNOWN: + return 1; + } + + return 0; +} + +#define MASTER_SLAVE_CFG_UNSUPPORTED 0 +#define MASTER_SLAVE_CFG_UNKNOWN 1 +#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2 +#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3 +#define MASTER_SLAVE_CFG_MASTER_FORCE 4 +#define MASTER_SLAVE_CFG_SLAVE_FORCE 5 +#define MASTER_SLAVE_STATE_UNSUPPORTED 0 +#define MASTER_SLAVE_STATE_UNKNOWN 1 +#define MASTER_SLAVE_STATE_MASTER 2 +#define MASTER_SLAVE_STATE_SLAVE 3 +#define MASTER_SLAVE_STATE_ERR 4 + +/* Which connector port. */ +#define PORT_TP 0x00 +#define PORT_AUI 0x01 +#define PORT_MII 0x02 +#define PORT_FIBRE 0x03 +#define PORT_BNC 0x04 +#define PORT_DA 0x05 +#define PORT_NONE 0xef +#define PORT_OTHER 0xff + +/* Which transceiver to use. */ +#define XCVR_INTERNAL 0x00 /* PHY and MAC are in the same package */ +#define XCVR_EXTERNAL 0x01 /* PHY and MAC are in different packages */ +#define XCVR_DUMMY1 0x02 +#define XCVR_DUMMY2 0x03 +#define XCVR_DUMMY3 0x04 + +/* Enable or disable autonegotiation. */ +#define AUTONEG_DISABLE 0x00 +#define AUTONEG_ENABLE 0x01 + +/* MDI or MDI-X status/control - if MDI/MDI_X/AUTO is set then + * the driver is required to renegotiate link + */ +#define ETH_TP_MDI_INVALID 0x00 /* status: unknown; control: unsupported */ +#define ETH_TP_MDI 0x01 /* status: MDI; control: force MDI */ +#define ETH_TP_MDI_X 0x02 /* status: MDI-X; control: force MDI-X */ +#define ETH_TP_MDI_AUTO 0x03 /* control: auto-select */ + +/* Wake-On-Lan options. */ +#define WAKE_PHY (1 << 0) +#define WAKE_UCAST (1 << 1) +#define WAKE_MCAST (1 << 2) +#define WAKE_BCAST (1 << 3) +#define WAKE_ARP (1 << 4) +#define WAKE_MAGIC (1 << 5) +#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ +#define WAKE_FILTER (1 << 7) + +#define WOL_MODE_COUNT 8 + +/* L2-L4 network traffic flow types */ +#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ +#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ +#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ +#define AH_ESP_V4_FLOW 0x04 /* hash only */ +#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ +#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ +#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ +#define AH_ESP_V6_FLOW 0x08 /* hash only */ +#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ +#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ +#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ +#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ +#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ +#define IP_USER_FLOW IPV4_USER_FLOW +#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ +#define IPV4_FLOW 0x10 /* hash only */ +#define IPV6_FLOW 0x11 /* hash only */ +#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ +/* Flag to enable additional fields in struct ethtool_rx_flow_spec */ +#define FLOW_EXT 0x80000000 +#define FLOW_MAC_EXT 0x40000000 +/* Flag to enable RSS spreading of traffic matching rule (nfc only) */ +#define FLOW_RSS 0x20000000 + +/* L3-L4 network traffic flow hash options */ +#define RXH_L2DA (1 << 1) +#define RXH_VLAN (1 << 2) +#define RXH_L3_PROTO (1 << 3) +#define RXH_IP_SRC (1 << 4) +#define RXH_IP_DST (1 << 5) +#define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ +#define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ +#define RXH_DISCARD (1 << 31) + +#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +#define RX_CLS_FLOW_WAKE 0xfffffffffffffffeULL + +/* Special RX classification rule insert location values */ +#define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */ +#define RX_CLS_LOC_ANY 0xffffffff +#define RX_CLS_LOC_FIRST 0xfffffffe +#define RX_CLS_LOC_LAST 0xfffffffd + +/* EEPROM Standards for plug in modules */ +#define ETH_MODULE_SFF_8079 0x1 +#define ETH_MODULE_SFF_8079_LEN 256 +#define ETH_MODULE_SFF_8472 0x2 +#define ETH_MODULE_SFF_8472_LEN 512 +#define ETH_MODULE_SFF_8636 0x3 +#define ETH_MODULE_SFF_8636_LEN 256 +#define ETH_MODULE_SFF_8436 0x4 +#define ETH_MODULE_SFF_8436_LEN 256 + +#define ETH_MODULE_SFF_8636_MAX_LEN 640 +#define ETH_MODULE_SFF_8436_MAX_LEN 640 + +/* Reset flags */ +/* The reset() operation must clear the flags for the components which + * were actually reset. On successful return, the flags indicate the + * components which were not reset, either because they do not exist + * in the hardware or because they cannot be reset independently. The + * driver must never reset any components that were not requested. + */ +enum ethtool_reset_flags { + /* These flags represent components dedicated to the interface + * the command is addressed to. Shift any flag left by + * ETH_RESET_SHARED_SHIFT to reset a shared component of the + * same type. + */ + ETH_RESET_MGMT = 1 << 0, /* Management processor */ + ETH_RESET_IRQ = 1 << 1, /* Interrupt requester */ + ETH_RESET_DMA = 1 << 2, /* DMA engine */ + ETH_RESET_FILTER = 1 << 3, /* Filtering/flow direction */ + ETH_RESET_OFFLOAD = 1 << 4, /* Protocol offload */ + ETH_RESET_MAC = 1 << 5, /* Media access controller */ + ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */ + ETH_RESET_RAM = 1 << 7, /* RAM shared between + * multiple components */ + ETH_RESET_AP = 1 << 8, /* Application processor */ + + ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to + * this interface */ + ETH_RESET_ALL = 0xffffffff, /* All components used by this + * interface, even if shared */ +}; +#define ETH_RESET_SHARED_SHIFT 16 + + +/** + * struct ethtool_link_settings - link control and status + * + * IMPORTANT, Backward compatibility notice: When implementing new + * user-space tools, please first try %ETHTOOL_GLINKSETTINGS, and + * if it succeeds use %ETHTOOL_SLINKSETTINGS to change link + * settings; do not use %ETHTOOL_SSET if %ETHTOOL_GLINKSETTINGS + * succeeded: stick to %ETHTOOL_GLINKSETTINGS/%SLINKSETTINGS in + * that case. Conversely, if %ETHTOOL_GLINKSETTINGS fails, use + * %ETHTOOL_GSET to query and %ETHTOOL_SSET to change link + * settings; do not use %ETHTOOL_SLINKSETTINGS if + * %ETHTOOL_GLINKSETTINGS failed: stick to + * %ETHTOOL_GSET/%ETHTOOL_SSET in that case. + * + * @cmd: Command number = %ETHTOOL_GLINKSETTINGS or %ETHTOOL_SLINKSETTINGS + * @speed: Link speed (Mbps) + * @duplex: Duplex mode; one of %DUPLEX_* + * @port: Physical connector type; one of %PORT_* + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not + * applicable. For clause 45 PHYs this is the PRTAD. + * @autoneg: Enable/disable autonegotiation and auto-detection; + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO + * protocols supported by the interface; 0 if unknown. + * Read-only. + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the + * value will be %ETH_TP_MDI_INVALID. Read-only. + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. + * When written successfully, the link should be renegotiated if + * necessary. + * @link_mode_masks_nwords: Number of 32-bit words for each of the + * supported, advertising, lp_advertising link mode bitmaps. For + * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user + * (>= 0); on return, if handshake in progress, negative if + * request size unsupported by kernel: absolute value indicates + * kernel expected size and all the other fields but cmd + * are 0; otherwise (handshake completed), strictly positive + * to indicate size used by kernel and cmd field stays + * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For + * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive + * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise + * refused. For drivers: ignore this field (use kernel's + * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will + * be overwritten by kernel. + * @supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * @advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. + * @lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. + * @transceiver: Used to distinguish different possible PHY types, + * reported consistently by PHYLIB. Read-only. + * + * If autonegotiation is disabled, the speed and @duplex represent the + * fixed link mode and are writable if the driver supports multiple + * link modes. If it is enabled then they are read-only; if the link + * is up they represent the negotiated link mode; if the link is down, + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. + * + * Some hardware interfaces may have multiple PHYs and/or physical + * connectors fitted or do not allow the driver to detect which are + * fitted. For these interfaces @port and/or @phy_address may be + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. + * Otherwise, attempts to write different values may be ignored or + * rejected. + * + * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt + * are not available in %ethtool_link_settings. These fields will be + * always set to zero in %ETHTOOL_GSET reply and %ETHTOOL_SSET will + * fail if any of them is set to non-zero value. + * + * Users should assume that all fields not marked read-only are + * writable and subject to validation by the driver. They should use + * %ETHTOOL_GLINKSETTINGS to get the current values before making specific + * changes and then applying them with %ETHTOOL_SLINKSETTINGS. + * + * Drivers that implement %get_link_ksettings and/or + * %set_link_ksettings should ignore the @cmd + * and @link_mode_masks_nwords fields (any change to them overwritten + * by kernel), and rely only on kernel's internal + * %__ETHTOOL_LINK_MODE_MASK_NBITS and + * %ethtool_link_mode_mask_t. Drivers that implement + * %set_link_ksettings() should validate all fields other than @cmd + * and @link_mode_masks_nwords that are not described as read-only or + * deprecated, and must ignore all fields described as read-only. + */ +struct ethtool_link_settings { + __u32 cmd; + __u32 speed; + __u8 duplex; + __u8 port; + __u8 phy_address; + __u8 autoneg; + __u8 mdio_support; + __u8 eth_tp_mdix; + __u8 eth_tp_mdix_ctrl; + __s8 link_mode_masks_nwords; + __u8 transceiver; + __u8 master_slave_cfg; + __u8 master_slave_state; + __u8 reserved1[1]; + __u32 reserved[7]; + __u32 link_mode_masks[0]; + /* layout of link_mode_masks fields: + * __u32 map_supported[link_mode_masks_nwords]; + * __u32 map_advertising[link_mode_masks_nwords]; + * __u32 map_lp_advertising[link_mode_masks_nwords]; + */ +}; +#endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/src/shared/linux/nl80211.h b/src/shared/linux/nl80211.h new file mode 100644 index 0000000..65edfff --- /dev/null +++ b/src/shared/linux/nl80211.h @@ -0,0 +1,6554 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __LINUX_NL80211_H +#define __LINUX_NL80211_H +/* + * 802.11 netlink interface public header + * + * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2008 Michael Wu <flamingice@sourmilk.net> + * Copyright 2008 Luis Carlos Cobo <luisca@cozybit.com> + * Copyright 2008 Michael Buesch <m@bues.ch> + * Copyright 2008, 2009 Luis R. Rodriguez <lrodriguez@atheros.com> + * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com> + * Copyright 2008 Colin McCabe <colin@cozybit.com> + * Copyright 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018-2019 Intel Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +/* + * This header file defines the userspace API to the wireless stack. Please + * be careful not to break things - i.e. don't move anything around or so + * unless you can demonstrate that it breaks neither API nor ABI. + * + * Additions to the API should be accompanied by actual implementations in + * an upstream driver, so that example implementations exist in case there + * are ever concerns about the precise semantics of the API or changes are + * needed, and to ensure that code for dead (no longer implemented) API + * can actually be identified and removed. + * Nonetheless, semantics should also be documented carefully in this file. + */ + +#include <linux/types.h> + +#define NL80211_GENL_NAME "nl80211" + +#define NL80211_MULTICAST_GROUP_CONFIG "config" +#define NL80211_MULTICAST_GROUP_SCAN "scan" +#define NL80211_MULTICAST_GROUP_REG "regulatory" +#define NL80211_MULTICAST_GROUP_MLME "mlme" +#define NL80211_MULTICAST_GROUP_VENDOR "vendor" +#define NL80211_MULTICAST_GROUP_NAN "nan" +#define NL80211_MULTICAST_GROUP_TESTMODE "testmode" + +#define NL80211_EDMG_BW_CONFIG_MIN 4 +#define NL80211_EDMG_BW_CONFIG_MAX 15 +#define NL80211_EDMG_CHANNELS_MIN 1 +#define NL80211_EDMG_CHANNELS_MAX 0x3c /* 0b00111100 */ + +/** + * DOC: Station handling + * + * Stations are added per interface, but a special case exists with VLAN + * interfaces. When a station is bound to an AP interface, it may be moved + * into a VLAN identified by a VLAN interface index (%NL80211_ATTR_STA_VLAN). + * The station is still assumed to belong to the AP interface it was added + * to. + * + * Station handling varies per interface type and depending on the driver's + * capabilities. + * + * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS + * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows: + * - a setup station entry is added, not yet authorized, without any rate + * or capability information, this just exists to avoid race conditions + * - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid + * to add rate and capability information to the station and at the same + * time mark it authorized. + * - %NL80211_TDLS_ENABLE_LINK is then used + * - after this, the only valid operation is to remove it by tearing down + * the TDLS link (%NL80211_TDLS_DISABLE_LINK) + * + * TODO: need more info for other interface types + */ + +/** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. + * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + +/** + * DOC: Virtual interface / concurrency capabilities + * + * Some devices are able to operate with virtual MACs, they can have + * more than one virtual interface. The capability handling for this + * is a bit complex though, as there may be a number of restrictions + * on the types of concurrency that are supported. + * + * To start with, each device supports the interface types listed in + * the %NL80211_ATTR_SUPPORTED_IFTYPES attribute, but by listing the + * types there no concurrency is implied. + * + * Once concurrency is desired, more attributes must be observed: + * To start with, since some interface types are purely managed in + * software, like the AP-VLAN type in mac80211 for example, there's + * an additional list of these, they can be added at any time and + * are only restricted by some semantic restrictions (e.g. AP-VLAN + * cannot be added without a corresponding AP interface). This list + * is exported in the %NL80211_ATTR_SOFTWARE_IFTYPES attribute. + * + * Further, the list of supported combinations is exported. This is + * in the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute. Basically, + * it exports a list of "groups", and at any point in time the + * interfaces that are currently active must fall into any one of + * the advertised groups. Within each group, there are restrictions + * on the number of interfaces of different types that are supported + * and also the number of different channels, along with potentially + * some other restrictions. See &enum nl80211_if_combination_attrs. + * + * All together, these attributes define the concurrency of virtual + * interfaces that a given device supports. + */ + +/** + * DOC: packet coalesce support + * + * In most cases, host that receives IPv4 and IPv6 multicast/broadcast + * packets does not do anything with these packets. Therefore the + * reception of these unwanted packets causes unnecessary processing + * and power consumption. + * + * Packet coalesce feature helps to reduce number of received interrupts + * to host by buffering these packets in firmware/hardware for some + * predefined time. Received interrupt will be generated when one of the + * following events occur. + * a) Expiration of hardware timer whose expiration time is set to maximum + * coalescing delay of matching coalesce rule. + * b) Coalescing buffer in hardware reaches it's limit. + * c) Packet doesn't match any of the configured coalesce rules. + * + * User needs to configure following parameters for creating a coalesce + * rule. + * a) Maximum coalescing delay + * b) List of packet patterns which needs to be matched + * c) Condition for coalescence. pattern 'match' or 'no match' + * Multiple such rules can be created. + */ + +/** + * DOC: WPA/WPA2 EAPOL handshake offload + * + * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers + * can indicate they support offloading EAPOL handshakes for WPA/WPA2 + * preshared key authentication. In %NL80211_CMD_CONNECT the preshared + * key should be specified using %NL80211_ATTR_PMK. Drivers supporting + * this offload may reject the %NL80211_CMD_CONNECT when no preshared + * key material is provided, for example when that driver does not + * support setting the temporal keys through %CMD_NEW_KEY. + * + * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be + * set by drivers indicating offload support of the PTK/GTK EAPOL + * handshakes during 802.1X authentication. In order to use the offload + * the %NL80211_CMD_CONNECT should have %NL80211_ATTR_WANT_1X_4WAY_HS + * attribute flag. Drivers supporting this offload may reject the + * %NL80211_CMD_CONNECT when the attribute flag is not present. + * + * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK + * using %NL80211_CMD_SET_PMK. For offloaded FT support also + * %NL80211_ATTR_PMKR0_NAME must be provided. + */ + +/** + * DOC: FILS shared key authentication offload + * + * FILS shared key authentication offload can be advertized by drivers by + * setting @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD flag. The drivers that support + * FILS shared key authentication offload should be able to construct the + * authentication and association frames for FILS shared key authentication and + * eventually do a key derivation as per IEEE 802.11ai. The below additional + * parameters should be given to driver in %NL80211_CMD_CONNECT and/or in + * %NL80211_CMD_UPDATE_CONNECT_PARAMS. + * %NL80211_ATTR_FILS_ERP_USERNAME - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_REALM - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used to construct erp message + * %NL80211_ATTR_FILS_ERP_RRK - used to generate the rIK and rMSK + * rIK should be used to generate an authentication tag on the ERP message and + * rMSK should be used to derive a PMKSA. + * rIK, rMSK should be generated and keyname_nai, sequence number should be used + * as specified in IETF RFC 6696. + * + * When FILS shared key authentication is completed, driver needs to provide the + * below additional parameters to userspace, which can be either after setting + * up a connection or after roaming. + * %NL80211_ATTR_FILS_KEK - used for key renewal + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used in further EAP-RP exchanges + * %NL80211_ATTR_PMKID - used to identify the PMKSA used/generated + * %Nl80211_ATTR_PMK - used to update PMKSA cache in userspace + * The PMKSA can be maintained in userspace persistently so that it can be used + * later after reboots or wifi turn off/on also. + * + * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertized by a FILS + * capable AP supporting PMK caching. It specifies the scope within which the + * PMKSAs are cached in an ESS. %NL80211_CMD_SET_PMKSA and + * %NL80211_CMD_DEL_PMKSA are enhanced to allow support for PMKSA caching based + * on FILS cache identifier. Additionally %NL80211_ATTR_PMK is used with + * %NL80211_SET_PMKSA to specify the PMK corresponding to a PMKSA for driver to + * use in a FILS shared key connection with PMKSA caching. + */ + +/** + * DOC: SAE authentication offload + * + * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they + * support offloading SAE authentication for WPA3-Personal networks. In + * %NL80211_CMD_CONNECT the password for SAE should be specified using + * %NL80211_ATTR_SAE_PASSWORD. + */ + +/** + * enum nl80211_commands - supported nl80211 commands + * + * @NL80211_CMD_UNSPEC: unspecified command to catch errors + * + * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request + * to get a list of all present wiphys. + * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ (and the + * attributes determining the channel width; this is used for setting + * monitor mode channel), %NL80211_ATTR_WIPHY_RETRY_SHORT, + * %NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + * and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD. + * However, for setting the channel, see %NL80211_CMD_SET_CHANNEL + * instead, the support here is for backward compatibility only. + * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request + * or rename notification. Has attributes %NL80211_ATTR_WIPHY and + * %NL80211_ATTR_WIPHY_NAME. + * @NL80211_CMD_DEL_WIPHY: Wiphy deleted. Has attributes + * %NL80211_ATTR_WIPHY and %NL80211_ATTR_WIPHY_NAME. + * + * @NL80211_CMD_GET_INTERFACE: Request an interface's configuration; + * either a dump request for all interfaces or a specific get with a + * single %NL80211_ATTR_IFINDEX is supported. + * @NL80211_CMD_SET_INTERFACE: Set type of a virtual interface, requires + * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_IFTYPE. + * @NL80211_CMD_NEW_INTERFACE: Newly created virtual interface or response + * to %NL80211_CMD_GET_INTERFACE. Has %NL80211_ATTR_IFINDEX, + * %NL80211_ATTR_WIPHY and %NL80211_ATTR_IFTYPE attributes. Can also + * be sent from userspace to request creation of a new virtual interface, + * then requires attributes %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFTYPE and + * %NL80211_ATTR_IFNAME. + * @NL80211_CMD_DEL_INTERFACE: Virtual interface was deleted, has attributes + * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_WIPHY. Can also be sent from + * userspace to request deletion of a virtual interface, then requires + * attribute %NL80211_ATTR_IFINDEX. + * + * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified + * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. + * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT, + * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD. + * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA, + * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER, + * and %NL80211_ATTR_KEY_SEQ attributes. + * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX + * or %NL80211_ATTR_MAC. + * + * @NL80211_CMD_GET_BEACON: (not used) + * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface + * using the %NL80211_ATTR_BEACON_HEAD and %NL80211_ATTR_BEACON_TAIL + * attributes. For drivers that generate the beacon and probe responses + * internally, the following attributes must be provided: %NL80211_ATTR_IE, + * %NL80211_ATTR_IE_PROBE_RESP and %NL80211_ATTR_IE_ASSOC_RESP. + * @NL80211_CMD_START_AP: Start AP operation on an AP interface, parameters + * are like for %NL80211_CMD_SET_BEACON, and additionally parameters that + * do not change are used, these include %NL80211_ATTR_BEACON_INTERVAL, + * %NL80211_ATTR_DTIM_PERIOD, %NL80211_ATTR_SSID, + * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE, + * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, + * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, + * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT, + * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS. + * The channel to use can be set on the interface or be given using the + * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width. + * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP + * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface + * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP + * + * @NL80211_CMD_GET_STATION: Get station attributes for station identified by + * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_STATION: Set station attributes for station identified by + * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_NEW_STATION: Add a station with given attributes to the + * the interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_DEL_STATION: Remove a station identified by %NL80211_ATTR_MAC + * or, if no MAC address given, all stations, on the interface identified + * by %NL80211_ATTR_IFINDEX. %NL80211_ATTR_MGMT_SUBTYPE and + * %NL80211_ATTR_REASON_CODE can optionally be used to specify which type + * of disconnection indication should be sent to the station + * (Deauthentication or Disassociation frame and reason code for that + * frame). + * + * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_MPATH: Set mesh path attributes for mesh path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_NEW_MPATH: Create a new mesh path for the destination given by + * %NL80211_ATTR_MAC via %NL80211_ATTR_MPATH_NEXT_HOP. + * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by + * %NL80211_ATTR_MAC. + * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the + * the interface identified by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC + * or, if no MAC address given, all mesh paths, on the interface identified + * by %NL80211_ATTR_IFINDEX. + * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by + * %NL80211_ATTR_IFINDEX. + * + * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set + * regulatory domain. If %NL80211_ATTR_WIPHY is specified and the device + * has a private regulatory domain, it will be returned. Otherwise, the + * global regdomain will be returned. + * A device will have a private regulatory domain if it uses the + * regulatory_hint() API. Even when a private regdomain is used the channel + * information will still be mended according to further hints from + * the regulatory core to help with compliance. A dump version of this API + * is now available which will returns the global regdomain as well as + * all private regdomains of present wiphys (for those that have it). + * If a wiphy is self-managed (%NL80211_ATTR_WIPHY_SELF_MANAGED_REG), then + * its private regdomain is the only valid one for it. The regulatory + * core is not used to help with compliance in this case. + * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command + * after being queried by the kernel. CRDA replies by sending a regulatory + * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our + * current alpha2 if it found a match. It also provides + * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each + * regulatory rule is a nested set of attributes given by + * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and + * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by + * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and + * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP. + * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain + * to the specified ISO/IEC 3166-1 alpha2 country code. The core will + * store this as a valid request and then query userspace for it. + * + * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * + * @NL80211_CMD_SET_MESH_CONFIG: Set mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * + * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The + * interface is identified with %NL80211_ATTR_IFINDEX and the management + * frame subtype with %NL80211_ATTR_MGMT_SUBTYPE. The extra IE data to be + * added to the end of the specified management frame is specified with + * %NL80211_ATTR_IE. If the command succeeds, the requested data will be + * added to all specified management frames generated by + * kernel/firmware/driver. + * Note: This command has been removed and it is only reserved at this + * point to avoid re-using existing command number. The functionality this + * command was planned for has been provided with cleaner design with the + * option to specify additional IEs in NL80211_CMD_TRIGGER_SCAN, + * NL80211_CMD_AUTHENTICATE, NL80211_CMD_ASSOCIATE, + * NL80211_CMD_DEAUTHENTICATE, and NL80211_CMD_DISASSOCIATE. + * + * @NL80211_CMD_GET_SCAN: get scan results + * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters + * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the + * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to + * specify a BSSID to scan for; if not included, the wildcard BSSID will + * be used. + * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to + * NL80211_CMD_GET_SCAN and on the "scan" multicast group) + * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons, + * partial scan results may be available + * + * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan at certain + * intervals and certain number of cycles, as specified by + * %NL80211_ATTR_SCHED_SCAN_PLANS. If %NL80211_ATTR_SCHED_SCAN_PLANS is + * not specified and only %NL80211_ATTR_SCHED_SCAN_INTERVAL is specified, + * scheduled scan will run in an infinite loop with the specified interval. + * These attributes are mutually exculsive, + * i.e. NL80211_ATTR_SCHED_SCAN_INTERVAL must not be passed if + * NL80211_ATTR_SCHED_SCAN_PLANS is defined. + * If for some reason scheduled scan is aborted by the driver, all scan + * plans are canceled (including scan plans that did not start yet). + * Like with normal scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS) + * are passed, they are used in the probe requests. For + * broadcast, a broadcast SSID must be passed (ie. an empty + * string). If no SSID is passed, no probe requests are sent and + * a passive scan is performed. %NL80211_ATTR_SCAN_FREQUENCIES, + * if passed, define which channels should be scanned; if not + * passed, all channels allowed for the current regulatory domain + * are used. Extra IEs can also be passed from the userspace by + * using the %NL80211_ATTR_IE attribute. The first cycle of the + * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY + * is supplied. If the device supports multiple concurrent scheduled + * scans, it will allow such when the caller provides the flag attribute + * %NL80211_ATTR_SCHED_SCAN_MULTI to indicate user-space support for it. + * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if + * scheduled scan is not running. The caller may assume that as soon + * as the call returns, it is safe to start a new scheduled scan again. + * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan + * results available. + * @NL80211_CMD_SCHED_SCAN_STOPPED: indicates that the scheduled scan has + * stopped. The driver may issue this event at any time during a + * scheduled scan. One reason for stopping the scan is if the hardware + * does not support starting an association or a normal scan while running + * a scheduled scan. This event is also sent when the + * %NL80211_CMD_STOP_SCHED_SCAN command is received or when the interface + * is brought down while a scheduled scan was running. + * + * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. channel occupation + * or noise level + * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to + * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) + * + * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry using %NL80211_ATTR_MAC + * (for the BSSID), %NL80211_ATTR_PMKID, and optionally %NL80211_ATTR_PMK + * (PMK is used for PTKSA derivation in case of FILS shared key offload) or + * using %NL80211_ATTR_SSID, %NL80211_ATTR_FILS_CACHE_ID, + * %NL80211_ATTR_PMKID, and %NL80211_ATTR_PMK in case of FILS + * authentication where %NL80211_ATTR_FILS_CACHE_ID is the identifier + * advertized by a FILS capable AP identifying the scope of PMKSA in an + * ESS. + * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC + * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID, + * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS + * authentication. + * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. + * + * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain + * has been changed and provides details of the request information + * that caused the change such as who initiated the regulatory request + * (%NL80211_ATTR_REG_INITIATOR), the wiphy_idx + * (%NL80211_ATTR_REG_ALPHA2) on which the request was made from if + * the initiator was %NL80211_REGDOM_SET_BY_COUNTRY_IE or + * %NL80211_REGDOM_SET_BY_DRIVER, the type of regulatory domain + * set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is + * %NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on + * to (%NL80211_ATTR_REG_ALPHA2). + * @NL80211_CMD_REG_BEACON_HINT: indicates to userspace that an AP beacon + * has been found while world roaming thus enabling active scan or + * any mode of operation that initiates TX (beacons) on a channel + * where we would not have been able to do either before. As an example + * if you are world roaming (regulatory domain set to world or if your + * driver is using a custom world roaming regulatory domain) and while + * doing a passive scan on the 5 GHz band you find an AP there (if not + * on a DFS channel) you will now be able to actively scan for that AP + * or use AP mode on your card on that same channel. Note that this will + * never be used for channels 1-11 on the 2 GHz band as they are always + * enabled world wide. This beacon hint is only sent if your device had + * either disabled active scanning or beaconing on a channel. We send to + * userspace the wiphy on which we removed a restriction from + * (%NL80211_ATTR_WIPHY) and the channel on which this occurred + * before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER) + * the beacon hint was processed. + * + * @NL80211_CMD_AUTHENTICATE: authentication request and notification. + * This command is used both as a command (request to authenticate) and + * as an event on the "mlme" multicast group indicating completion of the + * authentication process. + * When used as a command, %NL80211_ATTR_IFINDEX is used to identify the + * interface. %NL80211_ATTR_MAC is used to specify PeerSTAAddress (and + * BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify + * the SSID (mainly for association, but is included in authentication + * request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ is used + * to specify the frequence of the channel in MHz. %NL80211_ATTR_AUTH_TYPE + * is used to specify the authentication type. %NL80211_ATTR_IE is used to + * define IEs (VendorSpecificInfo, but also including RSN IE and FT IEs) + * to be added to the frame. + * When used as an event, this reports reception of an Authentication + * frame in station and IBSS modes when the local MLME processed the + * frame, i.e., it was for the local STA and was received in correct + * state. This is similar to MLME-AUTHENTICATE.confirm primitive in the + * MLME SAP interface (kernel providing MLME, userspace SME). The + * included %NL80211_ATTR_FRAME attribute contains the management frame + * (including both the header and frame body, but not FCS). This event is + * also used to indicate if the authentication attempt timed out. In that + * case the %NL80211_ATTR_FRAME attribute is replaced with a + * %NL80211_ATTR_TIMED_OUT flag (and %NL80211_ATTR_MAC to indicate which + * pending authentication timed out). + * @NL80211_CMD_ASSOCIATE: association request and notification; like + * NL80211_CMD_AUTHENTICATE but for Association and Reassociation + * (similar to MLME-ASSOCIATE.request, MLME-REASSOCIATE.request, + * MLME-ASSOCIATE.confirm or MLME-REASSOCIATE.confirm primitives). The + * %NL80211_ATTR_PREV_BSSID attribute is used to specify whether the + * request is for the initial association to an ESS (that attribute not + * included) or for reassociation within the ESS (that attribute is + * included). + * @NL80211_CMD_DEAUTHENTICATE: deauthentication request and notification; like + * NL80211_CMD_AUTHENTICATE but for Deauthentication frames (similar to + * MLME-DEAUTHENTICATION.request and MLME-DEAUTHENTICATE.indication + * primitives). + * @NL80211_CMD_DISASSOCIATE: disassociation request and notification; like + * NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to + * MLME-DISASSOCIATE.request and MLME-DISASSOCIATE.indication primitives). + * + * @NL80211_CMD_MICHAEL_MIC_FAILURE: notification of a locally detected Michael + * MIC (part of TKIP) failure; sent on the "mlme" multicast group; the + * event includes %NL80211_ATTR_MAC to describe the source MAC address of + * the frame with invalid MIC, %NL80211_ATTR_KEY_TYPE to show the key + * type, %NL80211_ATTR_KEY_IDX to indicate the key identifier, and + * %NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this + * event matches with MLME-MICHAELMICFAILURE.indication() primitive + * + * @NL80211_CMD_JOIN_IBSS: Join a new IBSS -- given at least an SSID and a + * FREQ attribute (for the initial frequency if no peer can be found) + * and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those + * should be fixed rather than automatically determined. Can only be + * executed on a network interface that is UP, and fixed BSSID/FREQ + * may be rejected. Another optional parameter is the beacon interval, + * given in the %NL80211_ATTR_BEACON_INTERVAL attribute, which if not + * given defaults to 100 TU (102.4ms). + * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is + * determined by the network interface. + * + * @NL80211_CMD_TESTMODE: testmode command, takes a wiphy (or ifindex) attribute + * to identify the device, and the TESTDATA blob attribute to pass through + * to the driver. + * + * @NL80211_CMD_CONNECT: connection request and notification; this command + * requests to connect to a specified network but without separating + * auth and assoc steps. For this, you need to specify the SSID in a + * %NL80211_ATTR_SSID attribute, and can optionally specify the association + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP, + * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + * %NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and + * %NL80211_ATTR_WIPHY_FREQ_HINT. + * If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are + * restrictions on BSS selection, i.e., they effectively prevent roaming + * within the ESS. %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT + * can be included to provide a recommendation of the initial BSS while + * allowing the driver to roam to other BSSes within the ESS and also to + * ignore this recommendation if the indicated BSS is not ideal. Only one + * set of BSSID,frequency parameters is used (i.e., either the enforcing + * %NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict + * %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT). + * %NL80211_ATTR_PREV_BSSID can be used to request a reassociation within + * the ESS in case the device is already associated and an association with + * a different BSS is desired. + * Background scan period can optionally be + * specified in %NL80211_ATTR_BG_SCAN_PERIOD, + * if not specified default background scan configuration + * in driver is used and if period value is 0, bg scan will be disabled. + * This attribute is ignored if driver does not support roam scan. + * It is also sent as an event, with the BSSID and response IEs when the + * connection is established or failed to be established. This can be + * determined by the %NL80211_ATTR_STATUS_CODE attribute (0 = success, + * non-zero = failure). If %NL80211_ATTR_TIMED_OUT is included in the + * event, the connection attempt failed due to not being able to initiate + * authentication/association or not receiving a response from the AP. + * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as + * well to remain backwards compatible. + * When establishing a security association, drivers that support 4 way + * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when + * the 4 way handshake is completed successfully. + * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself. + * When a security association was established with the new AP (e.g. if + * the FT protocol was used for roaming or the driver completed the 4 way + * handshake), this event should be followed by an + * %NL80211_CMD_PORT_AUTHORIZED event. + * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify + * userspace that a connection was dropped by the AP or due to other + * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and + * %NL80211_ATTR_REASON_CODE attributes are used. + * + * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices + * associated with this wiphy must be down and will follow. + * + * @NL80211_CMD_REMAIN_ON_CHANNEL: Request to remain awake on the specified + * channel for the specified amount of time. This can be used to do + * off-channel operations like transmit a Public Action frame and wait for + * a response while being associated to an AP on another channel. + * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus + * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the + * frequency for the operation. + * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds + * to remain on the channel. This command is also used as an event to + * notify when the requested duration starts (it may take a while for the + * driver to schedule this time due to other concurrent needs for the + * radio). + * When called, this operation returns a cookie (%NL80211_ATTR_COOKIE) + * that will be included with any events pertaining to this request; + * the cookie is also used to cancel the request. + * @NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL: This command can be used to cancel a + * pending remain-on-channel duration if the desired operation has been + * completed prior to expiration of the originally requested duration. + * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify the + * radio. The %NL80211_ATTR_COOKIE attribute must be given as well to + * uniquely identify the request. + * This command is also used as an event to notify when a requested + * remain-on-channel duration has expired. + * + * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX + * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface + * and @NL80211_ATTR_TX_RATES the set of allowed rates. + * + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for + * backward compatibility + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in + * kernel code, but is for us (i.e., which may need to be processed in a + * user space application). %NL80211_ATTR_FRAME is used to specify the + * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ is used + * to indicate on which channel the frame is to be transmitted or was + * received. If this channel is not the current channel (remain-on-channel + * or the operational channel) the device will switch to the given channel + * and transmit the frame, optionally waiting for a response for the time + * specified using %NL80211_ATTR_DURATION. When called, this operation + * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the + * TX status event pertaining to the TX request. + * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the + * management frames at CCK rate or not in 2GHz band. + * %NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA + * counters which will be updated to the current value. This attribute + * is used during CSA period. + * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this + * command may be used with the corresponding cookie to cancel the wait + * time if it is known that it is no longer necessary. This command is + * also sent as an event whenever the driver has completed the off-channel + * wait time. + * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies + * the TX command and %NL80211_ATTR_FRAME includes the contents of the + * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged + * the frame. + * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for + * backward compatibility. + * + * @NL80211_CMD_SET_POWER_SAVE: Set powersave, using %NL80211_ATTR_PS_STATE + * @NL80211_CMD_GET_POWER_SAVE: Get powersave status in %NL80211_ATTR_PS_STATE + * + * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command + * is used to configure connection quality monitoring notification trigger + * levels. + * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This + * command is used as an event to indicate the that a trigger level was + * reached. + * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ + * and the attributes determining channel width) the given interface + * (identifed by %NL80211_ATTR_IFINDEX) shall operate on. + * In case multiple channels are supported by the device, the mechanism + * with which it switches channels is implementation-defined. + * When a monitor interface is given, it can only switch channel while + * no other interfaces are operating to avoid disturbing the operation + * of any other interfaces, and other interfaces will again take + * precedence when they are used. + * + * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. + * + * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform + * multicast to unicast conversion. When enabled, all multicast packets + * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header) + * will be sent out to each station once with the destination (multicast) + * MAC address replaced by the station's MAC address. Note that this may + * break certain expectations of the receiver, e.g. the ability to drop + * unicast IP packets encapsulated in multicast L2 frames, or the ability + * to not send destination unreachable messages in such cases. + * This can only be toggled per BSS. Configure this on an interface of + * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces + * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode. + * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this + * command, the feature is disabled. + * + * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial + * mesh config parameters may be given. + * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the + * network is determined by the network interface. + * + * @NL80211_CMD_UNPROT_DEAUTHENTICATE: Unprotected deauthentication frame + * notification. This event is used to indicate that an unprotected + * deauthentication frame was dropped when MFP is in use. + * @NL80211_CMD_UNPROT_DISASSOCIATE: Unprotected disassociation frame + * notification. This event is used to indicate that an unprotected + * disassociation frame was dropped when MFP is in use. + * + * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a + * beacon or probe response from a compatible mesh peer. This is only + * sent while no station information (sta_info) exists for the new peer + * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH, + * @NL80211_MESH_SETUP_USERSPACE_AMPE, or + * @NL80211_MESH_SETUP_USERSPACE_MPM is set. On reception of this + * notification, userspace may decide to create a new station + * (@NL80211_CMD_NEW_STATION). To stop this notification from + * reoccurring, the userspace authentication daemon may want to create the + * new station with the AUTHENTICATED flag unset and maybe change it later + * depending on the authentication result. + * + * @NL80211_CMD_GET_WOWLAN: get Wake-on-Wireless-LAN (WoWLAN) settings. + * @NL80211_CMD_SET_WOWLAN: set Wake-on-Wireless-LAN (WoWLAN) settings. + * Since wireless is more complex than wired ethernet, it supports + * various triggers. These triggers can be configured through this + * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For + * more background information, see + * http://wireless.kernel.org/en/users/Documentation/WoWLAN. + * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification + * from the driver reporting the wakeup reason. In this case, the + * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason + * for the wakeup, if it was caused by wireless. If it is not present + * in the wakeup notification, the wireless device didn't cause the + * wakeup but reports that it was woken up. + * + * @NL80211_CMD_SET_REKEY_OFFLOAD: This command is used give the driver + * the necessary information for supporting GTK rekey offload. This + * feature is typically used during WoWLAN. The configuration data + * is contained in %NL80211_ATTR_REKEY_DATA (which is nested and + * contains the data in sub-attributes). After rekeying happened, + * this command may also be sent by the driver as an MLME event to + * inform userspace of the new replay counter. + * + * @NL80211_CMD_PMKSA_CANDIDATE: This is used as an event to inform userspace + * of PMKSA caching dandidates. + * + * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup). + * In addition, this can be used as an event to request userspace to take + * actions on TDLS links (set up a new link or tear down an existing one). + * In such events, %NL80211_ATTR_TDLS_OPERATION indicates the requested + * operation, %NL80211_ATTR_MAC contains the peer MAC address, and + * %NL80211_ATTR_REASON_CODE the reason code to be used (only with + * %NL80211_TDLS_TEARDOWN). + * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. The + * %NL80211_ATTR_TDLS_ACTION attribute determines the type of frame to be + * sent. Public Action codes (802.11-2012 8.1.5.1) will be sent as + * 802.11 management frames, while TDLS action codes (802.11-2012 + * 8.5.13.1) will be encapsulated and sent as data frames. The currently + * supported Public Action code is %WLAN_PUB_ACTION_TDLS_DISCOVER_RES + * and the currently supported TDLS actions codes are given in + * &enum ieee80211_tdls_actioncode. + * + * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP + * (or GO) interface (i.e. hostapd) to ask for unexpected frames to + * implement sending deauth to stations that send unexpected class 3 + * frames. Also used as the event sent by the kernel when such a frame + * is received. + * For the event, the %NL80211_ATTR_MAC attribute carries the TA and + * other attributes like the interface index are present. + * If used as the command it must have an interface index and you can + * only unsubscribe from the event by closing the socket. Subscription + * is also for %NL80211_CMD_UNEXPECTED_4ADDR_FRAME events. + * + * @NL80211_CMD_UNEXPECTED_4ADDR_FRAME: Sent as an event indicating that the + * associated station identified by %NL80211_ATTR_MAC sent a 4addr frame + * and wasn't already in a 4-addr VLAN. The event will be sent similarly + * to the %NL80211_CMD_UNEXPECTED_FRAME event, to the same listener. + * + * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface + * by sending a null data frame to it and reporting when the frame is + * acknowleged. This is used to allow timing out inactive clients. Uses + * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a + * direct reply with an %NL80211_ATTR_COOKIE that is later used to match + * up the event with the request. The event includes the same data and + * has %NL80211_ATTR_ACK set if the frame was ACKed. + * + * @NL80211_CMD_REGISTER_BEACONS: Register this socket to receive beacons from + * other BSSes when any interfaces are in AP mode. This helps implement + * OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME + * messages. Note that per PHY only one application may register. + * + * @NL80211_CMD_SET_NOACK_MAP: sets a bitmap for the individual TIDs whether + * No Acknowledgement Policy should be applied. + * + * @NL80211_CMD_CH_SWITCH_NOTIFY: An AP or GO may decide to switch channels + * independently of the userspace SME, send this event indicating + * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the + * attributes determining channel width. This indication may also be + * sent when a remotely-initiated switch (e.g., when a STA receives a CSA + * from the remote AP) is completed; + * + * @NL80211_CMD_CH_SWITCH_STARTED_NOTIFY: Notify that a channel switch + * has been started on an interface, regardless of the initiator + * (ie. whether it was requested from a remote device or + * initiated on our own). It indicates that + * %NL80211_ATTR_IFINDEX will be on %NL80211_ATTR_WIPHY_FREQ + * after %NL80211_ATTR_CH_SWITCH_COUNT TBTT's. The userspace may + * decide to react to this indication by requesting other + * interfaces to change channel as well. + * + * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by + * its %NL80211_ATTR_WDEV identifier. It must have been created with + * %NL80211_CMD_NEW_INTERFACE previously. After it has been started, the + * P2P Device can be used for P2P operations, e.g. remain-on-channel and + * public action frame TX. + * @NL80211_CMD_STOP_P2P_DEVICE: Stop the given P2P Device, identified by + * its %NL80211_ATTR_WDEV identifier. + * + * @NL80211_CMD_CONN_FAILED: connection request to an AP failed; used to + * notify userspace that AP has rejected the connection request from a + * station, due to particular reason. %NL80211_ATTR_CONN_FAILED_REASON + * is used for this. + * + * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames + * for IBSS or MESH vif. + * + * @NL80211_CMD_SET_MAC_ACL: sets ACL for MAC address based access control. + * This is to be used with the drivers advertising the support of MAC + * address based access control. List of MAC addresses is passed in + * %NL80211_ATTR_MAC_ADDRS and ACL policy is passed in + * %NL80211_ATTR_ACL_POLICY. Driver will enable ACL with this list, if it + * is not already done. The new list will replace any existing list. Driver + * will clear its ACL when the list of MAC addresses passed is empty. This + * command is used in AP/P2P GO mode. Driver has to make sure to clear its + * ACL list during %NL80211_CMD_STOP_AP. + * + * @NL80211_CMD_RADAR_DETECT: Start a Channel availability check (CAC). Once + * a radar is detected or the channel availability scan (CAC) has finished + * or was aborted, or a radar was detected, usermode will be notified with + * this event. This command is also used to notify userspace about radars + * while operating on this channel. + * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the + * event. + * + * @NL80211_CMD_GET_PROTOCOL_FEATURES: Get global nl80211 protocol features, + * i.e. features for the nl80211 protocol rather than device features. + * Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap. + * + * @NL80211_CMD_UPDATE_FT_IES: Pass down the most up-to-date Fast Transition + * Information Element to the WLAN driver + * + * @NL80211_CMD_FT_EVENT: Send a Fast transition event from the WLAN driver + * to the supplicant. This will carry the target AP's MAC address along + * with the relevant Information Elements. This event is used to report + * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE). + * + * @NL80211_CMD_CRIT_PROTOCOL_START: Indicates user-space will start running + * a critical protocol that needs more reliability in the connection to + * complete. + * + * @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can + * return back to normal. + * + * @NL80211_CMD_GET_COALESCE: Get currently supported coalesce rules. + * @NL80211_CMD_SET_COALESCE: Configure coalesce rules or clear existing rules. + * + * @NL80211_CMD_CHANNEL_SWITCH: Perform a channel switch by announcing the + * the new channel information (Channel Switch Announcement - CSA) + * in the beacon for some time (as defined in the + * %NL80211_ATTR_CH_SWITCH_COUNT parameter) and then change to the + * new channel. Userspace provides the new channel information (using + * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel + * width). %NL80211_ATTR_CH_SWITCH_BLOCK_TX may be supplied to inform + * other station that transmission must be blocked until the channel + * switch is complete. + * + * @NL80211_CMD_VENDOR: Vendor-specified command/event. The command is specified + * by the %NL80211_ATTR_VENDOR_ID attribute and a sub-command in + * %NL80211_ATTR_VENDOR_SUBCMD. Parameter(s) can be transported in + * %NL80211_ATTR_VENDOR_DATA. + * For feature advertisement, the %NL80211_ATTR_VENDOR_DATA attribute is + * used in the wiphy data as a nested attribute containing descriptions + * (&struct nl80211_vendor_cmd_info) of the supported vendor commands. + * This may also be sent as an event with the same attributes. + * + * @NL80211_CMD_SET_QOS_MAP: Set Interworking QoS mapping for IP DSCP values. + * The QoS mapping information is included in %NL80211_ATTR_QOS_MAP. If + * that attribute is not included, QoS mapping is disabled. Since this + * QoS mapping is relevant for IP packets, it is only valid during an + * association. This is cleared on disassociation and AP restart. + * + * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given + * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO + * and %NL80211_ATTR_ADMITTED_TIME parameters. + * Note that the action frame handshake with the AP shall be handled by + * userspace via the normal management RX/TX framework, this only sets + * up the TX TS in the driver/device. + * If the admitted time attribute is not added then the request just checks + * if a subsequent setup could be successful, the intent is to use this to + * avoid setting up a session with the AP when local restrictions would + * make that impossible. However, the subsequent "real" setup may still + * fail even if the check was successful. + * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID + * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this + * before removing a station entry entirely, or before disassociating + * or similar, cleanup will happen in the driver/device in this case. + * + * @NL80211_CMD_GET_MPP: Get mesh path attributes for mesh proxy path to + * destination %NL80211_ATTR_MAC on the interface identified by + * %NL80211_ATTR_IFINDEX. + * + * @NL80211_CMD_JOIN_OCB: Join the OCB network. The center frequency and + * bandwidth of a channel must be given. + * @NL80211_CMD_LEAVE_OCB: Leave the OCB network -- no special arguments, the + * network is determined by the network interface. + * + * @NL80211_CMD_TDLS_CHANNEL_SWITCH: Start channel-switching with a TDLS peer, + * identified by the %NL80211_ATTR_MAC parameter. A target channel is + * provided via %NL80211_ATTR_WIPHY_FREQ and other attributes determining + * channel width/type. The target operating class is given via + * %NL80211_ATTR_OPER_CLASS. + * The driver is responsible for continually initiating channel-switching + * operations and returning to the base channel for communication with the + * AP. + * @NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH: Stop channel-switching with a TDLS + * peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel + * when this command completes. + * + * @NL80211_CMD_WIPHY_REG_CHANGE: Similar to %NL80211_CMD_REG_CHANGE, but used + * as an event to indicate changes for devices with wiphy-specific regdom + * management. + * + * @NL80211_CMD_ABORT_SCAN: Stop an ongoing scan. Returns -ENOENT if a scan is + * not running. The driver indicates the status of the scan through + * cfg80211_scan_done(). + * + * @NL80211_CMD_START_NAN: Start NAN operation, identified by its + * %NL80211_ATTR_WDEV interface. This interface must have been + * previously created with %NL80211_CMD_NEW_INTERFACE. After it + * has been started, the NAN interface will create or join a + * cluster. This command must have a valid + * %NL80211_ATTR_NAN_MASTER_PREF attribute and optional + * %NL80211_ATTR_BANDS attributes. If %NL80211_ATTR_BANDS is + * omitted or set to 0, it means don't-care and the device will + * decide what to use. After this command NAN functions can be + * added. + * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by + * its %NL80211_ATTR_WDEV interface. + * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined + * with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this + * operation returns the strictly positive and unique instance id + * (%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE) + * of the function upon success. + * Since instance ID's can be re-used, this cookie is the right + * way to identify the function. This will avoid races when a termination + * event is handled by the user space after it has already added a new + * function that got the same instance id from the kernel as the one + * which just terminated. + * This cookie may be used in NAN events even before the command + * returns, so userspace shouldn't process NAN events until it processes + * the response to this command. + * Look at %NL80211_ATTR_SOCKET_OWNER as well. + * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. + * This command is also used as a notification sent when a NAN function is + * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID + * and %NL80211_ATTR_COOKIE attributes. + * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN + * configuration. NAN must be operational (%NL80211_CMD_START_NAN + * was executed). It must contain at least one of the following + * attributes: %NL80211_ATTR_NAN_MASTER_PREF, + * %NL80211_ATTR_BANDS. If %NL80211_ATTR_BANDS is omitted, the + * current configuration is not changed. If it is present but + * set to zero, the configuration is changed to don't-care + * (i.e. the device can decide what to do). + * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported. + * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and + * %NL80211_ATTR_COOKIE. + * + * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters + * for subsequent roaming cases if the driver or firmware uses internal + * BSS selection. This command can be issued only while connected and it + * does not result in a change for the current association. Currently, + * only the %NL80211_ATTR_IE data is used and updated with this command. + * + * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0 + * for the given authenticator address (specified with %NL80211_ATTR_MAC). + * When %NL80211_ATTR_PMKR0_NAME is set, %NL80211_ATTR_PMK specifies the + * PMK-R0, otherwise it specifies the PMK. + * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously + * configured PMK for the authenticator address identified by + * %NL80211_ATTR_MAC. + * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way + * handshake was completed successfully by the driver. The BSSID is + * specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake + * offload should send this event after indicating 802.11 association with + * %NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed + * %NL80211_CMD_DISCONNECT should be indicated instead. + * + * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request + * and RX notification. This command is used both as a request to transmit + * a control port frame and as a notification that a control port frame + * has been received. %NL80211_ATTR_FRAME is used to specify the + * frame contents. The frame is the raw EAPoL data, without ethernet or + * 802.11 headers. + * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added + * indicating the protocol type of the received frame; whether the frame + * was received unencrypted and the MAC address of the peer respectively. + * + * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. + * + * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host + * drivers that do not define separate commands for authentication and + * association, but rely on user space for the authentication to happen. + * This interface acts both as the event request (driver to user space) + * to trigger the authentication and command response (userspace to + * driver) to indicate the authentication status. + * + * User space uses the %NL80211_CMD_CONNECT command to the host driver to + * trigger a connection. The host driver selects a BSS and further uses + * this interface to offload only the authentication part to the user + * space. Authentication frames are passed between the driver and user + * space through the %NL80211_CMD_FRAME interface. Host driver proceeds + * further with the association after getting successful authentication + * status. User space indicates the authentication status through + * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH + * command interface. + * + * Host driver reports this status on an authentication failure to the + * user space through the connect result as the user space would have + * initiated the connection through the connect request. + * + * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's + * ht opmode or vht opmode changes using any of %NL80211_ATTR_SMPS_MODE, + * %NL80211_ATTR_CHANNEL_WIDTH,%NL80211_ATTR_NSS attributes with its + * address(specified in %NL80211_ATTR_MAC). + * + * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in + * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute. + * + * @NL80211_CMD_PEER_MEASUREMENT_START: start a (set of) peer measurement(s) + * with the given parameters, which are encapsulated in the nested + * %NL80211_ATTR_PEER_MEASUREMENTS attribute. Optionally, MAC address + * randomization may be enabled and configured by specifying the + * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes. + * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute. + * A u64 cookie for further %NL80211_ATTR_COOKIE use is is returned in + * the netlink extended ack message. + * + * To cancel a measurement, close the socket that requested it. + * + * Measurement results are reported to the socket that requested the + * measurement using @NL80211_CMD_PEER_MEASUREMENT_RESULT when they + * become available, so applications must ensure a large enough socket + * buffer size. + * + * Depending on driver support it may or may not be possible to start + * multiple concurrent measurements. + * @NL80211_CMD_PEER_MEASUREMENT_RESULT: This command number is used for the + * result notification from the driver to the requesting socket. + * @NL80211_CMD_PEER_MEASUREMENT_COMPLETE: Notification only, indicating that + * the measurement completed, using the measurement cookie + * (%NL80211_ATTR_COOKIE). + * + * @NL80211_CMD_NOTIFY_RADAR: Notify the kernel that a radar signal was + * detected and reported by a neighboring device on the channel + * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes + * determining the width and type. + * + * @NL80211_CMD_UPDATE_OWE_INFO: This interface allows the host driver to + * offload OWE processing to user space. This intends to support + * OWE AKM by the host drivers that implement SME but rely + * on the user space for the cryptographic/DH IE processing in AP mode. + * + * @NL80211_CMD_PROBE_MESH_LINK: The requirement for mesh link metric + * refreshing, is that from one mesh point we be able to send some data + * frames to other mesh points which are not currently selected as a + * primary traffic path, but which are only 1 hop away. The absence of + * the primary path to the chosen node makes it necessary to apply some + * form of marking on a chosen packet stream so that the packets can be + * properly steered to the selected node for testing, and not by the + * regular mesh path lookup. Further, the packets must be of type data + * so that the rate control (often embedded in firmware) is used for + * rate selection. + * + * Here attribute %NL80211_ATTR_MAC is used to specify connected mesh + * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame + * content. The frame is ethernet data. + * + * @NL80211_CMD_MAX: highest used command number + * @__NL80211_CMD_AFTER_LAST: internal use + */ +enum nl80211_commands { +/* don't change the order or add anything between, this is ABI! */ + NL80211_CMD_UNSPEC, + + NL80211_CMD_GET_WIPHY, /* can dump */ + NL80211_CMD_SET_WIPHY, + NL80211_CMD_NEW_WIPHY, + NL80211_CMD_DEL_WIPHY, + + NL80211_CMD_GET_INTERFACE, /* can dump */ + NL80211_CMD_SET_INTERFACE, + NL80211_CMD_NEW_INTERFACE, + NL80211_CMD_DEL_INTERFACE, + + NL80211_CMD_GET_KEY, + NL80211_CMD_SET_KEY, + NL80211_CMD_NEW_KEY, + NL80211_CMD_DEL_KEY, + + NL80211_CMD_GET_BEACON, + NL80211_CMD_SET_BEACON, + NL80211_CMD_START_AP, + NL80211_CMD_NEW_BEACON = NL80211_CMD_START_AP, + NL80211_CMD_STOP_AP, + NL80211_CMD_DEL_BEACON = NL80211_CMD_STOP_AP, + + NL80211_CMD_GET_STATION, + NL80211_CMD_SET_STATION, + NL80211_CMD_NEW_STATION, + NL80211_CMD_DEL_STATION, + + NL80211_CMD_GET_MPATH, + NL80211_CMD_SET_MPATH, + NL80211_CMD_NEW_MPATH, + NL80211_CMD_DEL_MPATH, + + NL80211_CMD_SET_BSS, + + NL80211_CMD_SET_REG, + NL80211_CMD_REQ_SET_REG, + + NL80211_CMD_GET_MESH_CONFIG, + NL80211_CMD_SET_MESH_CONFIG, + + NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */, + + NL80211_CMD_GET_REG, + + NL80211_CMD_GET_SCAN, + NL80211_CMD_TRIGGER_SCAN, + NL80211_CMD_NEW_SCAN_RESULTS, + NL80211_CMD_SCAN_ABORTED, + + NL80211_CMD_REG_CHANGE, + + NL80211_CMD_AUTHENTICATE, + NL80211_CMD_ASSOCIATE, + NL80211_CMD_DEAUTHENTICATE, + NL80211_CMD_DISASSOCIATE, + + NL80211_CMD_MICHAEL_MIC_FAILURE, + + NL80211_CMD_REG_BEACON_HINT, + + NL80211_CMD_JOIN_IBSS, + NL80211_CMD_LEAVE_IBSS, + + NL80211_CMD_TESTMODE, + + NL80211_CMD_CONNECT, + NL80211_CMD_ROAM, + NL80211_CMD_DISCONNECT, + + NL80211_CMD_SET_WIPHY_NETNS, + + NL80211_CMD_GET_SURVEY, + NL80211_CMD_NEW_SURVEY_RESULTS, + + NL80211_CMD_SET_PMKSA, + NL80211_CMD_DEL_PMKSA, + NL80211_CMD_FLUSH_PMKSA, + + NL80211_CMD_REMAIN_ON_CHANNEL, + NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + + NL80211_CMD_SET_TX_BITRATE_MASK, + + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, + + NL80211_CMD_SET_POWER_SAVE, + NL80211_CMD_GET_POWER_SAVE, + + NL80211_CMD_SET_CQM, + NL80211_CMD_NOTIFY_CQM, + + NL80211_CMD_SET_CHANNEL, + NL80211_CMD_SET_WDS_PEER, + + NL80211_CMD_FRAME_WAIT_CANCEL, + + NL80211_CMD_JOIN_MESH, + NL80211_CMD_LEAVE_MESH, + + NL80211_CMD_UNPROT_DEAUTHENTICATE, + NL80211_CMD_UNPROT_DISASSOCIATE, + + NL80211_CMD_NEW_PEER_CANDIDATE, + + NL80211_CMD_GET_WOWLAN, + NL80211_CMD_SET_WOWLAN, + + NL80211_CMD_START_SCHED_SCAN, + NL80211_CMD_STOP_SCHED_SCAN, + NL80211_CMD_SCHED_SCAN_RESULTS, + NL80211_CMD_SCHED_SCAN_STOPPED, + + NL80211_CMD_SET_REKEY_OFFLOAD, + + NL80211_CMD_PMKSA_CANDIDATE, + + NL80211_CMD_TDLS_OPER, + NL80211_CMD_TDLS_MGMT, + + NL80211_CMD_UNEXPECTED_FRAME, + + NL80211_CMD_PROBE_CLIENT, + + NL80211_CMD_REGISTER_BEACONS, + + NL80211_CMD_UNEXPECTED_4ADDR_FRAME, + + NL80211_CMD_SET_NOACK_MAP, + + NL80211_CMD_CH_SWITCH_NOTIFY, + + NL80211_CMD_START_P2P_DEVICE, + NL80211_CMD_STOP_P2P_DEVICE, + + NL80211_CMD_CONN_FAILED, + + NL80211_CMD_SET_MCAST_RATE, + + NL80211_CMD_SET_MAC_ACL, + + NL80211_CMD_RADAR_DETECT, + + NL80211_CMD_GET_PROTOCOL_FEATURES, + + NL80211_CMD_UPDATE_FT_IES, + NL80211_CMD_FT_EVENT, + + NL80211_CMD_CRIT_PROTOCOL_START, + NL80211_CMD_CRIT_PROTOCOL_STOP, + + NL80211_CMD_GET_COALESCE, + NL80211_CMD_SET_COALESCE, + + NL80211_CMD_CHANNEL_SWITCH, + + NL80211_CMD_VENDOR, + + NL80211_CMD_SET_QOS_MAP, + + NL80211_CMD_ADD_TX_TS, + NL80211_CMD_DEL_TX_TS, + + NL80211_CMD_GET_MPP, + + NL80211_CMD_JOIN_OCB, + NL80211_CMD_LEAVE_OCB, + + NL80211_CMD_CH_SWITCH_STARTED_NOTIFY, + + NL80211_CMD_TDLS_CHANNEL_SWITCH, + NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + + NL80211_CMD_WIPHY_REG_CHANGE, + + NL80211_CMD_ABORT_SCAN, + + NL80211_CMD_START_NAN, + NL80211_CMD_STOP_NAN, + NL80211_CMD_ADD_NAN_FUNCTION, + NL80211_CMD_DEL_NAN_FUNCTION, + NL80211_CMD_CHANGE_NAN_CONFIG, + NL80211_CMD_NAN_MATCH, + + NL80211_CMD_SET_MULTICAST_TO_UNICAST, + + NL80211_CMD_UPDATE_CONNECT_PARAMS, + + NL80211_CMD_SET_PMK, + NL80211_CMD_DEL_PMK, + + NL80211_CMD_PORT_AUTHORIZED, + + NL80211_CMD_RELOAD_REGDB, + + NL80211_CMD_EXTERNAL_AUTH, + + NL80211_CMD_STA_OPMODE_CHANGED, + + NL80211_CMD_CONTROL_PORT_FRAME, + + NL80211_CMD_GET_FTM_RESPONDER_STATS, + + NL80211_CMD_PEER_MEASUREMENT_START, + NL80211_CMD_PEER_MEASUREMENT_RESULT, + NL80211_CMD_PEER_MEASUREMENT_COMPLETE, + + NL80211_CMD_NOTIFY_RADAR, + + NL80211_CMD_UPDATE_OWE_INFO, + + NL80211_CMD_PROBE_MESH_LINK, + + /* add new commands above here */ + + /* used to define NL80211_CMD_MAX below */ + __NL80211_CMD_AFTER_LAST, + NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1 +}; + +/* + * Allow user space programs to use #ifdef on new commands by defining them + * here + */ +#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS +#define NL80211_CMD_SET_MGMT_EXTRA_IE NL80211_CMD_SET_MGMT_EXTRA_IE +#define NL80211_CMD_REG_CHANGE NL80211_CMD_REG_CHANGE +#define NL80211_CMD_AUTHENTICATE NL80211_CMD_AUTHENTICATE +#define NL80211_CMD_ASSOCIATE NL80211_CMD_ASSOCIATE +#define NL80211_CMD_DEAUTHENTICATE NL80211_CMD_DEAUTHENTICATE +#define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE +#define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT + +#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS + +/* source-level API compatibility */ +#define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG +#define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG +#define NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE NL80211_MESH_SETUP_IE + +/** + * enum nl80211_attrs - nl80211 netlink attributes + * + * @NL80211_ATTR_UNSPEC: unspecified attribute to catch errors + * + * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf. + * /sys/class/ieee80211/<phyname>/index + * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) + * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters + * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz, + * defines the channel together with the (deprecated) + * %NL80211_ATTR_WIPHY_CHANNEL_TYPE attribute or the attributes + * %NL80211_ATTR_CHANNEL_WIDTH and if needed %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 + * @NL80211_ATTR_CHANNEL_WIDTH: u32 attribute containing one of the values + * of &enum nl80211_chan_width, describing the channel width. See the + * documentation of the enum for more information. + * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the + * channel, used for anything but 20 MHz bandwidth + * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the + * channel, used only for 80+80 MHz bandwidth + * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ + * if HT20 or HT40 are to be used (i.e., HT disabled if not included): + * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including + * this attribute) + * NL80211_CHAN_HT20 = HT20 only + * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel + * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel + * This attribute is now deprecated. + * @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is + * less than or equal to the RTS threshold; allowed range: 1..255; + * dot11ShortRetryLimit; u8 + * @NL80211_ATTR_WIPHY_RETRY_LONG: TX retry limit for frames whose length is + * greater than the RTS threshold; allowed range: 1..255; + * dot11ShortLongLimit; u8 + * @NL80211_ATTR_WIPHY_FRAG_THRESHOLD: fragmentation threshold, i.e., maximum + * length in octets for frames; allowed range: 256..8000, disable + * fragmentation with (u32)-1; dot11FragmentationThreshold; u32 + * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length + * larger than or equal to this use RTS/CTS handshake); allowed range: + * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32 + * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11 + * section 7.3.2.9; dot11CoverageClass; u8 + * + * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on + * @NL80211_ATTR_IFNAME: network interface name + * @NL80211_ATTR_IFTYPE: type of virtual interface, see &enum nl80211_iftype + * + * @NL80211_ATTR_WDEV: wireless device identifier, used for pseudo-devices + * that don't have a netdev (u64) + * + * @NL80211_ATTR_MAC: MAC address (various uses) + * + * @NL80211_ATTR_KEY_DATA: (temporal) key data; for TKIP this consists of + * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC + * keys + * @NL80211_ATTR_KEY_IDX: key ID (u8, 0-3) + * @NL80211_ATTR_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11 + * section 7.3.2.25.1, e.g. 0x000FAC04) + * @NL80211_ATTR_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and + * CCMP keys, each six bytes in little endian + * @NL80211_ATTR_KEY_DEFAULT: Flag attribute indicating the key is default key + * @NL80211_ATTR_KEY_DEFAULT_MGMT: Flag attribute indicating the key is the + * default management key + * @NL80211_ATTR_CIPHER_SUITES_PAIRWISE: For crypto settings for connect or + * other commands, indicates which pairwise cipher suites are used + * @NL80211_ATTR_CIPHER_SUITE_GROUP: For crypto settings for connect or + * other commands, indicates which group cipher suite is used + * + * @NL80211_ATTR_BEACON_INTERVAL: beacon interval in TU + * @NL80211_ATTR_DTIM_PERIOD: DTIM period for beaconing + * @NL80211_ATTR_BEACON_HEAD: portion of the beacon before the TIM IE + * @NL80211_ATTR_BEACON_TAIL: portion of the beacon after the TIM IE + * + * @NL80211_ATTR_STA_AID: Association ID for the station (u16) + * @NL80211_ATTR_STA_FLAGS: flags, nested element with NLA_FLAG attributes of + * &enum nl80211_sta_flags (deprecated, use %NL80211_ATTR_STA_FLAGS2) + * @NL80211_ATTR_STA_LISTEN_INTERVAL: listen interval as defined by + * IEEE 802.11 7.3.1.6 (u16). + * @NL80211_ATTR_STA_SUPPORTED_RATES: supported rates, array of supported + * rates as defined by IEEE 802.11 7.3.2.2 but without the length + * restriction (at most %NL80211_MAX_SUPP_RATES). + * @NL80211_ATTR_STA_VLAN: interface index of VLAN interface to move station + * to, or the AP interface the station was originally added to. + * @NL80211_ATTR_STA_INFO: information about a station, part of station info + * given for %NL80211_CMD_GET_STATION, nested attribute containing + * info as possible, see &enum nl80211_sta_info. + * + * @NL80211_ATTR_WIPHY_BANDS: Information about an operating bands, + * consisting of a nested array. + * + * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes). + * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link + * (see &enum nl80211_plink_action). + * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path. + * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path + * info given for %NL80211_CMD_GET_MPATH, nested attribute described at + * &enum nl80211_mpath_info. + * + * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of + * &enum nl80211_mntr_flags. + * + * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the + * current regulatory domain should be set to or is already set to. + * For example, 'CR', for Costa Rica. This attribute is used by the kernel + * to query the CRDA to retrieve one regulatory domain. This attribute can + * also be used by userspace to query the kernel for the currently set + * regulatory domain. We chose an alpha2 as that is also used by the + * IEEE-802.11 country information element to identify a country. + * Users can also simply ask the wireless core to set regulatory domain + * to a specific alpha2. + * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory + * rules. + * + * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled + * (u8, 0 or 1) + * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic + * rates in format defined by IEEE 802.11 7.3.2.2 but without the length + * restriction (at most %NL80211_MAX_SUPP_RATES). + * + * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * + * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all + * supported interface types, each a flag attribute with the number + * of the interface mode. + * + * @NL80211_ATTR_MGMT_SUBTYPE: Management frame subtype for + * %NL80211_CMD_SET_MGMT_EXTRA_IE. + * + * @NL80211_ATTR_IE: Information element(s) data (used, e.g., with + * %NL80211_CMD_SET_MGMT_EXTRA_IE). + * + * @NL80211_ATTR_MAX_NUM_SCAN_SSIDS: number of SSIDs you can scan with + * a single scan request, a wiphy attribute. + * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS: number of SSIDs you can + * scan with a single scheduled scan request, a wiphy attribute. + * @NL80211_ATTR_MAX_SCAN_IE_LEN: maximum length of information elements + * that can be added to a scan request + * @NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN: maximum length of information + * elements that can be added to a scheduled scan request + * @NL80211_ATTR_MAX_MATCH_SETS: maximum number of sets that can be + * used with @NL80211_ATTR_SCHED_SCAN_MATCH, a wiphy attribute. + * + * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz) + * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive + * scanning and include a zero-length SSID (wildcard) for wildcard scan + * @NL80211_ATTR_BSS: scan result BSS + * + * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain + * currently in effect. This could be any of the %NL80211_REGDOM_SET_BY_* + * @NL80211_ATTR_REG_TYPE: indicates the type of the regulatory domain currently + * set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*) + * + * @NL80211_ATTR_SUPPORTED_COMMANDS: wiphy attribute that specifies + * an array of command numbers (i.e. a mapping index to command number) + * that the driver for the given wiphy supports. + * + * @NL80211_ATTR_FRAME: frame data (binary attribute), including frame header + * and body, but not FCS; used, e.g., with NL80211_CMD_AUTHENTICATE and + * NL80211_CMD_ASSOCIATE events + * @NL80211_ATTR_SSID: SSID (binary attribute, 0..32 octets) + * @NL80211_ATTR_AUTH_TYPE: AuthenticationType, see &enum nl80211_auth_type, + * represented as a u32 + * @NL80211_ATTR_REASON_CODE: ReasonCode for %NL80211_CMD_DEAUTHENTICATE and + * %NL80211_CMD_DISASSOCIATE, u16 + * + * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as + * a u32 + * + * @NL80211_ATTR_FREQ_BEFORE: A channel which has suffered a regulatory change + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _before_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* + * @NL80211_ATTR_FREQ_AFTER: A channel which has suffered a regulatory change + * due to considerations from a beacon hint. This attribute reflects + * the state of the channel _after_ the beacon hint processing. This + * attributes consists of a nested attribute containing + * NL80211_FREQUENCY_ATTR_* + * + * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported + * cipher suites + * + * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look + * for other networks on different channels + * + * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this + * is used, e.g., with %NL80211_CMD_AUTHENTICATE event + * + * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is + * used for the association (&enum nl80211_mfp, represented as a u32); + * this attribute can be used with %NL80211_CMD_ASSOCIATE and + * %NL80211_CMD_CONNECT requests. %NL80211_MFP_OPTIONAL is not allowed for + * %NL80211_CMD_ASSOCIATE since user space SME is expected and hence, it + * must have decided whether to use management frame protection or not. + * Setting %NL80211_MFP_OPTIONAL with a %NL80211_CMD_CONNECT request will + * let the driver (or the firmware) decide whether to use MFP or not. + * + * @NL80211_ATTR_STA_FLAGS2: Attribute containing a + * &struct nl80211_sta_flag_update. + * + * @NL80211_ATTR_CONTROL_PORT: A flag indicating whether user space controls + * IEEE 802.1X port, i.e., sets/clears %NL80211_STA_FLAG_AUTHORIZED, in + * station mode. If the flag is included in %NL80211_CMD_ASSOCIATE + * request, the driver will assume that the port is unauthorized until + * authorized by user space. Otherwise, port is marked authorized by + * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. + * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. + * @NL80211_ATTR_CONTROL_PORT_OVER_NL80211: A flag indicating whether control + * port frames (e.g. of type given in %NL80211_ATTR_CONTROL_PORT_ETHERTYPE) + * will be sent directly to the network interface or sent via the NL80211 + * socket. If this attribute is missing, then legacy behavior of sending + * control port frames directly to the network interface is used. If the + * flag is included, then control port frames are sent over NL80211 instead + * using %CMD_CONTROL_PORT_FRAME. If control port routing over NL80211 is + * to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER + * flag. + * + * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. + * We recommend using nested, driver-specific attributes within this. + * + * @NL80211_ATTR_DISCONNECTED_BY_AP: A flag indicating that the DISCONNECT + * event was due to the AP disconnecting the station, and not due to + * a local disconnect request. + * @NL80211_ATTR_STATUS_CODE: StatusCode for the %NL80211_CMD_CONNECT + * event (u16) + * @NL80211_ATTR_PRIVACY: Flag attribute, used with connect(), indicating + * that protected APs should be used. This is also used with NEW_BEACON to + * indicate that the BSS is to use protection. + * + * @NL80211_ATTR_CIPHERS_PAIRWISE: Used with CONNECT, ASSOCIATE, and NEW_BEACON + * to indicate which unicast key ciphers will be used with the connection + * (an array of u32). + * @NL80211_ATTR_CIPHER_GROUP: Used with CONNECT, ASSOCIATE, and NEW_BEACON to + * indicate which group key cipher will be used with the connection (a + * u32). + * @NL80211_ATTR_WPA_VERSIONS: Used with CONNECT, ASSOCIATE, and NEW_BEACON to + * indicate which WPA version(s) the AP we want to associate with is using + * (a u32 with flags from &enum nl80211_wpa_versions). + * @NL80211_ATTR_AKM_SUITES: Used with CONNECT, ASSOCIATE, and NEW_BEACON to + * indicate which key management algorithm(s) to use (an array of u32). + * This attribute is also sent in response to @NL80211_CMD_GET_WIPHY, + * indicating the supported AKM suites, intended for specific drivers which + * implement SME and have constraints on which AKMs are supported and also + * the cases where an AKM support is offloaded to the driver/firmware. + * If there is no such notification from the driver, user space should + * assume the driver supports all the AKM suites. + * + * @NL80211_ATTR_REQ_IE: (Re)association request information elements as + * sent out by the card, for ROAM and successful CONNECT events. + * @NL80211_ATTR_RESP_IE: (Re)association response information elements as + * sent by peer, for ROAM and successful CONNECT events. + * + * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used in ASSOCIATE and CONNECT + * commands to specify a request to reassociate within an ESS, i.e., to use + * Reassociate Request frame (with the value of this attribute in the + * Current AP address field) instead of Association Request frame which is + * used for the initial association to an ESS. + * + * @NL80211_ATTR_KEY: key information in a nested attribute with + * %NL80211_KEY_* sub-attributes + * @NL80211_ATTR_KEYS: array of keys for static WEP keys for connect() + * and join_ibss(), key information is in a nested attribute each + * with %NL80211_KEY_* sub-attributes + * + * @NL80211_ATTR_PID: Process ID of a network namespace. + * + * @NL80211_ATTR_GENERATION: Used to indicate consistent snapshots for + * dumps. This number increases whenever the object list being + * dumped changes, and as such userspace can verify that it has + * obtained a complete and consistent snapshot by verifying that + * all dump messages contain the same generation number. If it + * changed then the list changed and the dump should be repeated + * completely from scratch. + * + * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface + * + * @NL80211_ATTR_SURVEY_INFO: survey information about a channel, part of + * the survey response for %NL80211_CMD_GET_SURVEY, nested attribute + * containing info as possible, see &enum survey_info. + * + * @NL80211_ATTR_PMKID: PMK material for PMKSA caching. + * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can + * cache, a wiphy attribute. + * + * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32. + * @NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION: Device attribute that + * specifies the maximum duration that can be requested with the + * remain-on-channel operation, in milliseconds, u32. + * + * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. + * + * @NL80211_ATTR_TX_RATES: Nested set of attributes + * (enum nl80211_tx_rate_attributes) describing TX rates per band. The + * enum nl80211_band value is used as the index (nla_type() of the nested + * data. If a band is not included, it will be configured to allow all + * rates based on negotiated supported rates information. This attribute + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP, + * and joining mesh networks (not IBSS yet). In the later case, it must + * specify just a single bitrate, which is to be used for the beacon. + * The driver must also specify support for this with the extended + * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + * NL80211_EXT_FEATURE_BEACON_RATE_HT and + * NL80211_EXT_FEATURE_BEACON_RATE_VHT. + * + * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. + * + * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was + * acknowledged by the recipient. + * + * @NL80211_ATTR_PS_STATE: powersave state, using &enum nl80211_ps_state values. + * + * @NL80211_ATTR_CQM: connection quality monitor configuration in a + * nested attribute with %NL80211_ATTR_CQM_* sub-attributes. + * + * @NL80211_ATTR_LOCAL_STATE_CHANGE: Flag attribute to indicate that a command + * is requesting a local authentication/association state change without + * invoking actual management frame exchange. This can be used with + * NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE, + * NL80211_CMD_DISASSOCIATE. + * + * @NL80211_ATTR_AP_ISOLATE: (AP mode) Do not forward traffic between stations + * connected to this BSS. + * + * @NL80211_ATTR_WIPHY_TX_POWER_SETTING: Transmit power setting type. See + * &enum nl80211_tx_power_setting for possible values. + * @NL80211_ATTR_WIPHY_TX_POWER_LEVEL: Transmit power level in signed mBm units. + * This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING + * for non-automatic settings. + * + * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly + * means support for per-station GTKs. + * + * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting. + * This can be used to mask out antennas which are not attached or should + * not be used for transmitting. If an antenna is not selected in this + * bitmap the hardware is not allowed to transmit on this antenna. + * + * Each bit represents one antenna, starting with antenna 1 at the first + * bit. Depending on which antennas are selected in the bitmap, 802.11n + * drivers can derive which chainmasks to use (if all antennas belonging to + * a particular chain are disabled this chain should be disabled) and if + * a chain has diversity antennas wether diversity should be used or not. + * HT capabilities (STBC, TX Beamforming, Antenna selection) can be + * derived from the available chains after applying the antenna mask. + * Non-802.11n drivers can derive wether to use diversity or not. + * Drivers may reject configurations or RX/TX mask combinations they cannot + * support by returning -EINVAL. + * + * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving. + * This can be used to mask out antennas which are not attached or should + * not be used for receiving. If an antenna is not selected in this bitmap + * the hardware should not be configured to receive on this antenna. + * For a more detailed description see @NL80211_ATTR_WIPHY_ANTENNA_TX. + * + * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX: Bitmap of antennas which are available + * for configuration as TX antennas via the above parameters. + * + * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX: Bitmap of antennas which are available + * for configuration as RX antennas via the above parameters. + * + * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS + * + * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be + * transmitted on another channel when the channel given doesn't match + * the current channel. If the current channel doesn't match and this + * flag isn't set, the frame will be rejected. This is also used as an + * nl80211 capability flag. + * + * @NL80211_ATTR_BSS_HT_OPMODE: HT operation mode (u16) + * + * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags + * attributes, specifying what a key should be set as default as. + * See &enum nl80211_key_default_types. + * + * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters. These cannot be + * changed once the mesh is active. + * @NL80211_ATTR_MESH_CONFIG: Mesh configuration parameters, a nested attribute + * containing attributes from &enum nl80211_meshconf_params. + * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver + * allows auth frames in a mesh to be passed to userspace for processing via + * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag. + * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as defined in + * &enum nl80211_plink_state. Used when userspace is driving the peer link + * management state machine. @NL80211_MESH_SETUP_USERSPACE_AMPE or + * @NL80211_MESH_SETUP_USERSPACE_MPM must be enabled. + * + * @NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED: indicates, as part of the wiphy + * capabilities, the supported WoWLAN triggers + * @NL80211_ATTR_WOWLAN_TRIGGERS: used by %NL80211_CMD_SET_WOWLAN to + * indicate which WoW triggers should be enabled. This is also + * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN + * triggers. + * + * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan + * cycles, in msecs. + * + * @NL80211_ATTR_SCHED_SCAN_MATCH: Nested attribute with one or more + * sets of attributes to match during scheduled scans. Only BSSs + * that match any of the sets will be reported. These are + * pass-thru filter rules. + * For a match to succeed, the BSS must match all attributes of a + * set. Since not every hardware supports matching all types of + * attributes, there is no guarantee that the reported BSSs are + * fully complying with the match sets and userspace needs to be + * able to ignore them by itself. + * Thus, the implementation is somewhat hardware-dependent, but + * this is only an optimization and the userspace application + * needs to handle all the non-filtered results anyway. + * If the match attributes don't make sense when combined with + * the values passed in @NL80211_ATTR_SCAN_SSIDS (eg. if an SSID + * is included in the probe request, but the match attributes + * will never let it go through), -EINVAL may be returned. + * If omitted, no filtering is done. + * + * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported + * interface combinations. In each nested item, it contains attributes + * defined in &enum nl80211_if_combination_attrs. + * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like + * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that + * are managed in software: interfaces of these types aren't subject to + * any restrictions in their number or combinations. + * + * @NL80211_ATTR_REKEY_DATA: nested attribute containing the information + * necessary for GTK rekeying in the device, see &enum nl80211_rekey_data. + * + * @NL80211_ATTR_SCAN_SUPP_RATES: rates per to be advertised as supported in scan, + * nested array attribute containing an entry for each band, with the entry + * being a list of supported rates as defined by IEEE 802.11 7.3.2.2 but + * without the length restriction (at most %NL80211_MAX_SUPP_RATES). + * + * @NL80211_ATTR_HIDDEN_SSID: indicates whether SSID is to be hidden from Beacon + * and Probe Response (when response to wildcard Probe Request); see + * &enum nl80211_hidden_ssid, represented as a u32 + * + * @NL80211_ATTR_IE_PROBE_RESP: Information element(s) for Probe Response frame. + * This is used with %NL80211_CMD_NEW_BEACON and %NL80211_CMD_SET_BEACON to + * provide extra IEs (e.g., WPS/P2P IE) into Probe Response frames when the + * driver (or firmware) replies to Probe Request frames. + * @NL80211_ATTR_IE_ASSOC_RESP: Information element(s) for (Re)Association + * Response frames. This is used with %NL80211_CMD_NEW_BEACON and + * %NL80211_CMD_SET_BEACON to provide extra IEs (e.g., WPS/P2P IE) into + * (Re)Association Response frames when the driver (or firmware) replies to + * (Re)Association Request frames. + * + * @NL80211_ATTR_STA_WME: Nested attribute containing the wme configuration + * of the station, see &enum nl80211_sta_wme_attr. + * @NL80211_ATTR_SUPPORT_AP_UAPSD: the device supports uapsd when working + * as AP. + * + * @NL80211_ATTR_ROAM_SUPPORT: Indicates whether the firmware is capable of + * roaming to another AP in the same ESS if the signal lever is low. + * + * @NL80211_ATTR_PMKSA_CANDIDATE: Nested attribute containing the PMKSA caching + * candidate information, see &enum nl80211_pmksa_candidate_attr. + * + * @NL80211_ATTR_TX_NO_CCK_RATE: Indicates whether to use CCK rate or not + * for management frames transmission. In order to avoid p2p probe/action + * frames are being transmitted at CCK rate in 2GHz band, the user space + * applications use this attribute. + * This attribute is used with %NL80211_CMD_TRIGGER_SCAN and + * %NL80211_CMD_FRAME commands. + * + * @NL80211_ATTR_TDLS_ACTION: Low level TDLS action code (e.g. link setup + * request, link setup confirm, link teardown, etc.). Values are + * described in the TDLS (802.11z) specification. + * @NL80211_ATTR_TDLS_DIALOG_TOKEN: Non-zero token for uniquely identifying a + * TDLS conversation between two devices. + * @NL80211_ATTR_TDLS_OPERATION: High level TDLS operation; see + * &enum nl80211_tdls_operation, represented as a u8. + * @NL80211_ATTR_TDLS_SUPPORT: A flag indicating the device can operate + * as a TDLS peer sta. + * @NL80211_ATTR_TDLS_EXTERNAL_SETUP: The TDLS discovery/setup and teardown + * procedures should be performed by sending TDLS packets via + * %NL80211_CMD_TDLS_MGMT. Otherwise %NL80211_CMD_TDLS_OPER should be + * used for asking the driver to perform a TDLS operation. + * + * @NL80211_ATTR_DEVICE_AP_SME: This u32 attribute may be listed for devices + * that have AP support to indicate that they have the AP SME integrated + * with support for the features listed in this attribute, see + * &enum nl80211_ap_sme_features. + * + * @NL80211_ATTR_DONT_WAIT_FOR_ACK: Used with %NL80211_CMD_FRAME, this tells + * the driver to not wait for an acknowledgement. Note that due to this, + * it will also not give a status callback nor return a cookie. This is + * mostly useful for probe responses to save airtime. + * + * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from + * &enum nl80211_feature_flags and is advertised in wiphy information. + * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe + * requests while operating in AP-mode. + * This attribute holds a bitmap of the supported protocols for + * offloading (see &enum nl80211_probe_resp_offload_support_attr). + * + * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire + * probe-response frame. The DA field in the 802.11 header is zero-ed out, + * to be filled by the FW. + * @NL80211_ATTR_DISABLE_HT: Force HT capable interfaces to disable + * this feature. Currently, only supported in mac80211 drivers. + * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the + * ATTR_HT_CAPABILITY to which attention should be paid. + * Currently, only mac80211 NICs support this feature. + * The values that may be configured are: + * MCS rates, MAX-AMSDU, HT-20-40 and HT_CAP_SGI_40 + * AMPDU density and AMPDU factor. + * All values are treated as suggestions and may be ignored + * by the driver as required. The actual values may be seen in + * the station debugfs ht_caps file. + * + * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country + * abides to when initiating radiation on DFS channels. A country maps + * to one DFS region. + * + * @NL80211_ATTR_NOACK_MAP: This u16 bitmap contains the No Ack Policy of + * up to 16 TIDs. + * + * @NL80211_ATTR_INACTIVITY_TIMEOUT: timeout value in seconds, this can be + * used by the drivers which has MLME in firmware and does not have support + * to report per station tx/rx activity to free up the station entry from + * the list. This needs to be used when the driver advertises the + * capability to timeout the stations. + * + * @NL80211_ATTR_RX_SIGNAL_DBM: signal strength in dBm (as a 32-bit int); + * this attribute is (depending on the driver capabilities) added to + * received frames indicated with %NL80211_CMD_FRAME. + * + * @NL80211_ATTR_BG_SCAN_PERIOD: Background scan period in seconds + * or 0 to disable background scan. + * + * @NL80211_ATTR_USER_REG_HINT_TYPE: type of regulatory hint passed from + * userspace. If unset it is assumed the hint comes directly from + * a user. If set code could specify exactly what type of source + * was used to provide the hint. For the different types of + * allowed user regulatory hints see nl80211_user_reg_hint_type. + * + * @NL80211_ATTR_CONN_FAILED_REASON: The reason for which AP has rejected + * the connection request from a station. nl80211_connect_failed_reason + * enum has different reasons of connection failure. + * + * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames. + * This contains the authentication frame body (non-IE and IE data), + * excluding the Authentication algorithm number, i.e., starting at the + * Authentication transaction sequence number field. It is used with + * authentication algorithms that need special fields to be added into + * the frames (SAE and FILS). Currently, only the SAE cases use the + * initial two fields (Authentication transaction sequence number and + * Status code). However, those fields are included in the attribute data + * for all authentication algorithms to keep the attribute definition + * consistent. + * + * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * + * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32) + * + * @NL80211_ATTR_P2P_CTWINDOW: P2P GO Client Traffic Window (u8), used with + * the START_AP and SET_BSS commands + * @NL80211_ATTR_P2P_OPPPS: P2P GO opportunistic PS (u8), used with the + * START_AP and SET_BSS commands. This can have the values 0 or 1; + * if not given in START_AP 0 is assumed, if not given in SET_BSS + * no change is made. + * + * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode + * defined in &enum nl80211_mesh_power_mode. + * + * @NL80211_ATTR_ACL_POLICY: ACL policy, see &enum nl80211_acl_policy, + * carried in a u32 attribute + * + * @NL80211_ATTR_MAC_ADDRS: Array of nested MAC addresses, used for + * MAC ACL. + * + * @NL80211_ATTR_MAC_ACL_MAX: u32 attribute to advertise the maximum + * number of MAC addresses that a device can support for MAC + * ACL. + * + * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace, + * contains a value of enum nl80211_radar_event (u32). + * + * @NL80211_ATTR_EXT_CAPA: 802.11 extended capabilities that the kernel driver + * has and handles. The format is the same as the IE contents. See + * 802.11-2012 8.4.2.29 for more information. + * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver + * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields. + * + * @NL80211_ATTR_STA_CAPABILITY: Station capabilities (u16) are advertised to + * the driver, e.g., to enable TDLS power save (PU-APSD). + * + * @NL80211_ATTR_STA_EXT_CAPABILITY: Station extended capabilities are + * advertised to the driver, e.g., to enable TDLS off channel operations + * and PU-APSD. + * + * @NL80211_ATTR_PROTOCOL_FEATURES: global nl80211 feature flags, see + * &enum nl80211_protocol_features, the attribute is a u32. + * + * @NL80211_ATTR_SPLIT_WIPHY_DUMP: flag attribute, userspace supports + * receiving the data for a single wiphy split across multiple + * messages, given with wiphy dump message + * + * @NL80211_ATTR_MDID: Mobility Domain Identifier + * + * @NL80211_ATTR_IE_RIC: Resource Information Container Information + * Element + * + * @NL80211_ATTR_CRIT_PROT_ID: critical protocol identifier requiring increased + * reliability, see &enum nl80211_crit_proto_id (u16). + * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which + * the connection should have increased reliability (u16). + * + * @NL80211_ATTR_PEER_AID: Association ID for the peer TDLS station (u16). + * This is similar to @NL80211_ATTR_STA_AID but with a difference of being + * allowed to be used with the first @NL80211_CMD_SET_STATION command to + * update a TDLS peer STA entry. + * + * @NL80211_ATTR_COALESCE_RULE: Coalesce rule information. + * + * @NL80211_ATTR_CH_SWITCH_COUNT: u32 attribute specifying the number of TBTT's + * until the channel switch event. + * @NL80211_ATTR_CH_SWITCH_BLOCK_TX: flag attribute specifying that transmission + * must be blocked on the current channel (before the channel switch + * operation). + * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information + * for the time while performing a channel switch. + * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel + * switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL). + * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel + * switch counters in the probe response (%NL80211_ATTR_PROBE_RESP). + * + * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32. + * As specified in the &enum nl80211_rxmgmt_flags. + * + * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels. + * + * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported + * supported operating classes. + * + * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space + * controls DFS operation in IBSS mode. If the flag is included in + * %NL80211_CMD_JOIN_IBSS request, the driver will allow use of DFS + * channels and reports radar events to userspace. Userspace is required + * to react to radar events, e.g. initiate a channel switch or leave the + * IBSS network. + * + * @NL80211_ATTR_SUPPORT_5_MHZ: A flag indicating that the device supports + * 5 MHz channel bandwidth. + * @NL80211_ATTR_SUPPORT_10_MHZ: A flag indicating that the device supports + * 10 MHz channel bandwidth. + * + * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode + * Notification Element based on association request when used with + * %NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when + * %NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS); + * u8 attribute. + * + * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if + * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet) + * @NL80211_ATTR_VENDOR_SUBCMD: vendor sub-command + * @NL80211_ATTR_VENDOR_DATA: data for the vendor command, if any; this + * attribute is also used for vendor command feature advertisement + * @NL80211_ATTR_VENDOR_EVENTS: used for event list advertising in the wiphy + * info, containing a nested array of possible events + * + * @NL80211_ATTR_QOS_MAP: IP DSCP mapping for Interworking QoS mapping. This + * data is in the format defined for the payload of the QoS Map Set element + * in IEEE Std 802.11-2012, 8.4.2.97. + * + * @NL80211_ATTR_MAC_HINT: MAC address recommendation as initial BSS + * @NL80211_ATTR_WIPHY_FREQ_HINT: frequency of the recommended initial BSS + * + * @NL80211_ATTR_MAX_AP_ASSOC_STA: Device attribute that indicates how many + * associated stations are supported in AP mode (including P2P GO); u32. + * Since drivers may not have a fixed limit on the maximum number (e.g., + * other concurrent operations may affect this), drivers are allowed to + * advertise values that cannot always be met. In such cases, an attempt + * to add a new station entry with @NL80211_CMD_NEW_STATION may fail. + * + * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which + * should be updated when the frame is transmitted. + * @NL80211_ATTR_MAX_CSA_COUNTERS: U8 attribute used to advertise the maximum + * supported number of csa counters. + * + * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32. + * As specified in the &enum nl80211_tdls_peer_capability. + * + * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface + * creation then the new interface will be owned by the netlink socket + * that created it and will be destroyed when the socket is closed. + * If set during scheduled scan start then the new scan req will be + * owned by the netlink socket that created it and the scheduled scan will + * be stopped when the socket is closed. + * If set during configuration of regulatory indoor operation then the + * regulatory indoor configuration would be owned by the netlink socket + * that configured the indoor setting, and the indoor operation would be + * cleared when the socket is closed. + * If set during NAN interface creation, the interface will be destroyed + * if the socket is closed just like any other interface. Moreover, NAN + * notifications will be sent in unicast to that socket. Without this + * attribute, the notifications will be sent to the %NL80211_MCGRP_NAN + * multicast group. + * If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the + * station will deauthenticate when the socket is closed. + * If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically + * torn down when the socket is closed. + * If set during %NL80211_CMD_JOIN_MESH the mesh setup will be + * automatically torn down when the socket is closed. + * If set during %NL80211_CMD_START_AP the AP will be automatically + * disabled when the socket is closed. + * + * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is + * the TDLS link initiator. + * + * @NL80211_ATTR_USE_RRM: flag for indicating whether the current connection + * shall support Radio Resource Measurements (11k). This attribute can be + * used with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests. + * User space applications are expected to use this flag only if the + * underlying device supports these minimal RRM features: + * %NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES, + * %NL80211_FEATURE_QUIET, + * Or, if global RRM is supported, see: + * %NL80211_EXT_FEATURE_RRM + * If this flag is used, driver must add the Power Capabilities IE to the + * association request. In addition, it must also set the RRM capability + * flag in the association request's Capability Info field. + * + * @NL80211_ATTR_WIPHY_DYN_ACK: flag attribute used to enable ACK timeout + * estimation algorithm (dynack). In order to activate dynack + * %NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower + * drivers to indicate dynack capability. Dynack is automatically disabled + * setting valid value for coverage class. + * + * @NL80211_ATTR_TSID: a TSID value (u8 attribute) + * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute) + * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds + * (per second) (u16 attribute) + * + * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see + * &enum nl80211_smps_mode. + * + * @NL80211_ATTR_OPER_CLASS: operating class + * + * @NL80211_ATTR_MAC_MASK: MAC address mask + * + * @NL80211_ATTR_WIPHY_SELF_MANAGED_REG: flag attribute indicating this device + * is self-managing its regulatory information and any regulatory domain + * obtained from it is coming from the device's wiphy and not the global + * cfg80211 regdomain. + * + * @NL80211_ATTR_EXT_FEATURES: extended feature flags contained in a byte + * array. The feature flags are identified by their bit index (see &enum + * nl80211_ext_feature_index). The bit index is ordered starting at the + * least-significant bit of the first byte in the array, ie. bit index 0 + * is located at bit 0 of byte 0. bit index 25 would be located at bit 1 + * of byte 3 (u8 array). + * + * @NL80211_ATTR_SURVEY_RADIO_STATS: Request overall radio statistics to be + * returned along with other survey data. If set, @NL80211_CMD_GET_SURVEY + * may return a survey entry without a channel indicating global radio + * statistics (only some values are valid and make sense.) + * For devices that don't return such an entry even then, the information + * should be contained in the result as the sum of the respective counters + * over all channels. + * + * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before the first cycle of a + * scheduled scan is started. Or the delay before a WoWLAN + * net-detect scan is started, counting from the moment the + * system is suspended. This value is a u32, in seconds. + + * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device + * is operating in an indoor environment. + * + * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS: maximum number of scan plans for + * scheduled scan supported by the device (u32), a wiphy attribute. + * @NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL: maximum interval (in seconds) for + * a scan plan (u32), a wiphy attribute. + * @NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS: maximum number of iterations in + * a scan plan (u32), a wiphy attribute. + * @NL80211_ATTR_SCHED_SCAN_PLANS: a list of scan plans for scheduled scan. + * Each scan plan defines the number of scan iterations and the interval + * between scans. The last scan plan will always run infinitely, + * thus it must not specify the number of iterations, only the interval + * between scans. The scan plans are executed sequentially. + * Each scan plan is a nested attribute of &enum nl80211_sched_scan_plan. + * @NL80211_ATTR_PBSS: flag attribute. If set it means operate + * in a PBSS. Specified in %NL80211_CMD_CONNECT to request + * connecting to a PCP, and in %NL80211_CMD_START_AP to start + * a PCP instead of AP. Relevant for DMG networks only. + * @NL80211_ATTR_BSS_SELECT: nested attribute for driver supporting the + * BSS selection feature. When used with %NL80211_CMD_GET_WIPHY it contains + * attributes according &enum nl80211_bss_select_attr to indicate what + * BSS selection behaviours are supported. When used with %NL80211_CMD_CONNECT + * it contains the behaviour-specific attribute containing the parameters for + * BSS selection to be done by driver and/or firmware. + * + * @NL80211_ATTR_STA_SUPPORT_P2P_PS: whether P2P PS mechanism supported + * or not. u8, one of the values of &enum nl80211_sta_p2p_ps_status + * + * @NL80211_ATTR_PAD: attribute used for padding for 64-bit alignment + * + * @NL80211_ATTR_IFTYPE_EXT_CAPA: Nested attribute of the following attributes: + * %NL80211_ATTR_IFTYPE, %NL80211_ATTR_EXT_CAPA, + * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per + * interface type. + * + * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO + * groupID for monitor mode. + * The first 8 bytes are a mask that defines the membership in each + * group (there are 64 groups, group 0 and 63 are reserved), + * each bit represents a group and set to 1 for being a member in + * that group and 0 for not being a member. + * The remaining 16 bytes define the position in each group: 2 bits for + * each group. + * (smaller group numbers represented on most significant bits and bigger + * group numbers on least significant bits.) + * This attribute is used only if all interfaces are in monitor mode. + * Set this attribute in order to monitor packets using the given MU-MIMO + * groupID data. + * to turn off that feature set all the bits of the groupID to zero. + * @NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR: mac address for the sniffer to follow + * when using MU-MIMO air sniffer. + * to turn that feature off set an invalid mac address + * (e.g. FF:FF:FF:FF:FF:FF) + * + * @NL80211_ATTR_SCAN_START_TIME_TSF: The time at which the scan was actually + * started (u64). The time is the TSF of the BSS the interface that + * requested the scan is connected to (if available, otherwise this + * attribute must not be included). + * @NL80211_ATTR_SCAN_START_TIME_TSF_BSSID: The BSS according to which + * %NL80211_ATTR_SCAN_START_TIME_TSF is set. + * @NL80211_ATTR_MEASUREMENT_DURATION: measurement duration in TUs (u16). If + * %NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY is not set, this is the + * maximum measurement duration allowed. This attribute is used with + * measurement requests. It can also be used with %NL80211_CMD_TRIGGER_SCAN + * if the scan is used for beacon report radio measurement. + * @NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY: flag attribute that indicates + * that the duration specified with %NL80211_ATTR_MEASUREMENT_DURATION is + * mandatory. If this flag is not set, the duration is the maximum duration + * and the actual measurement duration may be shorter. + * + * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is + * used to pull the stored data for mesh peer in power save state. + * + * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0. + * Also, values 1 and 255 are reserved for certification purposes and + * should not be used during a normal device operation. + * @NL80211_ATTR_BANDS: operating bands configuration. This is a u32 + * bitmask of BIT(NL80211_BAND_*) as described in %enum + * nl80211_band. For instance, for NL80211_BAND_2GHZ, bit 0 + * would be set. This attribute is used with + * %NL80211_CMD_START_NAN and %NL80211_CMD_CHANGE_NAN_CONFIG, and + * it is optional. If no bands are set, it means don't-care and + * the device will decide what to use. + * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See + * &enum nl80211_nan_func_attributes for description of this nested + * attribute. + * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. + * See &enum nl80211_nan_match_attributes. + * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame + * protection. + * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association + * Request/Response frame protection. This attribute contains the 16 octet + * STA Nonce followed by 16 octets of AP Nonce. + * + * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast + * packets should be send out as unicast to all stations (flag attribute). + * + * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also + * used in various commands/events for specifying the BSSID. + * + * @NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI: Relative RSSI threshold by which + * other BSSs has to be better or slightly worse than the current + * connected BSS so that they get reported to user space. + * This will give an opportunity to userspace to consider connecting to + * other matching BSSs which have better or slightly worse RSSI than + * the current connected BSS by using an offloaded operation to avoid + * unnecessary wakeups. + * + * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in + * the specified band is to be adjusted before doing + * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out + * better BSSs. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. + * + * @NL80211_ATTR_TIMEOUT_REASON: The reason for which an operation timed out. + * u32 attribute with an &enum nl80211_timeout_reason value. This is used, + * e.g., with %NL80211_CMD_CONNECT event. + * + * @NL80211_ATTR_FILS_ERP_USERNAME: EAP Re-authentication Protocol (ERP) + * username part of NAI used to refer keys rRK and rIK. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_REALM: EAP Re-authentication Protocol (ERP) realm part + * of NAI specifying the domain name of the ER server. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM: Unsigned 16-bit ERP next sequence number + * to use in ERP messages. This is used in generating the FILS wrapped data + * for FILS authentication and is used with %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_RRK: ERP re-authentication Root Key (rRK) for the + * NAI specified by %NL80211_ATTR_FILS_ERP_USERNAME and + * %NL80211_ATTR_FILS_ERP_REALM. This is used for generating rIK and rMSK + * from successful FILS authentication and is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertized by a FILS AP + * identifying the scope of PMKSAs. This is used with + * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA. + * + * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with + * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID. + * For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way + * handshake for WPA/WPA2-PSK networks. For 802.1X authentication it is + * used with %NL80211_CMD_SET_PMK. For offloaded FT support this attribute + * specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME is included as well. + * + * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to + * indicate that it supports multiple active scheduled scan requests. + * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled + * scan request that may be active for the device (u32). + * + * @NL80211_ATTR_WANT_1X_4WAY_HS: flag attribute which user-space can include + * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it + * wants to use the supported offload of the 4-way handshake. + * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT. + * @NL80211_ATTR_PORT_AUTHORIZED: (reserved) + * + * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external + * authentication operation (u32 attribute with an + * &enum nl80211_external_auth_action value). This is used with the + * %NL80211_CMD_EXTERNAL_AUTH request event. + * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user + * space supports external authentication. This attribute shall be used + * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver + * may offload authentication processing to user space if this capability + * is indicated in the respective requests from the user space. + * + * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this + * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED. + * + * @NL80211_ATTR_TXQ_STATS: TXQ statistics (nested attribute, see &enum + * nl80211_txq_stats) + * @NL80211_ATTR_TXQ_LIMIT: Total packet limit for the TXQ queues for this phy. + * The smaller of this and the memory limit is enforced. + * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory memory limit (in bytes) for the + * TXQ queues for this phy. The smaller of this and the packet limit is + * enforced. + * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes + * a flow is assigned on each round of the DRR scheduler. + * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION). Can be set + * only if %NL80211_STA_FLAG_WME is set. + * + * @NL80211_ATTR_FTM_RESPONDER: nested attribute which user-space can include + * in %NL80211_CMD_START_AP or %NL80211_CMD_SET_BEACON for fine timing + * measurement (FTM) responder functionality and containing parameters as + * possible, see &enum nl80211_ftm_responder_attr + * + * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder + * statistics, see &enum nl80211_ftm_responder_stats. + * + * @NL80211_ATTR_TIMEOUT: Timeout for the given operation in milliseconds (u32), + * if the attribute is not given no timeout is requested. Note that 0 is an + * invalid value. + * + * @NL80211_ATTR_PEER_MEASUREMENTS: peer measurements request (and result) + * data, uses nested attributes specified in + * &enum nl80211_peer_measurement_attrs. + * This is also used for capability advertisement in the wiphy information, + * with the appropriate sub-attributes. + * + * @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime + * scheduler. + * + * @NL80211_ATTR_STA_TX_POWER_SETTING: Transmit power setting type (u8) for + * station associated with the AP. See &enum nl80211_tx_power_setting for + * possible values. + * @NL80211_ATTR_STA_TX_POWER: Transmit power level (s16) in dBm units. This + * allows to set Tx power for a station. If this attribute is not included, + * the default per-interface tx power setting will be overriding. Driver + * should be picking up the lowest tx power, either tx power per-interface + * or per-station. + * + * @NL80211_ATTR_SAE_PASSWORD: attribute for passing SAE password material. It + * is used with %NL80211_CMD_CONNECT to provide password for offloading + * SAE authentication for WPA3-Personal networks. + * + * @NL80211_ATTR_TWT_RESPONDER: Enable target wait time responder support. + * + * @NL80211_ATTR_HE_OBSS_PD: nested attribute for OBSS Packet Detection + * functionality. + * + * @NL80211_ATTR_WIPHY_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz + * channel(s) that are allowed to be used for EDMG transmissions. + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251. (u8 attribute) + * @NL80211_ATTR_WIPHY_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes + * the allowed channel bandwidth configurations. (u8 attribute) + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13. + * + * @NUM_NL80211_ATTR: total number of nl80211_attrs available + * @NL80211_ATTR_MAX: highest attribute number currently defined + * @__NL80211_ATTR_AFTER_LAST: internal use + */ +enum nl80211_attrs { +/* don't change the order or add anything between, this is ABI! */ + NL80211_ATTR_UNSPEC, + + NL80211_ATTR_WIPHY, + NL80211_ATTR_WIPHY_NAME, + + NL80211_ATTR_IFINDEX, + NL80211_ATTR_IFNAME, + NL80211_ATTR_IFTYPE, + + NL80211_ATTR_MAC, + + NL80211_ATTR_KEY_DATA, + NL80211_ATTR_KEY_IDX, + NL80211_ATTR_KEY_CIPHER, + NL80211_ATTR_KEY_SEQ, + NL80211_ATTR_KEY_DEFAULT, + + NL80211_ATTR_BEACON_INTERVAL, + NL80211_ATTR_DTIM_PERIOD, + NL80211_ATTR_BEACON_HEAD, + NL80211_ATTR_BEACON_TAIL, + + NL80211_ATTR_STA_AID, + NL80211_ATTR_STA_FLAGS, + NL80211_ATTR_STA_LISTEN_INTERVAL, + NL80211_ATTR_STA_SUPPORTED_RATES, + NL80211_ATTR_STA_VLAN, + NL80211_ATTR_STA_INFO, + + NL80211_ATTR_WIPHY_BANDS, + + NL80211_ATTR_MNTR_FLAGS, + + NL80211_ATTR_MESH_ID, + NL80211_ATTR_STA_PLINK_ACTION, + NL80211_ATTR_MPATH_NEXT_HOP, + NL80211_ATTR_MPATH_INFO, + + NL80211_ATTR_BSS_CTS_PROT, + NL80211_ATTR_BSS_SHORT_PREAMBLE, + NL80211_ATTR_BSS_SHORT_SLOT_TIME, + + NL80211_ATTR_HT_CAPABILITY, + + NL80211_ATTR_SUPPORTED_IFTYPES, + + NL80211_ATTR_REG_ALPHA2, + NL80211_ATTR_REG_RULES, + + NL80211_ATTR_MESH_CONFIG, + + NL80211_ATTR_BSS_BASIC_RATES, + + NL80211_ATTR_WIPHY_TXQ_PARAMS, + NL80211_ATTR_WIPHY_FREQ, + NL80211_ATTR_WIPHY_CHANNEL_TYPE, + + NL80211_ATTR_KEY_DEFAULT_MGMT, + + NL80211_ATTR_MGMT_SUBTYPE, + NL80211_ATTR_IE, + + NL80211_ATTR_MAX_NUM_SCAN_SSIDS, + + NL80211_ATTR_SCAN_FREQUENCIES, + NL80211_ATTR_SCAN_SSIDS, + NL80211_ATTR_GENERATION, /* replaces old SCAN_GENERATION */ + NL80211_ATTR_BSS, + + NL80211_ATTR_REG_INITIATOR, + NL80211_ATTR_REG_TYPE, + + NL80211_ATTR_SUPPORTED_COMMANDS, + + NL80211_ATTR_FRAME, + NL80211_ATTR_SSID, + NL80211_ATTR_AUTH_TYPE, + NL80211_ATTR_REASON_CODE, + + NL80211_ATTR_KEY_TYPE, + + NL80211_ATTR_MAX_SCAN_IE_LEN, + NL80211_ATTR_CIPHER_SUITES, + + NL80211_ATTR_FREQ_BEFORE, + NL80211_ATTR_FREQ_AFTER, + + NL80211_ATTR_FREQ_FIXED, + + + NL80211_ATTR_WIPHY_RETRY_SHORT, + NL80211_ATTR_WIPHY_RETRY_LONG, + NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + NL80211_ATTR_WIPHY_RTS_THRESHOLD, + + NL80211_ATTR_TIMED_OUT, + + NL80211_ATTR_USE_MFP, + + NL80211_ATTR_STA_FLAGS2, + + NL80211_ATTR_CONTROL_PORT, + + NL80211_ATTR_TESTDATA, + + NL80211_ATTR_PRIVACY, + + NL80211_ATTR_DISCONNECTED_BY_AP, + NL80211_ATTR_STATUS_CODE, + + NL80211_ATTR_CIPHER_SUITES_PAIRWISE, + NL80211_ATTR_CIPHER_SUITE_GROUP, + NL80211_ATTR_WPA_VERSIONS, + NL80211_ATTR_AKM_SUITES, + + NL80211_ATTR_REQ_IE, + NL80211_ATTR_RESP_IE, + + NL80211_ATTR_PREV_BSSID, + + NL80211_ATTR_KEY, + NL80211_ATTR_KEYS, + + NL80211_ATTR_PID, + + NL80211_ATTR_4ADDR, + + NL80211_ATTR_SURVEY_INFO, + + NL80211_ATTR_PMKID, + NL80211_ATTR_MAX_NUM_PMKIDS, + + NL80211_ATTR_DURATION, + + NL80211_ATTR_COOKIE, + + NL80211_ATTR_WIPHY_COVERAGE_CLASS, + + NL80211_ATTR_TX_RATES, + + NL80211_ATTR_FRAME_MATCH, + + NL80211_ATTR_ACK, + + NL80211_ATTR_PS_STATE, + + NL80211_ATTR_CQM, + + NL80211_ATTR_LOCAL_STATE_CHANGE, + + NL80211_ATTR_AP_ISOLATE, + + NL80211_ATTR_WIPHY_TX_POWER_SETTING, + NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + + NL80211_ATTR_SUPPORT_IBSS_RSN, + + NL80211_ATTR_WIPHY_ANTENNA_TX, + NL80211_ATTR_WIPHY_ANTENNA_RX, + + NL80211_ATTR_MCAST_RATE, + + NL80211_ATTR_OFFCHANNEL_TX_OK, + + NL80211_ATTR_BSS_HT_OPMODE, + + NL80211_ATTR_KEY_DEFAULT_TYPES, + + NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + + NL80211_ATTR_MESH_SETUP, + + NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, + NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + + NL80211_ATTR_SUPPORT_MESH_AUTH, + NL80211_ATTR_STA_PLINK_STATE, + + NL80211_ATTR_WOWLAN_TRIGGERS, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, + + NL80211_ATTR_SCHED_SCAN_INTERVAL, + + NL80211_ATTR_INTERFACE_COMBINATIONS, + NL80211_ATTR_SOFTWARE_IFTYPES, + + NL80211_ATTR_REKEY_DATA, + + NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, + NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, + + NL80211_ATTR_SCAN_SUPP_RATES, + + NL80211_ATTR_HIDDEN_SSID, + + NL80211_ATTR_IE_PROBE_RESP, + NL80211_ATTR_IE_ASSOC_RESP, + + NL80211_ATTR_STA_WME, + NL80211_ATTR_SUPPORT_AP_UAPSD, + + NL80211_ATTR_ROAM_SUPPORT, + + NL80211_ATTR_SCHED_SCAN_MATCH, + NL80211_ATTR_MAX_MATCH_SETS, + + NL80211_ATTR_PMKSA_CANDIDATE, + + NL80211_ATTR_TX_NO_CCK_RATE, + + NL80211_ATTR_TDLS_ACTION, + NL80211_ATTR_TDLS_DIALOG_TOKEN, + NL80211_ATTR_TDLS_OPERATION, + NL80211_ATTR_TDLS_SUPPORT, + NL80211_ATTR_TDLS_EXTERNAL_SETUP, + + NL80211_ATTR_DEVICE_AP_SME, + + NL80211_ATTR_DONT_WAIT_FOR_ACK, + + NL80211_ATTR_FEATURE_FLAGS, + + NL80211_ATTR_PROBE_RESP_OFFLOAD, + + NL80211_ATTR_PROBE_RESP, + + NL80211_ATTR_DFS_REGION, + + NL80211_ATTR_DISABLE_HT, + NL80211_ATTR_HT_CAPABILITY_MASK, + + NL80211_ATTR_NOACK_MAP, + + NL80211_ATTR_INACTIVITY_TIMEOUT, + + NL80211_ATTR_RX_SIGNAL_DBM, + + NL80211_ATTR_BG_SCAN_PERIOD, + + NL80211_ATTR_WDEV, + + NL80211_ATTR_USER_REG_HINT_TYPE, + + NL80211_ATTR_CONN_FAILED_REASON, + + NL80211_ATTR_AUTH_DATA, + + NL80211_ATTR_VHT_CAPABILITY, + + NL80211_ATTR_SCAN_FLAGS, + + NL80211_ATTR_CHANNEL_WIDTH, + NL80211_ATTR_CENTER_FREQ1, + NL80211_ATTR_CENTER_FREQ2, + + NL80211_ATTR_P2P_CTWINDOW, + NL80211_ATTR_P2P_OPPPS, + + NL80211_ATTR_LOCAL_MESH_POWER_MODE, + + NL80211_ATTR_ACL_POLICY, + + NL80211_ATTR_MAC_ADDRS, + + NL80211_ATTR_MAC_ACL_MAX, + + NL80211_ATTR_RADAR_EVENT, + + NL80211_ATTR_EXT_CAPA, + NL80211_ATTR_EXT_CAPA_MASK, + + NL80211_ATTR_STA_CAPABILITY, + NL80211_ATTR_STA_EXT_CAPABILITY, + + NL80211_ATTR_PROTOCOL_FEATURES, + NL80211_ATTR_SPLIT_WIPHY_DUMP, + + NL80211_ATTR_DISABLE_VHT, + NL80211_ATTR_VHT_CAPABILITY_MASK, + + NL80211_ATTR_MDID, + NL80211_ATTR_IE_RIC, + + NL80211_ATTR_CRIT_PROT_ID, + NL80211_ATTR_MAX_CRIT_PROT_DURATION, + + NL80211_ATTR_PEER_AID, + + NL80211_ATTR_COALESCE_RULE, + + NL80211_ATTR_CH_SWITCH_COUNT, + NL80211_ATTR_CH_SWITCH_BLOCK_TX, + NL80211_ATTR_CSA_IES, + NL80211_ATTR_CSA_C_OFF_BEACON, + NL80211_ATTR_CSA_C_OFF_PRESP, + + NL80211_ATTR_RXMGMT_FLAGS, + + NL80211_ATTR_STA_SUPPORTED_CHANNELS, + + NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES, + + NL80211_ATTR_HANDLE_DFS, + + NL80211_ATTR_SUPPORT_5_MHZ, + NL80211_ATTR_SUPPORT_10_MHZ, + + NL80211_ATTR_OPMODE_NOTIF, + + NL80211_ATTR_VENDOR_ID, + NL80211_ATTR_VENDOR_SUBCMD, + NL80211_ATTR_VENDOR_DATA, + NL80211_ATTR_VENDOR_EVENTS, + + NL80211_ATTR_QOS_MAP, + + NL80211_ATTR_MAC_HINT, + NL80211_ATTR_WIPHY_FREQ_HINT, + + NL80211_ATTR_MAX_AP_ASSOC_STA, + + NL80211_ATTR_TDLS_PEER_CAPABILITY, + + NL80211_ATTR_SOCKET_OWNER, + + NL80211_ATTR_CSA_C_OFFSETS_TX, + NL80211_ATTR_MAX_CSA_COUNTERS, + + NL80211_ATTR_TDLS_INITIATOR, + + NL80211_ATTR_USE_RRM, + + NL80211_ATTR_WIPHY_DYN_ACK, + + NL80211_ATTR_TSID, + NL80211_ATTR_USER_PRIO, + NL80211_ATTR_ADMITTED_TIME, + + NL80211_ATTR_SMPS_MODE, + + NL80211_ATTR_OPER_CLASS, + + NL80211_ATTR_MAC_MASK, + + NL80211_ATTR_WIPHY_SELF_MANAGED_REG, + + NL80211_ATTR_EXT_FEATURES, + + NL80211_ATTR_SURVEY_RADIO_STATS, + + NL80211_ATTR_NETNS_FD, + + NL80211_ATTR_SCHED_SCAN_DELAY, + + NL80211_ATTR_REG_INDOOR, + + NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS, + NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL, + NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS, + NL80211_ATTR_SCHED_SCAN_PLANS, + + NL80211_ATTR_PBSS, + + NL80211_ATTR_BSS_SELECT, + + NL80211_ATTR_STA_SUPPORT_P2P_PS, + + NL80211_ATTR_PAD, + + NL80211_ATTR_IFTYPE_EXT_CAPA, + + NL80211_ATTR_MU_MIMO_GROUP_DATA, + NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR, + + NL80211_ATTR_SCAN_START_TIME_TSF, + NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, + NL80211_ATTR_MEASUREMENT_DURATION, + NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY, + + NL80211_ATTR_MESH_PEER_AID, + + NL80211_ATTR_NAN_MASTER_PREF, + NL80211_ATTR_BANDS, + NL80211_ATTR_NAN_FUNC, + NL80211_ATTR_NAN_MATCH, + + NL80211_ATTR_FILS_KEK, + NL80211_ATTR_FILS_NONCES, + + NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED, + + NL80211_ATTR_BSSID, + + NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI, + NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST, + + NL80211_ATTR_TIMEOUT_REASON, + + NL80211_ATTR_FILS_ERP_USERNAME, + NL80211_ATTR_FILS_ERP_REALM, + NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, + NL80211_ATTR_FILS_ERP_RRK, + NL80211_ATTR_FILS_CACHE_ID, + + NL80211_ATTR_PMK, + + NL80211_ATTR_SCHED_SCAN_MULTI, + NL80211_ATTR_SCHED_SCAN_MAX_REQS, + + NL80211_ATTR_WANT_1X_4WAY_HS, + NL80211_ATTR_PMKR0_NAME, + NL80211_ATTR_PORT_AUTHORIZED, + + NL80211_ATTR_EXTERNAL_AUTH_ACTION, + NL80211_ATTR_EXTERNAL_AUTH_SUPPORT, + + NL80211_ATTR_NSS, + NL80211_ATTR_ACK_SIGNAL, + + NL80211_ATTR_CONTROL_PORT_OVER_NL80211, + + NL80211_ATTR_TXQ_STATS, + NL80211_ATTR_TXQ_LIMIT, + NL80211_ATTR_TXQ_MEMORY_LIMIT, + NL80211_ATTR_TXQ_QUANTUM, + + NL80211_ATTR_HE_CAPABILITY, + + NL80211_ATTR_FTM_RESPONDER, + + NL80211_ATTR_FTM_RESPONDER_STATS, + + NL80211_ATTR_TIMEOUT, + + NL80211_ATTR_PEER_MEASUREMENTS, + + NL80211_ATTR_AIRTIME_WEIGHT, + NL80211_ATTR_STA_TX_POWER_SETTING, + NL80211_ATTR_STA_TX_POWER, + + NL80211_ATTR_SAE_PASSWORD, + + NL80211_ATTR_TWT_RESPONDER, + + NL80211_ATTR_HE_OBSS_PD, + + NL80211_ATTR_WIPHY_EDMG_CHANNELS, + NL80211_ATTR_WIPHY_EDMG_BW_CONFIG, + + /* add attributes here, update the policy in nl80211.c */ + + __NL80211_ATTR_AFTER_LAST, + NUM_NL80211_ATTR = __NL80211_ATTR_AFTER_LAST, + NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 +}; + +/* source-level API compatibility */ +#define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION +#define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG +#define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER +#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA + +/* + * Allow user space programs to use #ifdef on new attributes by defining them + * here + */ +#define NL80211_CMD_CONNECT NL80211_CMD_CONNECT +#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY +#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES +#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS +#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ +#define NL80211_ATTR_WIPHY_CHANNEL_TYPE NL80211_ATTR_WIPHY_CHANNEL_TYPE +#define NL80211_ATTR_MGMT_SUBTYPE NL80211_ATTR_MGMT_SUBTYPE +#define NL80211_ATTR_IE NL80211_ATTR_IE +#define NL80211_ATTR_REG_INITIATOR NL80211_ATTR_REG_INITIATOR +#define NL80211_ATTR_REG_TYPE NL80211_ATTR_REG_TYPE +#define NL80211_ATTR_FRAME NL80211_ATTR_FRAME +#define NL80211_ATTR_SSID NL80211_ATTR_SSID +#define NL80211_ATTR_AUTH_TYPE NL80211_ATTR_AUTH_TYPE +#define NL80211_ATTR_REASON_CODE NL80211_ATTR_REASON_CODE +#define NL80211_ATTR_CIPHER_SUITES_PAIRWISE NL80211_ATTR_CIPHER_SUITES_PAIRWISE +#define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP +#define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS +#define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES +#define NL80211_ATTR_KEY NL80211_ATTR_KEY +#define NL80211_ATTR_KEYS NL80211_ATTR_KEYS +#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS + +#define NL80211_WIPHY_NAME_MAXLEN 64 + +#define NL80211_MAX_SUPP_RATES 32 +#define NL80211_MAX_SUPP_HT_RATES 77 +#define NL80211_MAX_SUPP_REG_RULES 128 +#define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 +#define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 +#define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 +#define NL80211_HT_CAPABILITY_LEN 26 +#define NL80211_VHT_CAPABILITY_LEN 12 +#define NL80211_HE_MIN_CAPABILITY_LEN 16 +#define NL80211_HE_MAX_CAPABILITY_LEN 54 +#define NL80211_MAX_NR_CIPHER_SUITES 5 +#define NL80211_MAX_NR_AKM_SUITES 2 + +#define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10 + +/* default RSSI threshold for scan results if none specified. */ +#define NL80211_SCAN_RSSI_THOLD_OFF -300 + +#define NL80211_CQM_TXE_MAX_INTVL 1800 + +/** + * enum nl80211_iftype - (virtual) interface types + * + * @NL80211_IFTYPE_UNSPECIFIED: unspecified type, driver decides + * @NL80211_IFTYPE_ADHOC: independent BSS member + * @NL80211_IFTYPE_STATION: managed BSS member + * @NL80211_IFTYPE_AP: access point + * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points; VLAN interfaces + * are a bit special in that they must always be tied to a pre-existing + * AP type interface. + * @NL80211_IFTYPE_WDS: wireless distribution interface + * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames + * @NL80211_IFTYPE_MESH_POINT: mesh point + * @NL80211_IFTYPE_P2P_CLIENT: P2P client + * @NL80211_IFTYPE_P2P_GO: P2P group owner + * @NL80211_IFTYPE_P2P_DEVICE: P2P device interface type, this is not a netdev + * and therefore can't be created in the normal ways, use the + * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE + * commands to create and destroy one + * @NL80211_IF_TYPE_OCB: Outside Context of a BSS + * This mode corresponds to the MIB variable dot11OCBActivated=true + * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) + * @NL80211_IFTYPE_MAX: highest interface type number currently defined + * @NUM_NL80211_IFTYPES: number of defined interface types + * + * These values are used with the %NL80211_ATTR_IFTYPE + * to set the type of an interface. + * + */ +enum nl80211_iftype { + NL80211_IFTYPE_UNSPECIFIED, + NL80211_IFTYPE_ADHOC, + NL80211_IFTYPE_STATION, + NL80211_IFTYPE_AP, + NL80211_IFTYPE_AP_VLAN, + NL80211_IFTYPE_WDS, + NL80211_IFTYPE_MONITOR, + NL80211_IFTYPE_MESH_POINT, + NL80211_IFTYPE_P2P_CLIENT, + NL80211_IFTYPE_P2P_GO, + NL80211_IFTYPE_P2P_DEVICE, + NL80211_IFTYPE_OCB, + NL80211_IFTYPE_NAN, + + /* keep last */ + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 +}; + +/** + * enum nl80211_sta_flags - station flags + * + * Station flags. When a station is added to an AP interface, it is + * assumed to be already associated (and hence authenticated.) + * + * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved + * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X) + * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames + * with short barker preamble + * @NL80211_STA_FLAG_WME: station is WME/QoS capable + * @NL80211_STA_FLAG_MFP: station uses management frame protection + * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated + * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer -- this flag should + * only be used in managed mode (even in the flags mask). Note that the + * flag can't be changed, it is only valid while adding a station, and + * attempts to change it will silently be ignored (rather than rejected + * as errors.) + * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers + * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a + * previously added station into associated state + * @NL80211_STA_FLAG_MAX: highest station flag number currently defined + * @__NL80211_STA_FLAG_AFTER_LAST: internal use + */ +enum nl80211_sta_flags { + __NL80211_STA_FLAG_INVALID, + NL80211_STA_FLAG_AUTHORIZED, + NL80211_STA_FLAG_SHORT_PREAMBLE, + NL80211_STA_FLAG_WME, + NL80211_STA_FLAG_MFP, + NL80211_STA_FLAG_AUTHENTICATED, + NL80211_STA_FLAG_TDLS_PEER, + NL80211_STA_FLAG_ASSOCIATED, + + /* keep last */ + __NL80211_STA_FLAG_AFTER_LAST, + NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sta_p2p_ps_status - station support of P2P PS + * + * @NL80211_P2P_PS_UNSUPPORTED: station doesn't support P2P PS mechanism + * @@NL80211_P2P_PS_SUPPORTED: station supports P2P PS mechanism + * @NUM_NL80211_P2P_PS_STATUS: number of values + */ +enum nl80211_sta_p2p_ps_status { + NL80211_P2P_PS_UNSUPPORTED = 0, + NL80211_P2P_PS_SUPPORTED, + + NUM_NL80211_P2P_PS_STATUS, +}; + +#define NL80211_STA_FLAG_MAX_OLD_API NL80211_STA_FLAG_TDLS_PEER + +/** + * struct nl80211_sta_flag_update - station flags mask/set + * @mask: mask of station flags to set + * @set: which values to set them to + * + * Both mask and set contain bits as per &enum nl80211_sta_flags. + */ +struct nl80211_sta_flag_update { + __u32 mask; + __u32 set; +} __attribute__((packed)); + +/** + * enum nl80211_he_gi - HE guard interval + * @NL80211_RATE_INFO_HE_GI_0_8: 0.8 usec + * @NL80211_RATE_INFO_HE_GI_1_6: 1.6 usec + * @NL80211_RATE_INFO_HE_GI_3_2: 3.2 usec + */ +enum nl80211_he_gi { + NL80211_RATE_INFO_HE_GI_0_8, + NL80211_RATE_INFO_HE_GI_1_6, + NL80211_RATE_INFO_HE_GI_3_2, +}; + +/** + * enum nl80211_he_ru_alloc - HE RU allocation values + * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_106: 106-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_242: 242-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_484: 484-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_996: 996-tone RU allocation + * @NL80211_RATE_INFO_HE_RU_ALLOC_2x996: 2x996-tone RU allocation + */ +enum nl80211_he_ru_alloc { + NL80211_RATE_INFO_HE_RU_ALLOC_26, + NL80211_RATE_INFO_HE_RU_ALLOC_52, + NL80211_RATE_INFO_HE_RU_ALLOC_106, + NL80211_RATE_INFO_HE_RU_ALLOC_242, + NL80211_RATE_INFO_HE_RU_ALLOC_484, + NL80211_RATE_INFO_HE_RU_ALLOC_996, + NL80211_RATE_INFO_HE_RU_ALLOC_2x996, +}; + +/** + * enum nl80211_rate_info - bitrate information + * + * These attribute types are used with %NL80211_STA_INFO_TXRATE + * when getting information about the bitrate of a station. + * There are 2 attributes for bitrate, a legacy one that represents + * a 16-bit value, and new one that represents a 32-bit value. + * If the rate value fits into 16 bit, both attributes are reported + * with the same value. If the rate is too high to fit into 16 bits + * (>6.5535Gbps) only 32-bit attribute is included. + * User space tools encouraged to use the 32-bit attribute and fall + * back to the 16-bit one for compatibility with older kernels. + * + * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved + * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) + * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) + * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 MHz dualchannel bitrate + * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval + * @NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s) + * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined + * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8) + * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8) + * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate + * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the + * same as 160 for purposes of the bitrates + * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate + * @NL80211_RATE_INFO_10_MHZ_WIDTH: 10 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. + * half the base (20 MHz) rate + * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. + * a quarter of the base (20 MHz) rate + * @NL80211_RATE_INFO_HE_MCS: HE MCS index (u8, 0-11) + * @NL80211_RATE_INFO_HE_NSS: HE NSS value (u8, 1-8) + * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier + * (u8, see &enum nl80211_he_gi) + * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1) + * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then + * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc) + * @__NL80211_RATE_INFO_AFTER_LAST: internal use + */ +enum nl80211_rate_info { + __NL80211_RATE_INFO_INVALID, + NL80211_RATE_INFO_BITRATE, + NL80211_RATE_INFO_MCS, + NL80211_RATE_INFO_40_MHZ_WIDTH, + NL80211_RATE_INFO_SHORT_GI, + NL80211_RATE_INFO_BITRATE32, + NL80211_RATE_INFO_VHT_MCS, + NL80211_RATE_INFO_VHT_NSS, + NL80211_RATE_INFO_80_MHZ_WIDTH, + NL80211_RATE_INFO_80P80_MHZ_WIDTH, + NL80211_RATE_INFO_160_MHZ_WIDTH, + NL80211_RATE_INFO_10_MHZ_WIDTH, + NL80211_RATE_INFO_5_MHZ_WIDTH, + NL80211_RATE_INFO_HE_MCS, + NL80211_RATE_INFO_HE_NSS, + NL80211_RATE_INFO_HE_GI, + NL80211_RATE_INFO_HE_DCM, + NL80211_RATE_INFO_HE_RU_ALLOC, + + /* keep last */ + __NL80211_RATE_INFO_AFTER_LAST, + NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sta_bss_param - BSS information collected by STA + * + * These attribute types are used with %NL80211_STA_INFO_BSS_PARAM + * when getting information about the bitrate of a station. + * + * @__NL80211_STA_BSS_PARAM_INVALID: attribute number 0 is reserved + * @NL80211_STA_BSS_PARAM_CTS_PROT: whether CTS protection is enabled (flag) + * @NL80211_STA_BSS_PARAM_SHORT_PREAMBLE: whether short preamble is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME: whether short slot time is enabled + * (flag) + * @NL80211_STA_BSS_PARAM_DTIM_PERIOD: DTIM period for beaconing (u8) + * @NL80211_STA_BSS_PARAM_BEACON_INTERVAL: Beacon interval (u16) + * @NL80211_STA_BSS_PARAM_MAX: highest sta_bss_param number currently defined + * @__NL80211_STA_BSS_PARAM_AFTER_LAST: internal use + */ +enum nl80211_sta_bss_param { + __NL80211_STA_BSS_PARAM_INVALID, + NL80211_STA_BSS_PARAM_CTS_PROT, + NL80211_STA_BSS_PARAM_SHORT_PREAMBLE, + NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME, + NL80211_STA_BSS_PARAM_DTIM_PERIOD, + NL80211_STA_BSS_PARAM_BEACON_INTERVAL, + + /* keep last */ + __NL80211_STA_BSS_PARAM_AFTER_LAST, + NL80211_STA_BSS_PARAM_MAX = __NL80211_STA_BSS_PARAM_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sta_info - station information + * + * These attribute types are used with %NL80211_ATTR_STA_INFO + * when getting information about a station. + * + * @__NL80211_STA_INFO_INVALID: attribute number 0 is reserved + * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs) + * @NL80211_STA_INFO_RX_BYTES: total received bytes (MPDU length) + * (u32, from this station) + * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (MPDU length) + * (u32, to this station) + * @NL80211_STA_INFO_RX_BYTES64: total received bytes (MPDU length) + * (u64, from this station) + * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (MPDU length) + * (u64, to this station) + * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute + * containing info as possible, see &enum nl80211_rate_info + * @NL80211_STA_INFO_RX_PACKETS: total received packet (MSDUs and MMPDUs) + * (u32, from this station) + * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (MSDUs and MMPDUs) + * (u32, to this station) + * @NL80211_STA_INFO_TX_RETRIES: total retries (MPDUs) (u32, to this station) + * @NL80211_STA_INFO_TX_FAILED: total failed packets (MPDUs) + * (u32, to this station) + * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) + * @NL80211_STA_INFO_LLID: the station's mesh LLID + * @NL80211_STA_INFO_PLID: the station's mesh PLID + * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station + * (see %enum nl80211_plink_state) + * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested + * attribute, like NL80211_STA_INFO_TX_BITRATE. + * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute + * containing info as possible, see &enum nl80211_sta_bss_param + * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected + * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update. + * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32) + * @NL80211_STA_INFO_T_OFFSET: timing offset with respect to this STA (s64) + * @NL80211_STA_INFO_LOCAL_PM: local mesh STA link-specific power mode + * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode + * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards + * non-peer STA + * @NL80211_STA_INFO_CHAIN_SIGNAL: per-chain signal strength of last PPDU + * Contains a nested array of signal strength attributes (u8, dBm) + * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average + * Same format as NL80211_STA_INFO_CHAIN_SIGNAL. + * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the + * 802.11 header (u32, kbps) + * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons + * (u64) + * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64) + * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average + * for beacons only (u8, dBm) + * @NL80211_STA_INFO_TID_STATS: per-TID statistics (see &enum nl80211_tid_stats) + * This is a nested attribute where each the inner attribute number is the + * TID+1 and the special TID 16 (i.e. value 17) is used for non-QoS frames; + * each one of those is again nested with &enum nl80211_tid_stats + * attributes carrying the actual values. + * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames + * received from the station (u64, usec) + * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment + * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm) + * @NL80211_STA_INFO_ACK_SIGNAL_AVG: avg signal strength of ACK frames (s8, dBm) + * @NL80211_STA_INFO_RX_MPDUS: total number of received packets (MPDUs) + * (u32, from this station) + * @NL80211_STA_INFO_FCS_ERROR_COUNT: total number of packets (MPDUs) received + * with an FCS error (u32, from this station). This count may not include + * some packets with an FCS error due to TA corruption. Hence this counter + * might not be fully accurate. + * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a + * mesh gate (u8, 0 or 1) + * @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames + * sent to the station (u64, usec) + * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16) + * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station + * @NL80211_STA_INFO_ASSOC_AT_BOOTTIME: Timestamp (CLOCK_BOOTTIME, nanoseconds) + * of STA's association + * @__NL80211_STA_INFO_AFTER_LAST: internal + * @NL80211_STA_INFO_MAX: highest possible station info attribute + */ +enum nl80211_sta_info { + __NL80211_STA_INFO_INVALID, + NL80211_STA_INFO_INACTIVE_TIME, + NL80211_STA_INFO_RX_BYTES, + NL80211_STA_INFO_TX_BYTES, + NL80211_STA_INFO_LLID, + NL80211_STA_INFO_PLID, + NL80211_STA_INFO_PLINK_STATE, + NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_TX_BITRATE, + NL80211_STA_INFO_RX_PACKETS, + NL80211_STA_INFO_TX_PACKETS, + NL80211_STA_INFO_TX_RETRIES, + NL80211_STA_INFO_TX_FAILED, + NL80211_STA_INFO_SIGNAL_AVG, + NL80211_STA_INFO_RX_BITRATE, + NL80211_STA_INFO_BSS_PARAM, + NL80211_STA_INFO_CONNECTED_TIME, + NL80211_STA_INFO_STA_FLAGS, + NL80211_STA_INFO_BEACON_LOSS, + NL80211_STA_INFO_T_OFFSET, + NL80211_STA_INFO_LOCAL_PM, + NL80211_STA_INFO_PEER_PM, + NL80211_STA_INFO_NONPEER_PM, + NL80211_STA_INFO_RX_BYTES64, + NL80211_STA_INFO_TX_BYTES64, + NL80211_STA_INFO_CHAIN_SIGNAL, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG, + NL80211_STA_INFO_EXPECTED_THROUGHPUT, + NL80211_STA_INFO_RX_DROP_MISC, + NL80211_STA_INFO_BEACON_RX, + NL80211_STA_INFO_BEACON_SIGNAL_AVG, + NL80211_STA_INFO_TID_STATS, + NL80211_STA_INFO_RX_DURATION, + NL80211_STA_INFO_PAD, + NL80211_STA_INFO_ACK_SIGNAL, + NL80211_STA_INFO_ACK_SIGNAL_AVG, + NL80211_STA_INFO_RX_MPDUS, + NL80211_STA_INFO_FCS_ERROR_COUNT, + NL80211_STA_INFO_CONNECTED_TO_GATE, + NL80211_STA_INFO_TX_DURATION, + NL80211_STA_INFO_AIRTIME_WEIGHT, + NL80211_STA_INFO_AIRTIME_LINK_METRIC, + NL80211_STA_INFO_ASSOC_AT_BOOTTIME, + + /* keep last */ + __NL80211_STA_INFO_AFTER_LAST, + NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1 +}; + +/* we renamed this - stay compatible */ +#define NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG NL80211_STA_INFO_ACK_SIGNAL_AVG + + +/** + * enum nl80211_tid_stats - per TID statistics attributes + * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved + * @NL80211_TID_STATS_RX_MSDU: number of MSDUs received (u64) + * @NL80211_TID_STATS_TX_MSDU: number of MSDUs transmitted (or + * attempted to transmit; u64) + * @NL80211_TID_STATS_TX_MSDU_RETRIES: number of retries for + * transmitted MSDUs (not counting the first attempt; u64) + * @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted + * MSDUs (u64) + * @NL80211_TID_STATS_PAD: attribute used for padding for 64-bit alignment + * @NL80211_TID_STATS_TXQ_STATS: TXQ stats (nested attribute) + * @NUM_NL80211_TID_STATS: number of attributes here + * @NL80211_TID_STATS_MAX: highest numbered attribute here + */ +enum nl80211_tid_stats { + __NL80211_TID_STATS_INVALID, + NL80211_TID_STATS_RX_MSDU, + NL80211_TID_STATS_TX_MSDU, + NL80211_TID_STATS_TX_MSDU_RETRIES, + NL80211_TID_STATS_TX_MSDU_FAILED, + NL80211_TID_STATS_PAD, + NL80211_TID_STATS_TXQ_STATS, + + /* keep last */ + NUM_NL80211_TID_STATS, + NL80211_TID_STATS_MAX = NUM_NL80211_TID_STATS - 1 +}; + +/** + * enum nl80211_txq_stats - per TXQ statistics attributes + * @__NL80211_TXQ_STATS_INVALID: attribute number 0 is reserved + * @NUM_NL80211_TXQ_STATS: number of attributes here + * @NL80211_TXQ_STATS_BACKLOG_BYTES: number of bytes currently backlogged + * @NL80211_TXQ_STATS_BACKLOG_PACKETS: number of packets currently + * backlogged + * @NL80211_TXQ_STATS_FLOWS: total number of new flows seen + * @NL80211_TXQ_STATS_DROPS: total number of packet drops + * @NL80211_TXQ_STATS_ECN_MARKS: total number of packet ECN marks + * @NL80211_TXQ_STATS_OVERLIMIT: number of drops due to queue space overflow + * @NL80211_TXQ_STATS_OVERMEMORY: number of drops due to memory limit overflow + * (only for per-phy stats) + * @NL80211_TXQ_STATS_COLLISIONS: number of hash collisions + * @NL80211_TXQ_STATS_TX_BYTES: total number of bytes dequeued from TXQ + * @NL80211_TXQ_STATS_TX_PACKETS: total number of packets dequeued from TXQ + * @NL80211_TXQ_STATS_MAX_FLOWS: number of flow buckets for PHY + * @NL80211_TXQ_STATS_MAX: highest numbered attribute here + */ +enum nl80211_txq_stats { + __NL80211_TXQ_STATS_INVALID, + NL80211_TXQ_STATS_BACKLOG_BYTES, + NL80211_TXQ_STATS_BACKLOG_PACKETS, + NL80211_TXQ_STATS_FLOWS, + NL80211_TXQ_STATS_DROPS, + NL80211_TXQ_STATS_ECN_MARKS, + NL80211_TXQ_STATS_OVERLIMIT, + NL80211_TXQ_STATS_OVERMEMORY, + NL80211_TXQ_STATS_COLLISIONS, + NL80211_TXQ_STATS_TX_BYTES, + NL80211_TXQ_STATS_TX_PACKETS, + NL80211_TXQ_STATS_MAX_FLOWS, + + /* keep last */ + NUM_NL80211_TXQ_STATS, + NL80211_TXQ_STATS_MAX = NUM_NL80211_TXQ_STATS - 1 +}; + +/** + * enum nl80211_mpath_flags - nl80211 mesh path flags + * + * @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active + * @NL80211_MPATH_FLAG_RESOLVING: the mesh path discovery process is running + * @NL80211_MPATH_FLAG_SN_VALID: the mesh path contains a valid SN + * @NL80211_MPATH_FLAG_FIXED: the mesh path has been manually set + * @NL80211_MPATH_FLAG_RESOLVED: the mesh path discovery process succeeded + */ +enum nl80211_mpath_flags { + NL80211_MPATH_FLAG_ACTIVE = 1<<0, + NL80211_MPATH_FLAG_RESOLVING = 1<<1, + NL80211_MPATH_FLAG_SN_VALID = 1<<2, + NL80211_MPATH_FLAG_FIXED = 1<<3, + NL80211_MPATH_FLAG_RESOLVED = 1<<4, +}; + +/** + * enum nl80211_mpath_info - mesh path information + * + * These attribute types are used with %NL80211_ATTR_MPATH_INFO when getting + * information about a mesh path. + * + * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved + * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination + * @NL80211_MPATH_INFO_SN: destination sequence number + * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path + * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now + * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in + * &enum nl80211_mpath_flags; + * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec + * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries + * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination + * @NL80211_MPATH_INFO_PATH_CHANGE: total number of path changes to destination + * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number + * currently defined + * @__NL80211_MPATH_INFO_AFTER_LAST: internal use + */ +enum nl80211_mpath_info { + __NL80211_MPATH_INFO_INVALID, + NL80211_MPATH_INFO_FRAME_QLEN, + NL80211_MPATH_INFO_SN, + NL80211_MPATH_INFO_METRIC, + NL80211_MPATH_INFO_EXPTIME, + NL80211_MPATH_INFO_FLAGS, + NL80211_MPATH_INFO_DISCOVERY_TIMEOUT, + NL80211_MPATH_INFO_DISCOVERY_RETRIES, + NL80211_MPATH_INFO_HOP_COUNT, + NL80211_MPATH_INFO_PATH_CHANGE, + + /* keep last */ + __NL80211_MPATH_INFO_AFTER_LAST, + NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band_iftype_attr - Interface type data attributes + * + * @__NL80211_BAND_IFTYPE_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_BAND_IFTYPE_ATTR_IFTYPES: nested attribute containing a flag attribute + * for each interface type that supports the band data + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC: HE MAC capabilities as in HE + * capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY: HE PHY capabilities as in HE + * capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET: HE supported NSS/MCS as in HE + * capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as + * defined in HE capabilities IE + * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently + * defined + * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use + */ +enum nl80211_band_iftype_attr { + __NL80211_BAND_IFTYPE_ATTR_INVALID, + + NL80211_BAND_IFTYPE_ATTR_IFTYPES, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, + NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, + + /* keep last */ + __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, + NL80211_BAND_IFTYPE_ATTR_MAX = __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band_attr - band attributes + * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_BAND_ATTR_FREQS: supported frequencies in this band, + * an array of nested frequency attributes + * @NL80211_BAND_ATTR_RATES: supported bitrates in this band, + * an array of nested bitrate attributes + * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as + * defined in 802.11n + * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE + * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n + * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as + * defined in 802.11ac + * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE + * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using + * attributes from &enum nl80211_band_iftype_attr + * @NL80211_BAND_ATTR_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz + * channel(s) that are allowed to be used for EDMG transmissions. + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251. + * @NL80211_BAND_ATTR_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes + * the allowed channel bandwidth configurations. + * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13. + * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined + * @__NL80211_BAND_ATTR_AFTER_LAST: internal use + */ +enum nl80211_band_attr { + __NL80211_BAND_ATTR_INVALID, + NL80211_BAND_ATTR_FREQS, + NL80211_BAND_ATTR_RATES, + + NL80211_BAND_ATTR_HT_MCS_SET, + NL80211_BAND_ATTR_HT_CAPA, + NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + + NL80211_BAND_ATTR_VHT_MCS_SET, + NL80211_BAND_ATTR_VHT_CAPA, + NL80211_BAND_ATTR_IFTYPE_DATA, + + NL80211_BAND_ATTR_EDMG_CHANNELS, + NL80211_BAND_ATTR_EDMG_BW_CONFIG, + + /* keep last */ + __NL80211_BAND_ATTR_AFTER_LAST, + NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 +}; + +#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA + +/** + * enum nl80211_wmm_rule - regulatory wmm rule + * + * @__NL80211_WMMR_INVALID: attribute number 0 is reserved + * @NL80211_WMMR_CW_MIN: Minimum contention window slot. + * @NL80211_WMMR_CW_MAX: Maximum contention window slot. + * @NL80211_WMMR_AIFSN: Arbitration Inter Frame Space. + * @NL80211_WMMR_TXOP: Maximum allowed tx operation time. + * @nl80211_WMMR_MAX: highest possible wmm rule. + * @__NL80211_WMMR_LAST: Internal use. + */ +enum nl80211_wmm_rule { + __NL80211_WMMR_INVALID, + NL80211_WMMR_CW_MIN, + NL80211_WMMR_CW_MAX, + NL80211_WMMR_AIFSN, + NL80211_WMMR_TXOP, + + /* keep last */ + __NL80211_WMMR_LAST, + NL80211_WMMR_MAX = __NL80211_WMMR_LAST - 1 +}; + +/** + * enum nl80211_frequency_attr - frequency attributes + * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz + * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current + * regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_IR: no mechanisms that initiate radiation + * are permitted on this channel, this includes sending probe + * requests, or modes of operation that require beaconing. + * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm + * (100 * dBm). + * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS + * (enum nl80211_dfs_state) + * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long + * this channel is in this DFS state. + * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_HT40_PLUS: HT40+ isn't possible with this + * channel as the control channel + * @NL80211_FREQUENCY_ATTR_NO_80MHZ: any 80 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible, + * this includes 80+80 channels + * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel + * using this channel as the primary or any of the secondary channels + * isn't possible + * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds. + * @NL80211_FREQUENCY_ATTR_INDOOR_ONLY: Only indoor use is permitted on this + * channel. A channel that has the INDOOR_ONLY attribute can only be + * used when there is a clear assessment that the device is operating in + * an indoor surroundings, i.e., it is connected to AC power (and not + * through portable DC inverters) or is under the control of a master + * that is acting as an AP and is connected to AC power. + * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this + * channel if it's connected concurrently to a BSS on the same channel on + * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz + * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS + * off-channel on a channel that has the IR_CONCURRENT attribute set can be + * done when there is a clear assessment that the device is operating under + * the guidance of an authorized master, i.e., setting up a GO or TDLS + * off-channel while the device is also connected to an AP with DFS and + * radar detection on the UNII band (it is up to user-space, i.e., + * wpa_supplicant to perform the required verifications). Using this + * attribute for IR is disallowed for master interfaces (IBSS, AP). + * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_WMM: this channel has wmm limitations. + * This is a nested attribute that contains the wmm limitation per AC. + * (see &enum nl80211_wmm_rule) + * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number + * currently defined + * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use + * + * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 + * for more information on the FCC description of the relaxations allowed + * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and + * NL80211_FREQUENCY_ATTR_IR_CONCURRENT. + */ +enum nl80211_frequency_attr { + __NL80211_FREQUENCY_ATTR_INVALID, + NL80211_FREQUENCY_ATTR_FREQ, + NL80211_FREQUENCY_ATTR_DISABLED, + NL80211_FREQUENCY_ATTR_NO_IR, + __NL80211_FREQUENCY_ATTR_NO_IBSS, + NL80211_FREQUENCY_ATTR_RADAR, + NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + NL80211_FREQUENCY_ATTR_DFS_STATE, + NL80211_FREQUENCY_ATTR_DFS_TIME, + NL80211_FREQUENCY_ATTR_NO_HT40_MINUS, + NL80211_FREQUENCY_ATTR_NO_HT40_PLUS, + NL80211_FREQUENCY_ATTR_NO_80MHZ, + NL80211_FREQUENCY_ATTR_NO_160MHZ, + NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, + NL80211_FREQUENCY_ATTR_INDOOR_ONLY, + NL80211_FREQUENCY_ATTR_IR_CONCURRENT, + NL80211_FREQUENCY_ATTR_NO_20MHZ, + NL80211_FREQUENCY_ATTR_NO_10MHZ, + NL80211_FREQUENCY_ATTR_WMM, + + /* keep last */ + __NL80211_FREQUENCY_ATTR_AFTER_LAST, + NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1 +}; + +#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER +#define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ + NL80211_FREQUENCY_ATTR_IR_CONCURRENT + +/** + * enum nl80211_bitrate_attr - bitrate attributes + * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps + * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported + * in 2.4 GHz band. + * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number + * currently defined + * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use + */ +enum nl80211_bitrate_attr { + __NL80211_BITRATE_ATTR_INVALID, + NL80211_BITRATE_ATTR_RATE, + NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE, + + /* keep last */ + __NL80211_BITRATE_ATTR_AFTER_LAST, + NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_initiator - Indicates the initiator of a reg domain request + * @NL80211_REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world + * regulatory domain. + * @NL80211_REGDOM_SET_BY_USER: User asked the wireless core to set the + * regulatory domain. + * @NL80211_REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the + * wireless core it thinks its knows the regulatory domain we should be in. + * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an + * 802.11 country information element with regulatory information it + * thinks we should consider. cfg80211 only processes the country + * code from the IE, and relies on the regulatory domain information + * structure passed by userspace (CRDA) from our wireless-regdb. + * If a channel is enabled but the country code indicates it should + * be disabled we disable the channel and re-enable it upon disassociation. + */ +enum nl80211_reg_initiator { + NL80211_REGDOM_SET_BY_CORE, + NL80211_REGDOM_SET_BY_USER, + NL80211_REGDOM_SET_BY_DRIVER, + NL80211_REGDOM_SET_BY_COUNTRY_IE, +}; + +/** + * enum nl80211_reg_type - specifies the type of regulatory domain + * @NL80211_REGDOM_TYPE_COUNTRY: the regulatory domain set is one that pertains + * to a specific country. When this is set you can count on the + * ISO / IEC 3166 alpha2 country code being valid. + * @NL80211_REGDOM_TYPE_WORLD: the regulatory set domain is the world regulatory + * domain. + * @NL80211_REGDOM_TYPE_CUSTOM_WORLD: the regulatory domain set is a custom + * driver specific world regulatory domain. These do not apply system-wide + * and are only applicable to the individual devices which have requested + * them to be applied. + * @NL80211_REGDOM_TYPE_INTERSECTION: the regulatory domain set is the product + * of an intersection between two regulatory domains -- the previously + * set regulatory domain on the system and the last accepted regulatory + * domain request to be processed. + */ +enum nl80211_reg_type { + NL80211_REGDOM_TYPE_COUNTRY, + NL80211_REGDOM_TYPE_WORLD, + NL80211_REGDOM_TYPE_CUSTOM_WORLD, + NL80211_REGDOM_TYPE_INTERSECTION, +}; + +/** + * enum nl80211_reg_rule_attr - regulatory rule attributes + * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional + * considerations for a given frequency range. These are the + * &enum nl80211_reg_rule_flags. + * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory + * rule in KHz. This is not a center of frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule + * in KHz. This is not a center a frequency but an actual regulatory + * band edge. + * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this + * frequency range, in KHz. + * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain + * for a given frequency range. The value is in mBi (100 * dBi). + * If you don't have one then don't send this. + * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for + * a given frequency range. The value is in mBm (100 * dBm). + * @NL80211_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds. + * If not present or 0 default CAC time will be used. + * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number + * currently defined + * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use + */ +enum nl80211_reg_rule_attr { + __NL80211_REG_RULE_ATTR_INVALID, + NL80211_ATTR_REG_RULE_FLAGS, + + NL80211_ATTR_FREQ_RANGE_START, + NL80211_ATTR_FREQ_RANGE_END, + NL80211_ATTR_FREQ_RANGE_MAX_BW, + + NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, + NL80211_ATTR_POWER_RULE_MAX_EIRP, + + NL80211_ATTR_DFS_CAC_TIME, + + /* keep last */ + __NL80211_REG_RULE_ATTR_AFTER_LAST, + NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_sched_scan_match_attr - scheduled scan match attributes + * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved + * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching, + * only report BSS with matching SSID. + * (This cannot be used together with BSSID.) + * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a + * BSS in scan results. Filtering is turned off if not specified. Note that + * if this attribute is in a match set of its own, then it is treated as + * the default value for all matchsets with an SSID, rather than being a + * matchset of its own without an RSSI filter. This is due to problems with + * how this API was implemented in the past. Also, due to the same problem, + * the only way to create a matchset with only an RSSI filter (with this + * attribute) is if there's only a single matchset with the RSSI attribute. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI: Flag indicating whether + * %NL80211_SCHED_SCAN_MATCH_ATTR_RSSI to be used as absolute RSSI or + * relative to current bss's RSSI. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST: When present the RSSI level for + * BSS-es in the specified band is to be adjusted before doing + * RSSI-based BSS selection. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. + * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching + * (this cannot be used together with SSID). + * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the + * band specific minimum rssi thresholds for the bands defined in + * enum nl80211_band. The minimum rssi threshold value(s32) specific to a + * band shall be encapsulated in attribute with type value equals to one + * of the NL80211_BAND_* defined in enum nl80211_band. For example, the + * minimum rssi threshold value for 2.4GHZ band shall be encapsulated + * within an attribute of type NL80211_BAND_2GHZ. And one or more of such + * attributes will be nested within this attribute. + * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter + * attribute number currently defined + * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use + */ +enum nl80211_sched_scan_match_attr { + __NL80211_SCHED_SCAN_MATCH_ATTR_INVALID, + + NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + NL80211_SCHED_SCAN_MATCH_ATTR_RSSI, + NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, + NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, + NL80211_SCHED_SCAN_MATCH_ATTR_BSSID, + NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, + + /* keep last */ + __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX = + __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST - 1 +}; + +/* only for backward compatibility */ +#define NL80211_ATTR_SCHED_SCAN_MATCH_SSID NL80211_SCHED_SCAN_MATCH_ATTR_SSID + +/** + * enum nl80211_reg_rule_flags - regulatory rule flags + * + * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed + * @NL80211_RRF_NO_CCK: CCK modulation not allowed + * @NL80211_RRF_NO_INDOOR: indoor operation not allowed + * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed + * @NL80211_RRF_DFS: DFS support is required to be used + * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links + * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links + * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed, + * this includes probe requests or modes of operation that require + * beaconing. + * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated + * base on contiguous rules and wider channels will be allowed to cross + * multiple contiguous/overlapping frequency ranges. + * @NL80211_RRF_IR_CONCURRENT: See %NL80211_FREQUENCY_ATTR_IR_CONCURRENT + * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation + * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation + * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed + * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed + */ +enum nl80211_reg_rule_flags { + NL80211_RRF_NO_OFDM = 1<<0, + NL80211_RRF_NO_CCK = 1<<1, + NL80211_RRF_NO_INDOOR = 1<<2, + NL80211_RRF_NO_OUTDOOR = 1<<3, + NL80211_RRF_DFS = 1<<4, + NL80211_RRF_PTP_ONLY = 1<<5, + NL80211_RRF_PTMP_ONLY = 1<<6, + NL80211_RRF_NO_IR = 1<<7, + __NL80211_RRF_NO_IBSS = 1<<8, + NL80211_RRF_AUTO_BW = 1<<11, + NL80211_RRF_IR_CONCURRENT = 1<<12, + NL80211_RRF_NO_HT40MINUS = 1<<13, + NL80211_RRF_NO_HT40PLUS = 1<<14, + NL80211_RRF_NO_80MHZ = 1<<15, + NL80211_RRF_NO_160MHZ = 1<<16, +}; + +#define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR +#define NL80211_RRF_NO_IBSS NL80211_RRF_NO_IR +#define NL80211_RRF_NO_IR NL80211_RRF_NO_IR +#define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ + NL80211_RRF_NO_HT40PLUS) +#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT + +/* For backport compatibility with older userspace */ +#define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) + +/** + * enum nl80211_dfs_regions - regulatory DFS regions + * + * @NL80211_DFS_UNSET: Country has no DFS master region specified + * @NL80211_DFS_FCC: Country follows DFS master rules from FCC + * @NL80211_DFS_ETSI: Country follows DFS master rules from ETSI + * @NL80211_DFS_JP: Country follows DFS master rules from JP/MKK/Telec + */ +enum nl80211_dfs_regions { + NL80211_DFS_UNSET = 0, + NL80211_DFS_FCC = 1, + NL80211_DFS_ETSI = 2, + NL80211_DFS_JP = 3, +}; + +/** + * enum nl80211_user_reg_hint_type - type of user regulatory hint + * + * @NL80211_USER_REG_HINT_USER: a user sent the hint. This is always + * assumed if the attribute is not set. + * @NL80211_USER_REG_HINT_CELL_BASE: the hint comes from a cellular + * base station. Device drivers that have been tested to work + * properly to support this type of hint can enable these hints + * by setting the NL80211_FEATURE_CELL_BASE_REG_HINTS feature + * capability on the struct wiphy. The wireless core will + * ignore all cell base station hints until at least one device + * present has been registered with the wireless core that + * has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a + * supported feature. + * @NL80211_USER_REG_HINT_INDOOR: a user sent an hint indicating that the + * platform is operating in an indoor environment. + */ +enum nl80211_user_reg_hint_type { + NL80211_USER_REG_HINT_USER = 0, + NL80211_USER_REG_HINT_CELL_BASE = 1, + NL80211_USER_REG_HINT_INDOOR = 2, +}; + +/** + * enum nl80211_survey_info - survey information + * + * These attribute types are used with %NL80211_ATTR_SURVEY_INFO + * when getting information about a survey. + * + * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved + * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel + * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used + * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio + * was turned on (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary + * channel was sensed busy (either due to activity or energy detect) + * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension + * channel was sensed busy + * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent + * receiving data (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent + * transmitting data (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan + * (on this channel or globally) + * @NL80211_SURVEY_INFO_PAD: attribute used for padding for 64-bit alignment + * @NL80211_SURVEY_INFO_TIME_BSS_RX: amount of time the radio spent + * receiving frames destined to the local BSS + * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number + * currently defined + * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use + */ +enum nl80211_survey_info { + __NL80211_SURVEY_INFO_INVALID, + NL80211_SURVEY_INFO_FREQUENCY, + NL80211_SURVEY_INFO_NOISE, + NL80211_SURVEY_INFO_IN_USE, + NL80211_SURVEY_INFO_TIME, + NL80211_SURVEY_INFO_TIME_BUSY, + NL80211_SURVEY_INFO_TIME_EXT_BUSY, + NL80211_SURVEY_INFO_TIME_RX, + NL80211_SURVEY_INFO_TIME_TX, + NL80211_SURVEY_INFO_TIME_SCAN, + NL80211_SURVEY_INFO_PAD, + NL80211_SURVEY_INFO_TIME_BSS_RX, + + /* keep last */ + __NL80211_SURVEY_INFO_AFTER_LAST, + NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1 +}; + +/* keep old names for compatibility */ +#define NL80211_SURVEY_INFO_CHANNEL_TIME NL80211_SURVEY_INFO_TIME +#define NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY NL80211_SURVEY_INFO_TIME_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY NL80211_SURVEY_INFO_TIME_EXT_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_RX NL80211_SURVEY_INFO_TIME_RX +#define NL80211_SURVEY_INFO_CHANNEL_TIME_TX NL80211_SURVEY_INFO_TIME_TX + +/** + * enum nl80211_mntr_flags - monitor configuration flags + * + * Monitor configuration flags. + * + * @__NL80211_MNTR_FLAG_INVALID: reserved + * + * @NL80211_MNTR_FLAG_FCSFAIL: pass frames with bad FCS + * @NL80211_MNTR_FLAG_PLCPFAIL: pass frames with bad PLCP + * @NL80211_MNTR_FLAG_CONTROL: pass control frames + * @NL80211_MNTR_FLAG_OTHER_BSS: disable BSSID filtering + * @NL80211_MNTR_FLAG_COOK_FRAMES: report frames after processing. + * overrides all other flags. + * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address + * and ACK incoming unicast packets. + * + * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use + * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag + */ +enum nl80211_mntr_flags { + __NL80211_MNTR_FLAG_INVALID, + NL80211_MNTR_FLAG_FCSFAIL, + NL80211_MNTR_FLAG_PLCPFAIL, + NL80211_MNTR_FLAG_CONTROL, + NL80211_MNTR_FLAG_OTHER_BSS, + NL80211_MNTR_FLAG_COOK_FRAMES, + NL80211_MNTR_FLAG_ACTIVE, + + /* keep last */ + __NL80211_MNTR_FLAG_AFTER_LAST, + NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 +}; + +/** + * enum nl80211_mesh_power_mode - mesh power save modes + * + * @NL80211_MESH_POWER_UNKNOWN: The mesh power mode of the mesh STA is + * not known or has not been set yet. + * @NL80211_MESH_POWER_ACTIVE: Active mesh power mode. The mesh STA is + * in Awake state all the time. + * @NL80211_MESH_POWER_LIGHT_SLEEP: Light sleep mode. The mesh STA will + * alternate between Active and Doze states, but will wake up for + * neighbor's beacons. + * @NL80211_MESH_POWER_DEEP_SLEEP: Deep sleep mode. The mesh STA will + * alternate between Active and Doze states, but may not wake up + * for neighbor's beacons. + * + * @__NL80211_MESH_POWER_AFTER_LAST - internal use + * @NL80211_MESH_POWER_MAX - highest possible power save level + */ + +enum nl80211_mesh_power_mode { + NL80211_MESH_POWER_UNKNOWN, + NL80211_MESH_POWER_ACTIVE, + NL80211_MESH_POWER_LIGHT_SLEEP, + NL80211_MESH_POWER_DEEP_SLEEP, + + __NL80211_MESH_POWER_AFTER_LAST, + NL80211_MESH_POWER_MAX = __NL80211_MESH_POWER_AFTER_LAST - 1 +}; + +/** + * enum nl80211_meshconf_params - mesh configuration parameters + * + * Mesh configuration parameters. These can be changed while the mesh is + * active. + * + * @__NL80211_MESHCONF_INVALID: internal use + * + * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in + * millisecond units, used by the Peer Link Open message + * + * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the initial confirm timeout, in + * millisecond units, used by the peer link management to close a peer link + * + * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in + * millisecond units + * + * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed + * on this mesh interface + * + * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link + * open retries that can be sent to establish a new peer link instance in a + * mesh + * + * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh + * point. + * + * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically open + * peer links when we detect compatible mesh peers. Disabled if + * @NL80211_MESH_SETUP_USERSPACE_MPM or @NL80211_MESH_SETUP_USERSPACE_AMPE are + * set. + * + * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames + * containing a PREQ that an MP can send to a particular destination (path + * target) + * + * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths + * (in milliseconds) + * + * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait + * until giving up on a path discovery (in milliseconds) + * + * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh + * points receiving a PREQ shall consider the forwarding information from + * the root to be valid. (TU = time unit) + * + * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which an MP can send only one action frame containing a PREQ + * reference element + * + * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) + * that it takes for an HWMP information element to propagate across the + * mesh + * + * @NL80211_MESHCONF_HWMP_ROOTMODE: whether root mode is enabled or not + * + * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a + * source mesh point for path selection elements. + * + * @NL80211_MESHCONF_HWMP_RANN_INTERVAL: The interval of time (in TUs) between + * root announcements are transmitted. + * + * @NL80211_MESHCONF_GATE_ANNOUNCEMENTS: Advertise that this mesh station has + * access to a broader network beyond the MBSS. This is done via Root + * Announcement frames. + * + * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which a mesh STA can send only one Action frame containing a + * PERR element. + * + * @NL80211_MESHCONF_FORWARDING: set Mesh STA as forwarding or non-forwarding + * or forwarding entity (default is TRUE - forwarding entity) + * + * @NL80211_MESHCONF_RSSI_THRESHOLD: RSSI threshold in dBm. This specifies the + * threshold for average signal strength of candidate station to establish + * a peer link. + * + * @NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR: maximum number of neighbors + * to synchronize to for 11s default synchronization method + * (see 11C.12.2.2) + * + * @NL80211_MESHCONF_HT_OPMODE: set mesh HT protection mode. + * + * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * + * @NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT: The time (in TUs) for + * which mesh STAs receiving a proactive PREQ shall consider the forwarding + * information to the root mesh STA to be valid. + * + * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between + * proactive PREQs are transmitted. + * + * @NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL: The minimum interval of time + * (in TUs) during which a mesh STA can send only one Action frame + * containing a PREQ element for root path confirmation. + * + * @NL80211_MESHCONF_POWER_MODE: Default mesh power mode for new peer links. + * type &enum nl80211_mesh_power_mode (u32) + * + * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs) + * + * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've + * established peering with for longer than this time (in seconds), then + * remove it from the STA's list of peers. You may set this to 0 to disable + * the removal of the STA. Default is 30 minutes. + * + * @NL80211_MESHCONF_CONNECTED_TO_GATE: If set to true then this mesh STA + * will advertise that it is connected to a gate in the mesh formation + * field. If left unset then the mesh formation field will only + * advertise such if there is an active root mesh path. + * + * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use + */ +enum nl80211_meshconf_params { + __NL80211_MESHCONF_INVALID, + NL80211_MESHCONF_RETRY_TIMEOUT, + NL80211_MESHCONF_CONFIRM_TIMEOUT, + NL80211_MESHCONF_HOLDING_TIMEOUT, + NL80211_MESHCONF_MAX_PEER_LINKS, + NL80211_MESHCONF_MAX_RETRIES, + NL80211_MESHCONF_TTL, + NL80211_MESHCONF_AUTO_OPEN_PLINKS, + NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + NL80211_MESHCONF_PATH_REFRESH_TIME, + NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + NL80211_MESHCONF_HWMP_ROOTMODE, + NL80211_MESHCONF_ELEMENT_TTL, + NL80211_MESHCONF_HWMP_RANN_INTERVAL, + NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + NL80211_MESHCONF_FORWARDING, + NL80211_MESHCONF_RSSI_THRESHOLD, + NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, + NL80211_MESHCONF_HT_OPMODE, + NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + NL80211_MESHCONF_POWER_MODE, + NL80211_MESHCONF_AWAKE_WINDOW, + NL80211_MESHCONF_PLINK_TIMEOUT, + NL80211_MESHCONF_CONNECTED_TO_GATE, + + /* keep last */ + __NL80211_MESHCONF_ATTR_AFTER_LAST, + NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_mesh_setup_params - mesh setup parameters + * + * Mesh setup parameters. These are used to start/join a mesh and cannot be + * changed while the mesh is active. + * + * @__NL80211_MESH_SETUP_INVALID: Internal use + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a + * vendor specific path selection algorithm or disable it to use the + * default HWMP. + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a + * vendor specific path metric or disable it to use the default Airtime + * metric. + * + * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a + * robust security network ie, or a vendor specific information element + * that vendors will use to identify the path selection methods and + * metrics in use. + * + * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication + * daemon will be authenticating mesh candidates. + * + * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication + * daemon will be securing peer link frames. AMPE is a secured version of + * Mesh Peering Management (MPM) and is implemented with the assistance of + * a userspace daemon. When this flag is set, the kernel will send peer + * management frames to a userspace daemon that will implement AMPE + * functionality (security capabilities selection, key confirmation, and + * key management). When the flag is unset (default), the kernel can + * autonomously complete (unsecured) mesh peering without the need of a + * userspace daemon. + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC: Enable this option to use a + * vendor specific synchronization method or disable it to use the default + * neighbor offset synchronization + * + * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will + * implement an MPM which handles peer allocation and state. + * + * @NL80211_MESH_SETUP_AUTH_PROTOCOL: Inform the kernel of the authentication + * method (u8, as defined in IEEE 8.4.2.100.6, e.g. 0x1 for SAE). + * Default is no authentication method required. + * + * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number + * + * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use + */ +enum nl80211_mesh_setup_params { + __NL80211_MESH_SETUP_INVALID, + NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL, + NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, + NL80211_MESH_SETUP_IE, + NL80211_MESH_SETUP_USERSPACE_AUTH, + NL80211_MESH_SETUP_USERSPACE_AMPE, + NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC, + NL80211_MESH_SETUP_USERSPACE_MPM, + NL80211_MESH_SETUP_AUTH_PROTOCOL, + + /* keep last */ + __NL80211_MESH_SETUP_ATTR_AFTER_LAST, + NL80211_MESH_SETUP_ATTR_MAX = __NL80211_MESH_SETUP_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_txq_attr - TX queue parameter attributes + * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved + * @NL80211_TXQ_ATTR_AC: AC identifier (NL80211_AC_*) + * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning + * disabled + * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255] + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number + */ +enum nl80211_txq_attr { + __NL80211_TXQ_ATTR_INVALID, + NL80211_TXQ_ATTR_AC, + NL80211_TXQ_ATTR_TXOP, + NL80211_TXQ_ATTR_CWMIN, + NL80211_TXQ_ATTR_CWMAX, + NL80211_TXQ_ATTR_AIFS, + + /* keep last */ + __NL80211_TXQ_ATTR_AFTER_LAST, + NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1 +}; + +enum nl80211_ac { + NL80211_AC_VO, + NL80211_AC_VI, + NL80211_AC_BE, + NL80211_AC_BK, + NL80211_NUM_ACS +}; + +/* backward compat */ +#define NL80211_TXQ_ATTR_QUEUE NL80211_TXQ_ATTR_AC +#define NL80211_TXQ_Q_VO NL80211_AC_VO +#define NL80211_TXQ_Q_VI NL80211_AC_VI +#define NL80211_TXQ_Q_BE NL80211_AC_BE +#define NL80211_TXQ_Q_BK NL80211_AC_BK + +/** + * enum nl80211_channel_type - channel type + * @NL80211_CHAN_NO_HT: 20 MHz, non-HT channel + * @NL80211_CHAN_HT20: 20 MHz HT channel + * @NL80211_CHAN_HT40MINUS: HT40 channel, secondary channel + * below the control channel + * @NL80211_CHAN_HT40PLUS: HT40 channel, secondary channel + * above the control channel + */ +enum nl80211_channel_type { + NL80211_CHAN_NO_HT, + NL80211_CHAN_HT20, + NL80211_CHAN_HT40MINUS, + NL80211_CHAN_HT40PLUS +}; + +/** + * enum nl80211_key_mode - Key mode + * + * @NL80211_KEY_RX_TX: (Default) + * Key can be used for Rx and Tx immediately + * + * The following modes can only be selected for unicast keys and when the + * driver supports @NL80211_EXT_FEATURE_EXT_KEY_ID: + * + * @NL80211_KEY_NO_TX: Only allowed in combination with @NL80211_CMD_NEW_KEY: + * Unicast key can only be used for Rx, Tx not allowed, yet + * @NL80211_KEY_SET_TX: Only allowed in combination with @NL80211_CMD_SET_KEY: + * The unicast key identified by idx and mac is cleared for Tx and becomes + * the preferred Tx key for the station. + */ +enum nl80211_key_mode { + NL80211_KEY_RX_TX, + NL80211_KEY_NO_TX, + NL80211_KEY_SET_TX +}; + +/** + * enum nl80211_chan_width - channel width definitions + * + * These values are used with the %NL80211_ATTR_CHANNEL_WIDTH + * attribute. + * + * @NL80211_CHAN_WIDTH_20_NOHT: 20 MHz, non-HT channel + * @NL80211_CHAN_WIDTH_20: 20 MHz HT channel + * @NL80211_CHAN_WIDTH_40: 40 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80: 80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80P80: 80+80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 attributes must be provided as well + * @NL80211_CHAN_WIDTH_160: 160 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_5: 5 MHz OFDM channel + * @NL80211_CHAN_WIDTH_10: 10 MHz OFDM channel + */ +enum nl80211_chan_width { + NL80211_CHAN_WIDTH_20_NOHT, + NL80211_CHAN_WIDTH_20, + NL80211_CHAN_WIDTH_40, + NL80211_CHAN_WIDTH_80, + NL80211_CHAN_WIDTH_80P80, + NL80211_CHAN_WIDTH_160, + NL80211_CHAN_WIDTH_5, + NL80211_CHAN_WIDTH_10, +}; + +/** + * enum nl80211_bss_scan_width - control channel width for a BSS + * + * These values are used with the %NL80211_BSS_CHAN_WIDTH attribute. + * + * @NL80211_BSS_CHAN_WIDTH_20: control channel is 20 MHz wide or compatible + * @NL80211_BSS_CHAN_WIDTH_10: control channel is 10 MHz wide + * @NL80211_BSS_CHAN_WIDTH_5: control channel is 5 MHz wide + */ +enum nl80211_bss_scan_width { + NL80211_BSS_CHAN_WIDTH_20, + NL80211_BSS_CHAN_WIDTH_10, + NL80211_BSS_CHAN_WIDTH_5, +}; + +/** + * enum nl80211_bss - netlink attributes for a BSS + * + * @__NL80211_BSS_INVALID: invalid + * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets) + * @NL80211_BSS_FREQUENCY: frequency in MHz (u32) + * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64) + * (if @NL80211_BSS_PRESP_DATA is present then this is known to be + * from a probe response, otherwise it may be from the same beacon + * that the NL80211_BSS_BEACON_TSF will be from) + * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) + * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16) + * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the + * raw information elements from the probe response/beacon (bin); + * if the %NL80211_BSS_BEACON_IES attribute is present and the data is + * different then the IEs here are from a Probe Response frame; otherwise + * they are from a Beacon frame. + * However, if the driver does not indicate the source of the IEs, these + * IEs may be from either frame subtype. + * If present, the @NL80211_BSS_PRESP_DATA attribute indicates that the + * data here is known to be from a probe response, without any heuristics. + * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon + * in mBm (100 * dBm) (s32) + * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon + * in unspecified units, scaled to 0..100 (u8) + * @NL80211_BSS_STATUS: status, if this BSS is "used" + * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms + * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information + * elements from a Beacon frame (bin); not present if no Beacon frame has + * yet been received + * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel + * (u32, enum nl80211_bss_scan_width) + * @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64) + * (not present if no beacon frame has been received yet) + * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and + * @NL80211_BSS_TSF is known to be from a probe response (flag attribute) + * @NL80211_BSS_LAST_SEEN_BOOTTIME: CLOCK_BOOTTIME timestamp when this entry + * was last updated by a received frame. The value is expected to be + * accurate to about 10ms. (u64, nanoseconds) + * @NL80211_BSS_PAD: attribute used for padding for 64-bit alignment + * @NL80211_BSS_PARENT_TSF: the time at the start of reception of the first + * octet of the timestamp field of the last beacon/probe received for + * this BSS. The time is the TSF of the BSS specified by + * @NL80211_BSS_PARENT_BSSID. (u64). + * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF + * is set. + * @NL80211_BSS_CHAIN_SIGNAL: per-chain signal strength of last BSS update. + * Contains a nested array of signal strength attributes (u8, dBm), + * using the nesting index as the antenna number. + * @__NL80211_BSS_AFTER_LAST: internal + * @NL80211_BSS_MAX: highest BSS attribute + */ +enum nl80211_bss { + __NL80211_BSS_INVALID, + NL80211_BSS_BSSID, + NL80211_BSS_FREQUENCY, + NL80211_BSS_TSF, + NL80211_BSS_BEACON_INTERVAL, + NL80211_BSS_CAPABILITY, + NL80211_BSS_INFORMATION_ELEMENTS, + NL80211_BSS_SIGNAL_MBM, + NL80211_BSS_SIGNAL_UNSPEC, + NL80211_BSS_STATUS, + NL80211_BSS_SEEN_MS_AGO, + NL80211_BSS_BEACON_IES, + NL80211_BSS_CHAN_WIDTH, + NL80211_BSS_BEACON_TSF, + NL80211_BSS_PRESP_DATA, + NL80211_BSS_LAST_SEEN_BOOTTIME, + NL80211_BSS_PAD, + NL80211_BSS_PARENT_TSF, + NL80211_BSS_PARENT_BSSID, + NL80211_BSS_CHAIN_SIGNAL, + + /* keep last */ + __NL80211_BSS_AFTER_LAST, + NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1 +}; + +/** + * enum nl80211_bss_status - BSS "status" + * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * Note that this is no longer used since cfg80211 no longer + * keeps track of whether or not authentication was done with + * a given BSS. + * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. + * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. + * + * The BSS status is a BSS attribute in scan dumps, which + * indicates the status the interface has wrt. this BSS. + */ +enum nl80211_bss_status { + NL80211_BSS_STATUS_AUTHENTICATED, + NL80211_BSS_STATUS_ASSOCIATED, + NL80211_BSS_STATUS_IBSS_JOINED, +}; + +/** + * enum nl80211_auth_type - AuthenticationType + * + * @NL80211_AUTHTYPE_OPEN_SYSTEM: Open System authentication + * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only) + * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) + * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) + * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals + * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key + * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS + * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key + * @__NL80211_AUTHTYPE_NUM: internal + * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm + * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by + * trying multiple times); this is invalid in netlink -- leave out + * the attribute for this on CONNECT commands. + */ +enum nl80211_auth_type { + NL80211_AUTHTYPE_OPEN_SYSTEM, + NL80211_AUTHTYPE_SHARED_KEY, + NL80211_AUTHTYPE_FT, + NL80211_AUTHTYPE_NETWORK_EAP, + NL80211_AUTHTYPE_SAE, + NL80211_AUTHTYPE_FILS_SK, + NL80211_AUTHTYPE_FILS_SK_PFS, + NL80211_AUTHTYPE_FILS_PK, + + /* keep last */ + __NL80211_AUTHTYPE_NUM, + NL80211_AUTHTYPE_MAX = __NL80211_AUTHTYPE_NUM - 1, + NL80211_AUTHTYPE_AUTOMATIC +}; + +/** + * enum nl80211_key_type - Key Type + * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key + * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key + * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS) + * @NUM_NL80211_KEYTYPES: number of defined key types + */ +enum nl80211_key_type { + NL80211_KEYTYPE_GROUP, + NL80211_KEYTYPE_PAIRWISE, + NL80211_KEYTYPE_PEERKEY, + + NUM_NL80211_KEYTYPES +}; + +/** + * enum nl80211_mfp - Management frame protection state + * @NL80211_MFP_NO: Management frame protection not used + * @NL80211_MFP_REQUIRED: Management frame protection required + * @NL80211_MFP_OPTIONAL: Management frame protection is optional + */ +enum nl80211_mfp { + NL80211_MFP_NO, + NL80211_MFP_REQUIRED, + NL80211_MFP_OPTIONAL, +}; + +enum nl80211_wpa_versions { + NL80211_WPA_VERSION_1 = 1 << 0, + NL80211_WPA_VERSION_2 = 1 << 1, + NL80211_WPA_VERSION_3 = 1 << 2, +}; + +/** + * enum nl80211_key_default_types - key default types + * @__NL80211_KEY_DEFAULT_TYPE_INVALID: invalid + * @NL80211_KEY_DEFAULT_TYPE_UNICAST: key should be used as default + * unicast key + * @NL80211_KEY_DEFAULT_TYPE_MULTICAST: key should be used as default + * multicast key + * @NUM_NL80211_KEY_DEFAULT_TYPES: number of default types + */ +enum nl80211_key_default_types { + __NL80211_KEY_DEFAULT_TYPE_INVALID, + NL80211_KEY_DEFAULT_TYPE_UNICAST, + NL80211_KEY_DEFAULT_TYPE_MULTICAST, + + NUM_NL80211_KEY_DEFAULT_TYPES +}; + +/** + * enum nl80211_key_attributes - key attributes + * @__NL80211_KEY_INVALID: invalid + * @NL80211_KEY_DATA: (temporal) key data; for TKIP this consists of + * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC + * keys + * @NL80211_KEY_IDX: key ID (u8, 0-3) + * @NL80211_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11 + * section 7.3.2.25.1, e.g. 0x000FAC04) + * @NL80211_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and + * CCMP keys, each six bytes in little endian + * @NL80211_KEY_DEFAULT: flag indicating default key + * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key + * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not + * specified the default depends on whether a MAC address was + * given with the command using the key or not (u32) + * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags + * attributes, specifying what a key should be set as default as. + * See &enum nl80211_key_default_types. + * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode. + * Defaults to @NL80211_KEY_RX_TX. + * + * @__NL80211_KEY_AFTER_LAST: internal + * @NL80211_KEY_MAX: highest key attribute + */ +enum nl80211_key_attributes { + __NL80211_KEY_INVALID, + NL80211_KEY_DATA, + NL80211_KEY_IDX, + NL80211_KEY_CIPHER, + NL80211_KEY_SEQ, + NL80211_KEY_DEFAULT, + NL80211_KEY_DEFAULT_MGMT, + NL80211_KEY_TYPE, + NL80211_KEY_DEFAULT_TYPES, + NL80211_KEY_MODE, + + /* keep last */ + __NL80211_KEY_AFTER_LAST, + NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 +}; + +/** + * enum nl80211_tx_rate_attributes - TX rate set attributes + * @__NL80211_TXRATE_INVALID: invalid + * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection + * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with + * 1 = 500 kbps) but without the IE length restriction (at most + * %NL80211_MAX_SUPP_RATES in a single array). + * @NL80211_TXRATE_HT: HT (MCS) rates allowed for TX rate selection + * in an array of MCS numbers. + * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection, + * see &struct nl80211_txrate_vht + * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi + * @__NL80211_TXRATE_AFTER_LAST: internal + * @NL80211_TXRATE_MAX: highest TX rate attribute + */ +enum nl80211_tx_rate_attributes { + __NL80211_TXRATE_INVALID, + NL80211_TXRATE_LEGACY, + NL80211_TXRATE_HT, + NL80211_TXRATE_VHT, + NL80211_TXRATE_GI, + + /* keep last */ + __NL80211_TXRATE_AFTER_LAST, + NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1 +}; + +#define NL80211_TXRATE_MCS NL80211_TXRATE_HT +#define NL80211_VHT_NSS_MAX 8 + +/** + * struct nl80211_txrate_vht - VHT MCS/NSS txrate bitmap + * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.) + */ +struct nl80211_txrate_vht { + __u16 mcs[NL80211_VHT_NSS_MAX]; +}; + +enum nl80211_txrate_gi { + NL80211_TXRATE_DEFAULT_GI, + NL80211_TXRATE_FORCE_SGI, + NL80211_TXRATE_FORCE_LGI, +}; + +/** + * enum nl80211_band - Frequency band + * @NL80211_BAND_2GHZ: 2.4 GHz ISM band + * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz) + * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz) + * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace + * since newer kernel versions may support more bands + */ +enum nl80211_band { + NL80211_BAND_2GHZ, + NL80211_BAND_5GHZ, + NL80211_BAND_60GHZ, + NL80211_BAND_6GHZ, + + NUM_NL80211_BANDS, +}; + +/** + * enum nl80211_ps_state - powersave state + * @NL80211_PS_DISABLED: powersave is disabled + * @NL80211_PS_ENABLED: powersave is enabled + */ +enum nl80211_ps_state { + NL80211_PS_DISABLED, + NL80211_PS_ENABLED, +}; + +/** + * enum nl80211_attr_cqm - connection quality monitor attributes + * @__NL80211_ATTR_CQM_INVALID: invalid + * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm. This value specifies + * the threshold for the RSSI level at which an event will be sent. Zero + * to disable. Alternatively, if %NL80211_EXT_FEATURE_CQM_RSSI_LIST is + * set, multiple values can be supplied as a low-to-high sorted array of + * threshold values in dBm. Events will be sent when the RSSI value + * crosses any of the thresholds. + * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm. This value specifies + * the minimum amount the RSSI level must change after an event before a + * new event may be issued (to reduce effects of RSSI oscillation). + * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event + * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many + * consecutive packets were not acknowledged by the peer + * @NL80211_ATTR_CQM_TXE_RATE: TX error rate in %. Minimum % of TX failures + * during the given %NL80211_ATTR_CQM_TXE_INTVL before an + * %NL80211_CMD_NOTIFY_CQM with reported %NL80211_ATTR_CQM_TXE_RATE and + * %NL80211_ATTR_CQM_TXE_PKTS is generated. + * @NL80211_ATTR_CQM_TXE_PKTS: number of attempted packets in a given + * %NL80211_ATTR_CQM_TXE_INTVL before %NL80211_ATTR_CQM_TXE_RATE is + * checked. + * @NL80211_ATTR_CQM_TXE_INTVL: interval in seconds. Specifies the periodic + * interval in which %NL80211_ATTR_CQM_TXE_PKTS and + * %NL80211_ATTR_CQM_TXE_RATE must be satisfied before generating an + * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting. + * @NL80211_ATTR_CQM_BEACON_LOSS_EVENT: flag attribute that's set in a beacon + * loss event + * @NL80211_ATTR_CQM_RSSI_LEVEL: the RSSI value in dBm that triggered the + * RSSI threshold event. + * @__NL80211_ATTR_CQM_AFTER_LAST: internal + * @NL80211_ATTR_CQM_MAX: highest key attribute + */ +enum nl80211_attr_cqm { + __NL80211_ATTR_CQM_INVALID, + NL80211_ATTR_CQM_RSSI_THOLD, + NL80211_ATTR_CQM_RSSI_HYST, + NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, + NL80211_ATTR_CQM_PKT_LOSS_EVENT, + NL80211_ATTR_CQM_TXE_RATE, + NL80211_ATTR_CQM_TXE_PKTS, + NL80211_ATTR_CQM_TXE_INTVL, + NL80211_ATTR_CQM_BEACON_LOSS_EVENT, + NL80211_ATTR_CQM_RSSI_LEVEL, + + /* keep last */ + __NL80211_ATTR_CQM_AFTER_LAST, + NL80211_ATTR_CQM_MAX = __NL80211_ATTR_CQM_AFTER_LAST - 1 +}; + +/** + * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the + * configured threshold + * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the + * configured threshold + * @NL80211_CQM_RSSI_BEACON_LOSS_EVENT: (reserved, never sent) + */ +enum nl80211_cqm_rssi_threshold_event { + NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, + NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, + NL80211_CQM_RSSI_BEACON_LOSS_EVENT, +}; + + +/** + * enum nl80211_tx_power_setting - TX power adjustment + * @NL80211_TX_POWER_AUTOMATIC: automatically determine transmit power + * @NL80211_TX_POWER_LIMITED: limit TX power by the mBm parameter + * @NL80211_TX_POWER_FIXED: fix TX power to the mBm parameter + */ +enum nl80211_tx_power_setting { + NL80211_TX_POWER_AUTOMATIC, + NL80211_TX_POWER_LIMITED, + NL80211_TX_POWER_FIXED, +}; + +/** + * enum nl80211_packet_pattern_attr - packet pattern attribute + * @__NL80211_PKTPAT_INVALID: invalid number for nested attribute + * @NL80211_PKTPAT_PATTERN: the pattern, values where the mask has + * a zero bit are ignored + * @NL80211_PKTPAT_MASK: pattern mask, must be long enough to have + * a bit for each byte in the pattern. The lowest-order bit corresponds + * to the first byte of the pattern, but the bytes of the pattern are + * in a little-endian-like format, i.e. the 9th byte of the pattern + * corresponds to the lowest-order bit in the second byte of the mask. + * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where + * xx indicates "don't care") would be represented by a pattern of + * twelve zero bytes, and a mask of "0xed,0x01". + * Note that the pattern matching is done as though frames were not + * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked + * first (including SNAP header unpacking) and then matched. + * @NL80211_PKTPAT_OFFSET: packet offset, pattern is matched after + * these fixed number of bytes of received packet + * @NUM_NL80211_PKTPAT: number of attributes + * @MAX_NL80211_PKTPAT: max attribute number + */ +enum nl80211_packet_pattern_attr { + __NL80211_PKTPAT_INVALID, + NL80211_PKTPAT_MASK, + NL80211_PKTPAT_PATTERN, + NL80211_PKTPAT_OFFSET, + + NUM_NL80211_PKTPAT, + MAX_NL80211_PKTPAT = NUM_NL80211_PKTPAT - 1, +}; + +/** + * struct nl80211_pattern_support - packet pattern support information + * @max_patterns: maximum number of patterns supported + * @min_pattern_len: minimum length of each pattern + * @max_pattern_len: maximum length of each pattern + * @max_pkt_offset: maximum Rx packet offset + * + * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when + * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED or in + * %NL80211_ATTR_COALESCE_RULE_PKT_PATTERN when that is part of + * %NL80211_ATTR_COALESCE_RULE in the capability information given + * by the kernel to userspace. + */ +struct nl80211_pattern_support { + __u32 max_patterns; + __u32 min_pattern_len; + __u32 max_pattern_len; + __u32 max_pkt_offset; +} __attribute__((packed)); + +/* only for backward compatibility */ +#define __NL80211_WOWLAN_PKTPAT_INVALID __NL80211_PKTPAT_INVALID +#define NL80211_WOWLAN_PKTPAT_MASK NL80211_PKTPAT_MASK +#define NL80211_WOWLAN_PKTPAT_PATTERN NL80211_PKTPAT_PATTERN +#define NL80211_WOWLAN_PKTPAT_OFFSET NL80211_PKTPAT_OFFSET +#define NUM_NL80211_WOWLAN_PKTPAT NUM_NL80211_PKTPAT +#define MAX_NL80211_WOWLAN_PKTPAT MAX_NL80211_PKTPAT +#define nl80211_wowlan_pattern_support nl80211_pattern_support + +/** + * enum nl80211_wowlan_triggers - WoWLAN trigger definitions + * @__NL80211_WOWLAN_TRIG_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put + * the chip into a special state -- works best with chips that have + * support for low-power operation already (flag) + * Note that this mode is incompatible with all of the others, if + * any others are even supported by the device. + * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect + * is detected is implementation-specific (flag) + * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed + * by 16 repetitions of MAC addr, anywhere in payload) (flag) + * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns + * which are passed in an array of nested attributes, each nested attribute + * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern. + * Each pattern defines a wakeup packet. Packet offset is associated with + * each pattern which is used while matching the pattern. The matching is + * done on the MSDU, i.e. as though the packet was an 802.3 packet, so the + * pattern matching is done after the packet is converted to the MSDU. + * + * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute + * carrying a &struct nl80211_pattern_support. + * + * When reporting wakeup. it is a u32 attribute containing the 0-based + * index of the pattern that caused the wakeup, in the patterns passed + * to the kernel when configuring. + * @NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED: Not a real trigger, and cannot be + * used when setting, used only to indicate that GTK rekeying is supported + * by the device (flag) + * @NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE: wake up on GTK rekey failure (if + * done by the device) (flag) + * @NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST: wake up on EAP Identity Request + * packet (flag) + * @NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE: wake up on 4-way handshake (flag) + * @NL80211_WOWLAN_TRIG_RFKILL_RELEASE: wake up when rfkill is released + * (on devices that have rfkill in the device) (flag) + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211: For wakeup reporting only, contains + * the 802.11 packet that caused the wakeup, e.g. a deauth frame. The frame + * may be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN + * attribute contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN: Original length of the 802.11 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211 + * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023: For wakeup reporting only, contains the + * 802.11 packet that caused the wakeup, e.g. a magic packet. The frame may + * be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN attribute + * contains the original length. + * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3 + * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023 + * attribute if the packet was truncated somewhere. + * @NL80211_WOWLAN_TRIG_TCP_CONNECTION: TCP connection wake, see DOC section + * "TCP connection wakeup" for more details. This is a nested attribute + * containing the exact information for establishing and keeping alive + * the TCP connection. + * @NL80211_WOWLAN_TRIG_TCP_WAKEUP_MATCH: For wakeup reporting only, the + * wakeup packet was received on the TCP connection + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the + * TCP connection was lost or failed to be established + * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only, + * the TCP connection ran out of tokens to use for data to send to the + * service + * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network + * is detected. This is a nested attribute that contains the + * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It + * specifies how the scan is performed (e.g. the interval, the + * channels to scan and the initial delay) as well as the scan + * results that will trigger a wake (i.e. the matchsets). This + * attribute is also sent in a response to + * @NL80211_CMD_GET_WIPHY, indicating the number of match sets + * supported by the driver (u32). + * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute + * containing an array with information about what triggered the + * wake up. If no elements are present in the array, it means + * that the information is not available. If more than one + * element is present, it means that more than one match + * occurred. + * Each element in the array is a nested attribute that contains + * one optional %NL80211_ATTR_SSID attribute and one optional + * %NL80211_ATTR_SCAN_FREQUENCIES attribute. At least one of + * these attributes must be present. If + * %NL80211_ATTR_SCAN_FREQUENCIES contains more than one + * frequency, it means that the match occurred in more than one + * channel. + * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers + * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number + * + * These nested attributes are used to configure the wakeup triggers and + * to report the wakeup reason(s). + */ +enum nl80211_wowlan_triggers { + __NL80211_WOWLAN_TRIG_INVALID, + NL80211_WOWLAN_TRIG_ANY, + NL80211_WOWLAN_TRIG_DISCONNECT, + NL80211_WOWLAN_TRIG_MAGIC_PKT, + NL80211_WOWLAN_TRIG_PKT_PATTERN, + NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED, + NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE, + NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST, + NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE, + NL80211_WOWLAN_TRIG_RFKILL_RELEASE, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023, + NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN, + NL80211_WOWLAN_TRIG_TCP_CONNECTION, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS, + NL80211_WOWLAN_TRIG_NET_DETECT, + NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS, + + /* keep last */ + NUM_NL80211_WOWLAN_TRIG, + MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1 +}; + +/** + * DOC: TCP connection wakeup + * + * Some devices can establish a TCP connection in order to be woken up by a + * packet coming in from outside their network segment, or behind NAT. If + * configured, the device will establish a TCP connection to the given + * service, and periodically send data to that service. The first data + * packet is usually transmitted after SYN/ACK, also ACKing the SYN/ACK. + * The data packets can optionally include a (little endian) sequence + * number (in the TCP payload!) that is generated by the device, and, also + * optionally, a token from a list of tokens. This serves as a keep-alive + * with the service, and for NATed connections, etc. + * + * During this keep-alive period, the server doesn't send any data to the + * client. When receiving data, it is compared against the wakeup pattern + * (and mask) and if it matches, the host is woken up. Similarly, if the + * connection breaks or cannot be established to start with, the host is + * also woken up. + * + * Developer's note: ARP offload is required for this, otherwise TCP + * response packets might not go through correctly. + */ + +/** + * struct nl80211_wowlan_tcp_data_seq - WoWLAN TCP data sequence + * @start: starting value + * @offset: offset of sequence number in packet + * @len: length of the sequence value to write, 1 through 4 + * + * Note: don't confuse with the TCP sequence number(s), this is for the + * keepalive packet payload. The actual value is written into the packet + * in little endian. + */ +struct nl80211_wowlan_tcp_data_seq { + __u32 start, offset, len; +}; + +/** + * struct nl80211_wowlan_tcp_data_token - WoWLAN TCP data token config + * @offset: offset of token in packet + * @len: length of each token + * @token_stream: stream of data to be used for the tokens, the length must + * be a multiple of @len for this to make sense + */ +struct nl80211_wowlan_tcp_data_token { + __u32 offset, len; + __u8 token_stream[]; +}; + +/** + * struct nl80211_wowlan_tcp_data_token_feature - data token features + * @min_len: minimum token length + * @max_len: maximum token length + * @bufsize: total available token buffer size (max size of @token_stream) + */ +struct nl80211_wowlan_tcp_data_token_feature { + __u32 min_len, max_len, bufsize; +}; + +/** + * enum nl80211_wowlan_tcp_attrs - WoWLAN TCP connection parameters + * @__NL80211_WOWLAN_TCP_INVALID: invalid number for nested attributes + * @NL80211_WOWLAN_TCP_SRC_IPV4: source IPv4 address (in network byte order) + * @NL80211_WOWLAN_TCP_DST_IPV4: destination IPv4 address + * (in network byte order) + * @NL80211_WOWLAN_TCP_DST_MAC: destination MAC address, this is given because + * route lookup when configured might be invalid by the time we suspend, + * and doing a route lookup when suspending is no longer possible as it + * might require ARP querying. + * @NL80211_WOWLAN_TCP_SRC_PORT: source port (u16); optional, if not given a + * socket and port will be allocated + * @NL80211_WOWLAN_TCP_DST_PORT: destination port (u16) + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD: data packet payload, at least one byte. + * For feature advertising, a u32 attribute holding the maximum length + * of the data payload. + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ: data packet sequence configuration + * (if desired), a &struct nl80211_wowlan_tcp_data_seq. For feature + * advertising it is just a flag + * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN: data packet token configuration, + * see &struct nl80211_wowlan_tcp_data_token and for advertising see + * &struct nl80211_wowlan_tcp_data_token_feature. + * @NL80211_WOWLAN_TCP_DATA_INTERVAL: data interval in seconds, maximum + * interval in feature advertising (u32) + * @NL80211_WOWLAN_TCP_WAKE_PAYLOAD: wake packet payload, for advertising a + * u32 attribute holding the maximum length + * @NL80211_WOWLAN_TCP_WAKE_MASK: Wake packet payload mask, not used for + * feature advertising. The mask works like @NL80211_PKTPAT_MASK + * but on the TCP payload only. + * @NUM_NL80211_WOWLAN_TCP: number of TCP attributes + * @MAX_NL80211_WOWLAN_TCP: highest attribute number + */ +enum nl80211_wowlan_tcp_attrs { + __NL80211_WOWLAN_TCP_INVALID, + NL80211_WOWLAN_TCP_SRC_IPV4, + NL80211_WOWLAN_TCP_DST_IPV4, + NL80211_WOWLAN_TCP_DST_MAC, + NL80211_WOWLAN_TCP_SRC_PORT, + NL80211_WOWLAN_TCP_DST_PORT, + NL80211_WOWLAN_TCP_DATA_PAYLOAD, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, + NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + NL80211_WOWLAN_TCP_DATA_INTERVAL, + NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + NL80211_WOWLAN_TCP_WAKE_MASK, + + /* keep last */ + NUM_NL80211_WOWLAN_TCP, + MAX_NL80211_WOWLAN_TCP = NUM_NL80211_WOWLAN_TCP - 1 +}; + +/** + * struct nl80211_coalesce_rule_support - coalesce rule support information + * @max_rules: maximum number of rules supported + * @pat: packet pattern support information + * @max_delay: maximum supported coalescing delay in msecs + * + * This struct is carried in %NL80211_ATTR_COALESCE_RULE in the + * capability information given by the kernel to userspace. + */ +struct nl80211_coalesce_rule_support { + __u32 max_rules; + struct nl80211_pattern_support pat; + __u32 max_delay; +} __attribute__((packed)); + +/** + * enum nl80211_attr_coalesce_rule - coalesce rule attribute + * @__NL80211_COALESCE_RULE_INVALID: invalid number for nested attribute + * @NL80211_ATTR_COALESCE_RULE_DELAY: delay in msecs used for packet coalescing + * @NL80211_ATTR_COALESCE_RULE_CONDITION: condition for packet coalescence, + * see &enum nl80211_coalesce_condition. + * @NL80211_ATTR_COALESCE_RULE_PKT_PATTERN: packet offset, pattern is matched + * after these fixed number of bytes of received packet + * @NUM_NL80211_ATTR_COALESCE_RULE: number of attributes + * @NL80211_ATTR_COALESCE_RULE_MAX: max attribute number + */ +enum nl80211_attr_coalesce_rule { + __NL80211_COALESCE_RULE_INVALID, + NL80211_ATTR_COALESCE_RULE_DELAY, + NL80211_ATTR_COALESCE_RULE_CONDITION, + NL80211_ATTR_COALESCE_RULE_PKT_PATTERN, + + /* keep last */ + NUM_NL80211_ATTR_COALESCE_RULE, + NL80211_ATTR_COALESCE_RULE_MAX = NUM_NL80211_ATTR_COALESCE_RULE - 1 +}; + +/** + * enum nl80211_coalesce_condition - coalesce rule conditions + * @NL80211_COALESCE_CONDITION_MATCH: coalaesce Rx packets when patterns + * in a rule are matched. + * @NL80211_COALESCE_CONDITION_NO_MATCH: coalesce Rx packets when patterns + * in a rule are not matched. + */ +enum nl80211_coalesce_condition { + NL80211_COALESCE_CONDITION_MATCH, + NL80211_COALESCE_CONDITION_NO_MATCH +}; + +/** + * enum nl80211_iface_limit_attrs - limit attributes + * @NL80211_IFACE_LIMIT_UNSPEC: (reserved) + * @NL80211_IFACE_LIMIT_MAX: maximum number of interfaces that + * can be chosen from this set of interface types (u32) + * @NL80211_IFACE_LIMIT_TYPES: nested attribute containing a + * flag attribute for each interface type in this set + * @NUM_NL80211_IFACE_LIMIT: number of attributes + * @MAX_NL80211_IFACE_LIMIT: highest attribute number + */ +enum nl80211_iface_limit_attrs { + NL80211_IFACE_LIMIT_UNSPEC, + NL80211_IFACE_LIMIT_MAX, + NL80211_IFACE_LIMIT_TYPES, + + /* keep last */ + NUM_NL80211_IFACE_LIMIT, + MAX_NL80211_IFACE_LIMIT = NUM_NL80211_IFACE_LIMIT - 1 +}; + +/** + * enum nl80211_if_combination_attrs -- interface combination attributes + * + * @NL80211_IFACE_COMB_UNSPEC: (reserved) + * @NL80211_IFACE_COMB_LIMITS: Nested attributes containing the limits + * for given interface types, see &enum nl80211_iface_limit_attrs. + * @NL80211_IFACE_COMB_MAXNUM: u32 attribute giving the total number of + * interfaces that can be created in this group. This number doesn't + * apply to interfaces purely managed in software, which are listed + * in a separate attribute %NL80211_ATTR_INTERFACES_SOFTWARE. + * @NL80211_IFACE_COMB_STA_AP_BI_MATCH: flag attribute specifying that + * beacon intervals within this group must be all the same even for + * infrastructure and AP/GO combinations, i.e. the GO(s) must adopt + * the infrastructure network's beacon interval. + * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many + * different channels may be used within this group. + * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap + * of supported channel widths for radar detection. + * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap + * of supported regulatory regions for radar detection. + * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of + * different beacon intervals supported by all the interface combinations + * in this group (if not present, all beacon intervals be identical). + * @NUM_NL80211_IFACE_COMB: number of attributes + * @MAX_NL80211_IFACE_COMB: highest attribute number + * + * Examples: + * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 + * => allows an AP and a STA that must match BIs + * + * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8, + * => allows 8 of AP/GO that can have BI gcd >= min gcd + * + * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 + * => allows two STAs on different channels + * + * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4 + * => allows a STA plus three P2P interfaces + * + * The list of these four possibilities could completely be contained + * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate + * that any of these groups must match. + * + * "Combinations" of just a single interface will not be listed here, + * a single interface of any valid interface type is assumed to always + * be possible by itself. This means that implicitly, for each valid + * interface type, the following group always exists: + * numbers = [ #{<type>} <= 1 ], channels = 1, max = 1 + */ +enum nl80211_if_combination_attrs { + NL80211_IFACE_COMB_UNSPEC, + NL80211_IFACE_COMB_LIMITS, + NL80211_IFACE_COMB_MAXNUM, + NL80211_IFACE_COMB_STA_AP_BI_MATCH, + NL80211_IFACE_COMB_NUM_CHANNELS, + NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + NL80211_IFACE_COMB_BI_MIN_GCD, + + /* keep last */ + NUM_NL80211_IFACE_COMB, + MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1 +}; + + +/** + * enum nl80211_plink_state - state of a mesh peer link finite state machine + * + * @NL80211_PLINK_LISTEN: initial state, considered the implicit + * state of non existent mesh peer links + * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to + * this mesh peer + * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received + * from this mesh peer + * @NL80211_PLINK_CNF_RCVD: mesh plink confirm frame has been + * received from this mesh peer + * @NL80211_PLINK_ESTAB: mesh peer link is established + * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled + * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh + * plink are discarded + * @NUM_NL80211_PLINK_STATES: number of peer link states + * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states + */ +enum nl80211_plink_state { + NL80211_PLINK_LISTEN, + NL80211_PLINK_OPN_SNT, + NL80211_PLINK_OPN_RCVD, + NL80211_PLINK_CNF_RCVD, + NL80211_PLINK_ESTAB, + NL80211_PLINK_HOLDING, + NL80211_PLINK_BLOCKED, + + /* keep last */ + NUM_NL80211_PLINK_STATES, + MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1 +}; + +/** + * enum nl80211_plink_action - actions to perform in mesh peers + * + * @NL80211_PLINK_ACTION_NO_ACTION: perform no action + * @NL80211_PLINK_ACTION_OPEN: start mesh peer link establishment + * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer + * @NUM_NL80211_PLINK_ACTIONS: number of possible actions + */ +enum plink_actions { + NL80211_PLINK_ACTION_NO_ACTION, + NL80211_PLINK_ACTION_OPEN, + NL80211_PLINK_ACTION_BLOCK, + + NUM_NL80211_PLINK_ACTIONS, +}; + + +#define NL80211_KCK_LEN 16 +#define NL80211_KEK_LEN 16 +#define NL80211_REPLAY_CTR_LEN 8 + +/** + * enum nl80211_rekey_data - attributes for GTK rekey offload + * @__NL80211_REKEY_DATA_INVALID: invalid number for nested attributes + * @NL80211_REKEY_DATA_KEK: key encryption key (binary) + * @NL80211_REKEY_DATA_KCK: key confirmation key (binary) + * @NL80211_REKEY_DATA_REPLAY_CTR: replay counter (binary) + * @NUM_NL80211_REKEY_DATA: number of rekey attributes (internal) + * @MAX_NL80211_REKEY_DATA: highest rekey attribute (internal) + */ +enum nl80211_rekey_data { + __NL80211_REKEY_DATA_INVALID, + NL80211_REKEY_DATA_KEK, + NL80211_REKEY_DATA_KCK, + NL80211_REKEY_DATA_REPLAY_CTR, + + /* keep last */ + NUM_NL80211_REKEY_DATA, + MAX_NL80211_REKEY_DATA = NUM_NL80211_REKEY_DATA - 1 +}; + +/** + * enum nl80211_hidden_ssid - values for %NL80211_ATTR_HIDDEN_SSID + * @NL80211_HIDDEN_SSID_NOT_IN_USE: do not hide SSID (i.e., broadcast it in + * Beacon frames) + * @NL80211_HIDDEN_SSID_ZERO_LEN: hide SSID by using zero-length SSID element + * in Beacon frames + * @NL80211_HIDDEN_SSID_ZERO_CONTENTS: hide SSID by using correct length of SSID + * element in Beacon frames but zero out each byte in the SSID + */ +enum nl80211_hidden_ssid { + NL80211_HIDDEN_SSID_NOT_IN_USE, + NL80211_HIDDEN_SSID_ZERO_LEN, + NL80211_HIDDEN_SSID_ZERO_CONTENTS +}; + +/** + * enum nl80211_sta_wme_attr - station WME attributes + * @__NL80211_STA_WME_INVALID: invalid number for nested attribute + * @NL80211_STA_WME_UAPSD_QUEUES: bitmap of uapsd queues. the format + * is the same as the AC bitmap in the QoS info field. + * @NL80211_STA_WME_MAX_SP: max service period. the format is the same + * as the MAX_SP field in the QoS info field (but already shifted down). + * @__NL80211_STA_WME_AFTER_LAST: internal + * @NL80211_STA_WME_MAX: highest station WME attribute + */ +enum nl80211_sta_wme_attr { + __NL80211_STA_WME_INVALID, + NL80211_STA_WME_UAPSD_QUEUES, + NL80211_STA_WME_MAX_SP, + + /* keep last */ + __NL80211_STA_WME_AFTER_LAST, + NL80211_STA_WME_MAX = __NL80211_STA_WME_AFTER_LAST - 1 +}; + +/** + * enum nl80211_pmksa_candidate_attr - attributes for PMKSA caching candidates + * @__NL80211_PMKSA_CANDIDATE_INVALID: invalid number for nested attributes + * @NL80211_PMKSA_CANDIDATE_INDEX: candidate index (u32; the smaller, the higher + * priority) + * @NL80211_PMKSA_CANDIDATE_BSSID: candidate BSSID (6 octets) + * @NL80211_PMKSA_CANDIDATE_PREAUTH: RSN pre-authentication supported (flag) + * @NUM_NL80211_PMKSA_CANDIDATE: number of PMKSA caching candidate attributes + * (internal) + * @MAX_NL80211_PMKSA_CANDIDATE: highest PMKSA caching candidate attribute + * (internal) + */ +enum nl80211_pmksa_candidate_attr { + __NL80211_PMKSA_CANDIDATE_INVALID, + NL80211_PMKSA_CANDIDATE_INDEX, + NL80211_PMKSA_CANDIDATE_BSSID, + NL80211_PMKSA_CANDIDATE_PREAUTH, + + /* keep last */ + NUM_NL80211_PMKSA_CANDIDATE, + MAX_NL80211_PMKSA_CANDIDATE = NUM_NL80211_PMKSA_CANDIDATE - 1 +}; + +/** + * enum nl80211_tdls_operation - values for %NL80211_ATTR_TDLS_OPERATION + * @NL80211_TDLS_DISCOVERY_REQ: Send a TDLS discovery request + * @NL80211_TDLS_SETUP: Setup TDLS link + * @NL80211_TDLS_TEARDOWN: Teardown a TDLS link which is already established + * @NL80211_TDLS_ENABLE_LINK: Enable TDLS link + * @NL80211_TDLS_DISABLE_LINK: Disable TDLS link + */ +enum nl80211_tdls_operation { + NL80211_TDLS_DISCOVERY_REQ, + NL80211_TDLS_SETUP, + NL80211_TDLS_TEARDOWN, + NL80211_TDLS_ENABLE_LINK, + NL80211_TDLS_DISABLE_LINK, +}; + +/* + * enum nl80211_ap_sme_features - device-integrated AP features + * Reserved for future use, no bits are defined in + * NL80211_ATTR_DEVICE_AP_SME yet. +enum nl80211_ap_sme_features { +}; + */ + +/** + * enum nl80211_feature_flags - device/driver features + * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back + * TX status to the socket error queue when requested with the + * socket option. + * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates. + * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up + * the connected inactive stations in AP mode. + * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested + * to work properly to suppport receiving regulatory hints from + * cellular base stations. + * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only + * here to reserve the value for API/ABI compatibility) + * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of + * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station + * mode + * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan + * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported + * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif + * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting + * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform + * OBSS scans and generate 20/40 BSS coex reports. This flag is used only + * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied. + * @NL80211_FEATURE_P2P_GO_CTWIN: P2P GO implementation supports CT Window + * setting + * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic + * powersave + * @NL80211_FEATURE_FULL_AP_CLIENT_STATE: The driver supports full state + * transitions for AP clients. Without this flag (and if the driver + * doesn't have the AP SME in the device) the driver supports adding + * stations only when they're associated and adds them in associated + * state (to later be transitioned into authorized), with this flag + * they should be added before even sending the authentication reply + * and then transitioned into authenticated, associated and authorized + * states using station flags. + * Note that even for drivers that support this, the default is to add + * stations in authenticated/associated state, so to add unauthenticated + * stations the authenticated/associated bits have to be set in the mask. + * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits + * (HT40, VHT 80/160 MHz) if this flag is set + * @NL80211_FEATURE_USERSPACE_MPM: This driver supports a userspace Mesh + * Peering Management entity which may be implemented by registering for + * beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is + * still generated by the driver. + * @NL80211_FEATURE_ACTIVE_MONITOR: This driver supports an active monitor + * interface. An active monitor interface behaves like a normal monitor + * interface, but gets added to the driver. It ensures that incoming + * unicast packets directed at the configured interface address get ACKed. + * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic + * channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the + * lifetime of a BSS. + * @NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES: This device adds a DS Parameter + * Set IE to probe requests. + * @NL80211_FEATURE_WFA_TPC_IE_IN_PROBES: This device adds a WFA TPC Report IE + * to probe requests. + * @NL80211_FEATURE_QUIET: This device, in client mode, supports Quiet Period + * requests sent to it by an AP. + * @NL80211_FEATURE_TX_POWER_INSERTION: This device is capable of inserting the + * current tx power value into the TPC Report IE in the spectrum + * management TPC Report action frame, and in the Radio Measurement Link + * Measurement Report action frame. + * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout + * estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used + * to enable dynack. + * @NL80211_FEATURE_STATIC_SMPS: Device supports static spatial + * multiplexing powersave, ie. can turn off all but one chain + * even on HT connections that should be using more chains. + * @NL80211_FEATURE_DYNAMIC_SMPS: Device supports dynamic spatial + * multiplexing powersave, ie. can turn off all but one chain + * and then wake the rest up as required after, for example, + * rts/cts handshake. + * @NL80211_FEATURE_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM + * TSPEC sessions (TID aka TSID 0-7) with the %NL80211_CMD_ADD_TX_TS + * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it + * needs to be able to handle Block-Ack agreements and other things. + * @NL80211_FEATURE_MAC_ON_CREATE: Device supports configuring + * the vif's MAC address upon creation. + * See 'macaddr' field in the vif_params (cfg80211.h). + * @NL80211_FEATURE_TDLS_CHANNEL_SWITCH: Driver supports channel switching when + * operating as a TDLS peer. + * @NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR: This device/driver supports using a + * random MAC address during scan (if the device is unassociated); the + * %NL80211_SCAN_FLAG_RANDOM_ADDR flag may be set for scans and the MAC + * address mask/value will be used. + * @NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR: This device/driver supports + * using a random MAC address for every scan iteration during scheduled + * scan (while not associated), the %NL80211_SCAN_FLAG_RANDOM_ADDR may + * be set for scheduled scan and the MAC address mask/value will be used. + * @NL80211_FEATURE_ND_RANDOM_MAC_ADDR: This device/driver supports using a + * random MAC address for every scan iteration during "net detect", i.e. + * scan in unassociated WoWLAN, the %NL80211_SCAN_FLAG_RANDOM_ADDR may + * be set for scheduled scan and the MAC address mask/value will be used. + */ +enum nl80211_feature_flags { + NL80211_FEATURE_SK_TX_STATUS = 1 << 0, + NL80211_FEATURE_HT_IBSS = 1 << 1, + NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, + NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, + NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, + NL80211_FEATURE_SAE = 1 << 5, + NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, + NL80211_FEATURE_SCAN_FLUSH = 1 << 7, + NL80211_FEATURE_AP_SCAN = 1 << 8, + NL80211_FEATURE_VIF_TXPOWER = 1 << 9, + NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, + NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, + NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, + /* bit 13 is reserved */ + NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14, + NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15, + NL80211_FEATURE_USERSPACE_MPM = 1 << 16, + NL80211_FEATURE_ACTIVE_MONITOR = 1 << 17, + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE = 1 << 18, + NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES = 1 << 19, + NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = 1 << 20, + NL80211_FEATURE_QUIET = 1 << 21, + NL80211_FEATURE_TX_POWER_INSERTION = 1 << 22, + NL80211_FEATURE_ACKTO_ESTIMATION = 1 << 23, + NL80211_FEATURE_STATIC_SMPS = 1 << 24, + NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25, + NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26, + NL80211_FEATURE_MAC_ON_CREATE = 1 << 27, + NL80211_FEATURE_TDLS_CHANNEL_SWITCH = 1 << 28, + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR = 1 << 29, + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR = 1 << 30, + NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1U << 31, +}; + +/** + * enum nl80211_ext_feature_index - bit index of extended features. + * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates. + * @NL80211_EXT_FEATURE_RRM: This driver supports RRM. When featured, user can + * can request to use RRM (see %NL80211_ATTR_USE_RRM) with + * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set + * the ASSOC_REQ_USE_RRM flag in the association request even if + * NL80211_FEATURE_QUIET is not advertized. + * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air + * sniffer which means that it can be configured to hear packets from + * certain groups which can be configured by the + * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute, + * or can be configured to follow a station by configuring the + * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute. + * @NL80211_EXT_FEATURE_SCAN_START_TIME: This driver includes the actual + * time the scan started in scan results event. The time is the TSF of + * the BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the + * time the last beacon/probe was received. The time is the TSF of the + * BSS that the interface that requested the scan is connected to + * (if available). + * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of + * channel dwell time. + * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate + * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate. + * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate + * configuration (AP/mesh) with HT rates. + * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate + * configuration (AP/mesh) with VHT rates. + * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup + * with user space SME (NL80211_CMD_AUTHENTICATE) in station mode. + * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA: This driver supports randomized TA + * in @NL80211_CMD_FRAME while not associated. + * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED: This driver supports + * randomized TA in @NL80211_CMD_FRAME while associated. + * @NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI: The driver supports sched_scan + * for reporting BSSs with better RSSI than the current connected BSS + * (%NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI). + * @NL80211_EXT_FEATURE_CQM_RSSI_LIST: With this driver the + * %NL80211_ATTR_CQM_RSSI_THOLD attribute accepts a list of zero or more + * RSSI threshold values to monitor rather than exactly one threshold. + * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key + * authentication with %NL80211_CMD_CONNECT. + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way + * handshake with PSK in station mode (PSK is passed as part of the connect + * and associate commands), doing it in the host might not be supported. + * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X: Device wants to do doing 4-way + * handshake with 802.1X in station mode (will pass EAP frames to the host + * and accept the set_pmk/del_pmk commands), doing it in the host might not + * be supported. + * @NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME: Driver is capable of overriding + * the max channel attribute in the FILS request params IE with the + * actual dwell time. + * @NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP: Driver accepts broadcast probe + * response + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE: Driver supports sending + * the first probe request in each channel at rate of at least 5.5Mbps. + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports + * probe request tx deferral and suppression + * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL + * value in %NL80211_ATTR_USE_MFP. + * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan. + * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan. + * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan. + * @NL80211_EXT_FEATURE_DFS_OFFLOAD: HW/driver will offload DFS actions. + * Device or driver will do all DFS-related actions by itself, + * informing user-space about CAC progress, radar detection event, + * channel change triggered by radar detection event. + * No need to start CAC from user-space, no need to react to + * "radar detected" event. + * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and + * receiving control port frames over nl80211 instead of the netdevice. + * @NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT: This driver/device supports + * (average) ACK signal strength reporting. + * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate + * TXQs. + * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the + * SN in probe request frames if requested by %NL80211_SCAN_FLAG_RANDOM_SN. + * @NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT: Driver/device can omit all data + * except for supported rates from the probe request content if requested + * by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag. + * @NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER: Driver supports enabling fine + * timing measurement responder role. + * + * @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0: Driver/device confirm that they are + * able to rekey an in-use key correctly. Userspace must not rekey PTK keys + * if this flag is not set. Ignoring this can leak clear text packets and/or + * freeze the connection. + * @NL80211_EXT_FEATURE_EXT_KEY_ID: Driver supports "Extended Key ID for + * Individually Addressed Frames" from IEEE802.11-2016. + * + * @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime + * fairness for transmitted packets and has enabled airtime fairness + * scheduling. + * + * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching + * (set/del PMKSA operations) in AP mode. + * + * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports + * filtering of sched scan results using band specific RSSI thresholds. + * + * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power + * to a station. + * + * @NL80211_EXT_FEATURE_SAE_OFFLOAD: Device wants to do SAE authentication in + * station mode (SAE password is passed as part of the connect command). + * + * @NUM_NL80211_EXT_FEATURES: number of extended features. + * @MAX_NL80211_EXT_FEATURES: highest extended feature index. + */ +enum nl80211_ext_feature_index { + NL80211_EXT_FEATURE_VHT_IBSS, + NL80211_EXT_FEATURE_RRM, + NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER, + NL80211_EXT_FEATURE_SCAN_START_TIME, + NL80211_EXT_FEATURE_BSS_PARENT_TSF, + NL80211_EXT_FEATURE_SET_SCAN_DWELL, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + NL80211_EXT_FEATURE_BEACON_RATE_HT, + NL80211_EXT_FEATURE_BEACON_RATE_VHT, + NL80211_EXT_FEATURE_FILS_STA, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA, + NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED, + NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI, + NL80211_EXT_FEATURE_CQM_RSSI_LIST, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, + NL80211_EXT_FEATURE_MFP_OPTIONAL, + NL80211_EXT_FEATURE_LOW_SPAN_SCAN, + NL80211_EXT_FEATURE_LOW_POWER_SCAN, + NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN, + NL80211_EXT_FEATURE_DFS_OFFLOAD, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211, + NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT, + /* we renamed this - stay compatible */ + NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT = NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT, + NL80211_EXT_FEATURE_TXQS, + NL80211_EXT_FEATURE_SCAN_RANDOM_SN, + NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT, + NL80211_EXT_FEATURE_CAN_REPLACE_PTK0, + NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING, + NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, + NL80211_EXT_FEATURE_EXT_KEY_ID, + NL80211_EXT_FEATURE_STA_TX_PWR, + NL80211_EXT_FEATURE_SAE_OFFLOAD, + + /* add new features before the definition below */ + NUM_NL80211_EXT_FEATURES, + MAX_NL80211_EXT_FEATURES = NUM_NL80211_EXT_FEATURES - 1 +}; + +/** + * enum nl80211_probe_resp_offload_support_attr - optional supported + * protocols for probe-response offloading by the driver/FW. + * To be used with the %NL80211_ATTR_PROBE_RESP_OFFLOAD attribute. + * Each enum value represents a bit in the bitmap of supported + * protocols. Typically a subset of probe-requests belonging to a + * supported protocol will be excluded from offload and uploaded + * to the host. + * + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS: Support for WPS ver. 1 + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2: Support for WPS ver. 2 + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P: Support for P2P + * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U: Support for 802.11u + */ +enum nl80211_probe_resp_offload_support_attr { + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS = 1<<0, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 = 1<<1, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P = 1<<2, + NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U = 1<<3, +}; + +/** + * enum nl80211_connect_failed_reason - connection request failed reasons + * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be + * handled by the AP is reached. + * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Connection request is rejected due to ACL. + */ +enum nl80211_connect_failed_reason { + NL80211_CONN_FAIL_MAX_CLIENTS, + NL80211_CONN_FAIL_BLOCKED_CLIENT, +}; + +/** + * enum nl80211_timeout_reason - timeout reasons + * + * @NL80211_TIMEOUT_UNSPECIFIED: Timeout reason unspecified. + * @NL80211_TIMEOUT_SCAN: Scan (AP discovery) timed out. + * @NL80211_TIMEOUT_AUTH: Authentication timed out. + * @NL80211_TIMEOUT_ASSOC: Association timed out. + */ +enum nl80211_timeout_reason { + NL80211_TIMEOUT_UNSPECIFIED, + NL80211_TIMEOUT_SCAN, + NL80211_TIMEOUT_AUTH, + NL80211_TIMEOUT_ASSOC, +}; + +/** + * enum nl80211_scan_flags - scan request control flags + * + * Scan request control flags are used to control the handling + * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN + * requests. + * + * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and + * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only + * one of them can be used in the request. + * + * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority + * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning + * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured + * as AP and the beaconing has already been configured. This attribute is + * dangerous because will destroy stations performance as a lot of frames + * will be lost while scanning off-channel, therefore it must be used only + * when really needed + * @NL80211_SCAN_FLAG_RANDOM_ADDR: use a random MAC address for this scan (or + * for scheduled scan: a different one for every scan iteration). When the + * flag is set, depending on device capabilities the @NL80211_ATTR_MAC and + * @NL80211_ATTR_MAC_MASK attributes may also be given in which case only + * the masked bits will be preserved from the MAC address and the remainder + * randomised. If the attributes are not given full randomisation (46 bits, + * locally administered 1, multicast 0) is assumed. + * This flag must not be requested when the feature isn't supported, check + * the nl80211 feature flags for the device. + * @NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME: fill the dwell time in the FILS + * request parameters IE in the probe request + * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at + * rate of at least 5.5M. In case non OCE AP is discovered in the channel, + * only the first probe req in the channel will be sent in high rate. + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request + * tx deferral (dot11FILSProbeDelay shall be set to 15ms) + * and suppression (if it has received a broadcast Probe Response frame, + * Beacon frame or FILS Discovery frame from an AP that the STA considers + * a suitable candidate for (re-)association - suitable in terms of + * SSID and/or RSSI. + * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to + * accomplish the scan. Thus, this flag intends the driver to perform the + * scan request with lesser span/duration. It is specific to the driver + * implementations on how this is accomplished. Scan accuracy may get + * impacted with this flag. + * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume + * optimal possible power. Drivers can resort to their specific means to + * optimize the power. Scan accuracy may get impacted with this flag. + * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan + * results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum + * possible scan results. This flag hints the driver to use the best + * possible scan configuration to improve the accuracy in scanning. + * Latency and power use may get impacted with this flag. + * @NL80211_SCAN_FLAG_RANDOM_SN: randomize the sequence number in probe + * request frames from this scan to avoid correlation/tracking being + * possible. + * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to + * only have supported rates and no additional capabilities (unless + * added by userspace explicitly.) + */ +enum nl80211_scan_flags { + NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, + NL80211_SCAN_FLAG_FLUSH = 1<<1, + NL80211_SCAN_FLAG_AP = 1<<2, + NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3, + NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME = 1<<4, + NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7, + NL80211_SCAN_FLAG_LOW_SPAN = 1<<8, + NL80211_SCAN_FLAG_LOW_POWER = 1<<9, + NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10, + NL80211_SCAN_FLAG_RANDOM_SN = 1<<11, + NL80211_SCAN_FLAG_MIN_PREQ_CONTENT = 1<<12, +}; + +/** + * enum nl80211_acl_policy - access control policy + * + * Access control policy is applied on a MAC list set by + * %NL80211_CMD_START_AP and %NL80211_CMD_SET_MAC_ACL, to + * be used with %NL80211_ATTR_ACL_POLICY. + * + * @NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED: Deny stations which are + * listed in ACL, i.e. allow all the stations which are not listed + * in ACL to authenticate. + * @NL80211_ACL_POLICY_DENY_UNLESS_LISTED: Allow the stations which are listed + * in ACL, i.e. deny all the stations which are not listed in ACL. + */ +enum nl80211_acl_policy { + NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED, + NL80211_ACL_POLICY_DENY_UNLESS_LISTED, +}; + +/** + * enum nl80211_smps_mode - SMPS mode + * + * Requested SMPS mode (for AP mode) + * + * @NL80211_SMPS_OFF: SMPS off (use all antennas). + * @NL80211_SMPS_STATIC: static SMPS (use a single antenna) + * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and + * turn on other antennas after CTS/RTS). + */ +enum nl80211_smps_mode { + NL80211_SMPS_OFF, + NL80211_SMPS_STATIC, + NL80211_SMPS_DYNAMIC, + + __NL80211_SMPS_AFTER_LAST, + NL80211_SMPS_MAX = __NL80211_SMPS_AFTER_LAST - 1 +}; + +/** + * enum nl80211_radar_event - type of radar event for DFS operation + * + * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace + * about detected radars or success of the channel available check (CAC) + * + * @NL80211_RADAR_DETECTED: A radar pattern has been detected. The channel is + * now unusable. + * @NL80211_RADAR_CAC_FINISHED: Channel Availability Check has been finished, + * the channel is now available. + * @NL80211_RADAR_CAC_ABORTED: Channel Availability Check has been aborted, no + * change to the channel status. + * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is + * over, channel becomes usable. + * @NL80211_RADAR_PRE_CAC_EXPIRED: Channel Availability Check done on this + * non-operating channel is expired and no longer valid. New CAC must + * be done on this channel before starting the operation. This is not + * applicable for ETSI dfs domain where pre-CAC is valid for ever. + * @NL80211_RADAR_CAC_STARTED: Channel Availability Check has been started, + * should be generated by HW if NL80211_EXT_FEATURE_DFS_OFFLOAD is enabled. + */ +enum nl80211_radar_event { + NL80211_RADAR_DETECTED, + NL80211_RADAR_CAC_FINISHED, + NL80211_RADAR_CAC_ABORTED, + NL80211_RADAR_NOP_FINISHED, + NL80211_RADAR_PRE_CAC_EXPIRED, + NL80211_RADAR_CAC_STARTED, +}; + +/** + * enum nl80211_dfs_state - DFS states for channels + * + * Channel states used by the DFS code. + * + * @NL80211_DFS_USABLE: The channel can be used, but channel availability + * check (CAC) must be performed before using it for AP or IBSS. + * @NL80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it + * is therefore marked as not available. + * @NL80211_DFS_AVAILABLE: The channel has been CAC checked and is available. + */ +enum nl80211_dfs_state { + NL80211_DFS_USABLE, + NL80211_DFS_UNAVAILABLE, + NL80211_DFS_AVAILABLE, +}; + +/** + * enum enum nl80211_protocol_features - nl80211 protocol features + * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting + * wiphy dumps (if requested by the application with the attribute + * %NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the + * wiphy dump by %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFINDEX or + * %NL80211_ATTR_WDEV. + */ +enum nl80211_protocol_features { + NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP = 1 << 0, +}; + +/** + * enum nl80211_crit_proto_id - nl80211 critical protocol identifiers + * + * @NL80211_CRIT_PROTO_UNSPEC: protocol unspecified. + * @NL80211_CRIT_PROTO_DHCP: BOOTP or DHCPv6 protocol. + * @NL80211_CRIT_PROTO_EAPOL: EAPOL protocol. + * @NL80211_CRIT_PROTO_APIPA: APIPA protocol. + * @NUM_NL80211_CRIT_PROTO: must be kept last. + */ +enum nl80211_crit_proto_id { + NL80211_CRIT_PROTO_UNSPEC, + NL80211_CRIT_PROTO_DHCP, + NL80211_CRIT_PROTO_EAPOL, + NL80211_CRIT_PROTO_APIPA, + /* add other protocols before this one */ + NUM_NL80211_CRIT_PROTO +}; + +/* maximum duration for critical protocol measures */ +#define NL80211_CRIT_PROTO_MAX_DURATION 5000 /* msec */ + +/** + * enum nl80211_rxmgmt_flags - flags for received management frame. + * + * Used by cfg80211_rx_mgmt() + * + * @NL80211_RXMGMT_FLAG_ANSWERED: frame was answered by device/driver. + * @NL80211_RXMGMT_FLAG_EXTERNAL_AUTH: Host driver intends to offload + * the authentication. Exclusively defined for host drivers that + * advertises the SME functionality but would like the userspace + * to handle certain authentication algorithms (e.g. SAE). + */ +enum nl80211_rxmgmt_flags { + NL80211_RXMGMT_FLAG_ANSWERED = 1 << 0, + NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = 1 << 1, +}; + +/* + * If this flag is unset, the lower 24 bits are an OUI, if set + * a Linux nl80211 vendor ID is used (no such IDs are allocated + * yet, so that's not valid so far) + */ +#define NL80211_VENDOR_ID_IS_LINUX 0x80000000 + +/** + * struct nl80211_vendor_cmd_info - vendor command data + * @vendor_id: If the %NL80211_VENDOR_ID_IS_LINUX flag is clear, then the + * value is a 24-bit OUI; if it is set then a separately allocated ID + * may be used, but no such IDs are allocated yet. New IDs should be + * added to this file when needed. + * @subcmd: sub-command ID for the command + */ +struct nl80211_vendor_cmd_info { + __u32 vendor_id; + __u32 subcmd; +}; + +/** + * enum nl80211_tdls_peer_capability - TDLS peer flags. + * + * Used by tdls_mgmt() to determine which conditional elements need + * to be added to TDLS Setup frames. + * + * @NL80211_TDLS_PEER_HT: TDLS peer is HT capable. + * @NL80211_TDLS_PEER_VHT: TDLS peer is VHT capable. + * @NL80211_TDLS_PEER_WMM: TDLS peer is WMM capable. + */ +enum nl80211_tdls_peer_capability { + NL80211_TDLS_PEER_HT = 1<<0, + NL80211_TDLS_PEER_VHT = 1<<1, + NL80211_TDLS_PEER_WMM = 1<<2, +}; + +/** + * enum nl80211_sched_scan_plan - scanning plan for scheduled scan + * @__NL80211_SCHED_SCAN_PLAN_INVALID: attribute number 0 is reserved + * @NL80211_SCHED_SCAN_PLAN_INTERVAL: interval between scan iterations. In + * seconds (u32). + * @NL80211_SCHED_SCAN_PLAN_ITERATIONS: number of scan iterations in this + * scan plan (u32). The last scan plan must not specify this attribute + * because it will run infinitely. A value of zero is invalid as it will + * make the scan plan meaningless. + * @NL80211_SCHED_SCAN_PLAN_MAX: highest scheduled scan plan attribute number + * currently defined + * @__NL80211_SCHED_SCAN_PLAN_AFTER_LAST: internal use + */ +enum nl80211_sched_scan_plan { + __NL80211_SCHED_SCAN_PLAN_INVALID, + NL80211_SCHED_SCAN_PLAN_INTERVAL, + NL80211_SCHED_SCAN_PLAN_ITERATIONS, + + /* keep last */ + __NL80211_SCHED_SCAN_PLAN_AFTER_LAST, + NL80211_SCHED_SCAN_PLAN_MAX = + __NL80211_SCHED_SCAN_PLAN_AFTER_LAST - 1 +}; + +/** + * struct nl80211_bss_select_rssi_adjust - RSSI adjustment parameters. + * + * @band: band of BSS that must match for RSSI value adjustment. The value + * of this field is according to &enum nl80211_band. + * @delta: value used to adjust the RSSI value of matching BSS in dB. + */ +struct nl80211_bss_select_rssi_adjust { + __u8 band; + __s8 delta; +} __attribute__((packed)); + +/** + * enum nl80211_bss_select_attr - attributes for bss selection. + * + * @__NL80211_BSS_SELECT_ATTR_INVALID: reserved. + * @NL80211_BSS_SELECT_ATTR_RSSI: Flag indicating only RSSI-based BSS selection + * is requested. + * @NL80211_BSS_SELECT_ATTR_BAND_PREF: attribute indicating BSS + * selection should be done such that the specified band is preferred. + * When there are multiple BSS-es in the preferred band, the driver + * shall use RSSI-based BSS selection as a second step. The value of + * this attribute is according to &enum nl80211_band (u32). + * @NL80211_BSS_SELECT_ATTR_RSSI_ADJUST: When present the RSSI level for + * BSS-es in the specified band is to be adjusted before doing + * RSSI-based BSS selection. The attribute value is a packed structure + * value as specified by &struct nl80211_bss_select_rssi_adjust. + * @NL80211_BSS_SELECT_ATTR_MAX: highest bss select attribute number. + * @__NL80211_BSS_SELECT_ATTR_AFTER_LAST: internal use. + * + * One and only one of these attributes are found within %NL80211_ATTR_BSS_SELECT + * for %NL80211_CMD_CONNECT. It specifies the required BSS selection behaviour + * which the driver shall use. + */ +enum nl80211_bss_select_attr { + __NL80211_BSS_SELECT_ATTR_INVALID, + NL80211_BSS_SELECT_ATTR_RSSI, + NL80211_BSS_SELECT_ATTR_BAND_PREF, + NL80211_BSS_SELECT_ATTR_RSSI_ADJUST, + + /* keep last */ + __NL80211_BSS_SELECT_ATTR_AFTER_LAST, + NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl80211_nan_function_type - NAN function type + * + * Defines the function type of a NAN function + * + * @NL80211_NAN_FUNC_PUBLISH: function is publish + * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe + * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up + */ +enum nl80211_nan_function_type { + NL80211_NAN_FUNC_PUBLISH, + NL80211_NAN_FUNC_SUBSCRIBE, + NL80211_NAN_FUNC_FOLLOW_UP, + + /* keep last */ + __NL80211_NAN_FUNC_TYPE_AFTER_LAST, + NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1, +}; + +/** + * enum nl80211_nan_publish_type - NAN publish tx type + * + * Defines how to send publish Service Discovery Frames + * + * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited + * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited + */ +enum nl80211_nan_publish_type { + NL80211_NAN_SOLICITED_PUBLISH = 1 << 0, + NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1, +}; + +/** + * enum nl80211_nan_func_term_reason - NAN functions termination reason + * + * Defines termination reasons of a NAN function + * + * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user + * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout + * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored + */ +enum nl80211_nan_func_term_reason { + NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST, + NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED, + NL80211_NAN_FUNC_TERM_REASON_ERROR, +}; + +#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6 +#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff +#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff + +/** + * enum nl80211_nan_func_attributes - NAN function attributes + * @__NL80211_NAN_FUNC_INVALID: invalid + * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8). + * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as + * specified in NAN spec. This is a binary attribute. + * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is + * publish. Defines the transmission type for the publish Service Discovery + * Frame, see &enum nl80211_nan_publish_type. Its type is u8. + * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited + * publish. Should the solicited publish Service Discovery Frame be sent to + * the NAN Broadcast address. This is a flag. + * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is + * subscribe. Is the subscribe active. This is a flag. + * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up. + * The instance ID for the follow up Service Discovery Frame. This is u8. + * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type + * is follow up. This is a u8. + * The requestor instance ID for the follow up Service Discovery Frame. + * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the + * follow up Service Discovery Frame. This is a binary attribute. + * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a + * close range. The range itself (RSSI) is defined by the device. + * This is a flag. + * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should + * stay active. If not present infinite TTL is assumed. This is a u32. + * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service + * specific info. This is a binary attribute. + * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute. + * See &enum nl80211_nan_srf_attributes. + * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested + * attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a + * nested attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function. + * Its type is u8 and it cannot be 0. + * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason. + * See &enum nl80211_nan_func_term_reason. + * + * @NUM_NL80211_NAN_FUNC_ATTR: internal + * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute + */ +enum nl80211_nan_func_attributes { + __NL80211_NAN_FUNC_INVALID, + NL80211_NAN_FUNC_TYPE, + NL80211_NAN_FUNC_SERVICE_ID, + NL80211_NAN_FUNC_PUBLISH_TYPE, + NL80211_NAN_FUNC_PUBLISH_BCAST, + NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE, + NL80211_NAN_FUNC_FOLLOW_UP_ID, + NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID, + NL80211_NAN_FUNC_FOLLOW_UP_DEST, + NL80211_NAN_FUNC_CLOSE_RANGE, + NL80211_NAN_FUNC_TTL, + NL80211_NAN_FUNC_SERVICE_INFO, + NL80211_NAN_FUNC_SRF, + NL80211_NAN_FUNC_RX_MATCH_FILTER, + NL80211_NAN_FUNC_TX_MATCH_FILTER, + NL80211_NAN_FUNC_INSTANCE_ID, + NL80211_NAN_FUNC_TERM_REASON, + + /* keep last */ + NUM_NL80211_NAN_FUNC_ATTR, + NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1 +}; + +/** + * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes + * @__NL80211_NAN_SRF_INVALID: invalid + * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set. + * This is a flag. + * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if + * %NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. + * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if + * %NL80211_NAN_SRF_BF is present. This is a u8. + * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if + * and only if %NL80211_NAN_SRF_BF isn't present. This is a nested + * attribute. Each nested attribute is a MAC address. + * @NUM_NL80211_NAN_SRF_ATTR: internal + * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute + */ +enum nl80211_nan_srf_attributes { + __NL80211_NAN_SRF_INVALID, + NL80211_NAN_SRF_INCLUDE, + NL80211_NAN_SRF_BF, + NL80211_NAN_SRF_BF_IDX, + NL80211_NAN_SRF_MAC_ADDRS, + + /* keep last */ + NUM_NL80211_NAN_SRF_ATTR, + NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1, +}; + +/** + * enum nl80211_nan_match_attributes - NAN match attributes + * @__NL80211_NAN_MATCH_INVALID: invalid + * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the + * match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * @NL80211_NAN_MATCH_FUNC_PEER: the peer function + * that caused the match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * + * @NUM_NL80211_NAN_MATCH_ATTR: internal + * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute + */ +enum nl80211_nan_match_attributes { + __NL80211_NAN_MATCH_INVALID, + NL80211_NAN_MATCH_FUNC_LOCAL, + NL80211_NAN_MATCH_FUNC_PEER, + + /* keep last */ + NUM_NL80211_NAN_MATCH_ATTR, + NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1 +}; + +/** + * nl80211_external_auth_action - Action to perform with external + * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION. + * @NL80211_EXTERNAL_AUTH_START: Start the authentication. + * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication. + */ +enum nl80211_external_auth_action { + NL80211_EXTERNAL_AUTH_START, + NL80211_EXTERNAL_AUTH_ABORT, +}; + +/** + * enum nl80211_ftm_responder_attributes - fine timing measurement + * responder attributes + * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid + * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled + * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element + * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10), + * i.e. starting with the measurement token + * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element + * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13), + * i.e. starting with the measurement token + * @__NL80211_FTM_RESP_ATTR_LAST: Internal + * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute. + */ +enum nl80211_ftm_responder_attributes { + __NL80211_FTM_RESP_ATTR_INVALID, + + NL80211_FTM_RESP_ATTR_ENABLED, + NL80211_FTM_RESP_ATTR_LCI, + NL80211_FTM_RESP_ATTR_CIVICLOC, + + /* keep last */ + __NL80211_FTM_RESP_ATTR_LAST, + NL80211_FTM_RESP_ATTR_MAX = __NL80211_FTM_RESP_ATTR_LAST - 1, +}; + +/* + * enum nl80211_ftm_responder_stats - FTM responder statistics + * + * These attribute types are used with %NL80211_ATTR_FTM_RESPONDER_STATS + * when getting FTM responder statistics. + * + * @__NL80211_FTM_STATS_INVALID: attribute number 0 is reserved + * @NL80211_FTM_STATS_SUCCESS_NUM: number of FTM sessions in which all frames + * were ssfully answered (u32) + * @NL80211_FTM_STATS_PARTIAL_NUM: number of FTM sessions in which part of the + * frames were successfully answered (u32) + * @NL80211_FTM_STATS_FAILED_NUM: number of failed FTM sessions (u32) + * @NL80211_FTM_STATS_ASAP_NUM: number of ASAP sessions (u32) + * @NL80211_FTM_STATS_NON_ASAP_NUM: number of non-ASAP sessions (u32) + * @NL80211_FTM_STATS_TOTAL_DURATION_MSEC: total sessions durations - gives an + * indication of how much time the responder was busy (u64, msec) + * @NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM: number of unknown FTM triggers - + * triggers from initiators that didn't finish successfully the negotiation + * phase with the responder (u32) + * @NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM: number of FTM reschedule requests + * - initiator asks for a new scheduling although it already has scheduled + * FTM slot (u32) + * @NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM: number of FTM triggers out of + * scheduled window (u32) + * @NL80211_FTM_STATS_PAD: used for padding, ignore + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_FTM_STATS_MAX: highest possible FTM responder stats attribute + */ +enum nl80211_ftm_responder_stats { + __NL80211_FTM_STATS_INVALID, + NL80211_FTM_STATS_SUCCESS_NUM, + NL80211_FTM_STATS_PARTIAL_NUM, + NL80211_FTM_STATS_FAILED_NUM, + NL80211_FTM_STATS_ASAP_NUM, + NL80211_FTM_STATS_NON_ASAP_NUM, + NL80211_FTM_STATS_TOTAL_DURATION_MSEC, + NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM, + NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM, + NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM, + NL80211_FTM_STATS_PAD, + + /* keep last */ + __NL80211_FTM_STATS_AFTER_LAST, + NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1 +}; + +/** + * enum nl80211_preamble - frame preamble types + * @NL80211_PREAMBLE_LEGACY: legacy (HR/DSSS, OFDM, ERP PHY) preamble + * @NL80211_PREAMBLE_HT: HT preamble + * @NL80211_PREAMBLE_VHT: VHT preamble + * @NL80211_PREAMBLE_DMG: DMG preamble + */ +enum nl80211_preamble { + NL80211_PREAMBLE_LEGACY, + NL80211_PREAMBLE_HT, + NL80211_PREAMBLE_VHT, + NL80211_PREAMBLE_DMG, +}; + +/** + * enum nl80211_peer_measurement_type - peer measurement types + * @NL80211_PMSR_TYPE_INVALID: invalid/unused, needed as we use + * these numbers also for attributes + * + * @NL80211_PMSR_TYPE_FTM: flight time measurement + * + * @NUM_NL80211_PMSR_TYPES: internal + * @NL80211_PMSR_TYPE_MAX: highest type number + */ +enum nl80211_peer_measurement_type { + NL80211_PMSR_TYPE_INVALID, + + NL80211_PMSR_TYPE_FTM, + + NUM_NL80211_PMSR_TYPES, + NL80211_PMSR_TYPE_MAX = NUM_NL80211_PMSR_TYPES - 1 +}; + +/** + * enum nl80211_peer_measurement_status - peer measurement status + * @NL80211_PMSR_STATUS_SUCCESS: measurement completed successfully + * @NL80211_PMSR_STATUS_REFUSED: measurement was locally refused + * @NL80211_PMSR_STATUS_TIMEOUT: measurement timed out + * @NL80211_PMSR_STATUS_FAILURE: measurement failed, a type-dependent + * reason may be available in the response data + */ +enum nl80211_peer_measurement_status { + NL80211_PMSR_STATUS_SUCCESS, + NL80211_PMSR_STATUS_REFUSED, + NL80211_PMSR_STATUS_TIMEOUT, + NL80211_PMSR_STATUS_FAILURE, +}; + +/** + * enum nl80211_peer_measurement_req - peer measurement request attributes + * @__NL80211_PMSR_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_REQ_ATTR_DATA: This is a nested attribute with measurement + * type-specific request data inside. The attributes used are from the + * enums named nl80211_peer_measurement_<type>_req. + * @NL80211_PMSR_REQ_ATTR_GET_AP_TSF: include AP TSF timestamp, if supported + * (flag attribute) + * + * @NUM_NL80211_PMSR_REQ_ATTRS: internal + * @NL80211_PMSR_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_req { + __NL80211_PMSR_REQ_ATTR_INVALID, + + NL80211_PMSR_REQ_ATTR_DATA, + NL80211_PMSR_REQ_ATTR_GET_AP_TSF, + + /* keep last */ + NUM_NL80211_PMSR_REQ_ATTRS, + NL80211_PMSR_REQ_ATTR_MAX = NUM_NL80211_PMSR_REQ_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_resp - peer measurement response attributes + * @__NL80211_PMSR_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_RESP_ATTR_DATA: This is a nested attribute with measurement + * type-specific results inside. The attributes used are from the enums + * named nl80211_peer_measurement_<type>_resp. + * @NL80211_PMSR_RESP_ATTR_STATUS: u32 value with the measurement status + * (using values from &enum nl80211_peer_measurement_status.) + * @NL80211_PMSR_RESP_ATTR_HOST_TIME: host time (%CLOCK_BOOTTIME) when the + * result was measured; this value is not expected to be accurate to + * more than 20ms. (u64, nanoseconds) + * @NL80211_PMSR_RESP_ATTR_AP_TSF: TSF of the AP that the interface + * doing the measurement is connected to when the result was measured. + * This shall be accurately reported if supported and requested + * (u64, usec) + * @NL80211_PMSR_RESP_ATTR_FINAL: If results are sent to the host partially + * (*e.g. with FTM per-burst data) this flag will be cleared on all but + * the last result; if all results are combined it's set on the single + * result. + * @NL80211_PMSR_RESP_ATTR_PAD: padding for 64-bit attributes, ignore + * + * @NUM_NL80211_PMSR_RESP_ATTRS: internal + * @NL80211_PMSR_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_resp { + __NL80211_PMSR_RESP_ATTR_INVALID, + + NL80211_PMSR_RESP_ATTR_DATA, + NL80211_PMSR_RESP_ATTR_STATUS, + NL80211_PMSR_RESP_ATTR_HOST_TIME, + NL80211_PMSR_RESP_ATTR_AP_TSF, + NL80211_PMSR_RESP_ATTR_FINAL, + NL80211_PMSR_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_RESP_ATTRS, + NL80211_PMSR_RESP_ATTR_MAX = NUM_NL80211_PMSR_RESP_ATTRS - 1 +}; + +/** + * enum nl80211_peer_measurement_peer_attrs - peer attributes for measurement + * @__NL80211_PMSR_PEER_ATTR_INVALID: invalid + * + * @NL80211_PMSR_PEER_ATTR_ADDR: peer's MAC address + * @NL80211_PMSR_PEER_ATTR_CHAN: channel definition, nested, using top-level + * attributes like %NL80211_ATTR_WIPHY_FREQ etc. + * @NL80211_PMSR_PEER_ATTR_REQ: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_req inside. + * @NL80211_PMSR_PEER_ATTR_RESP: This is a nested attribute indexed by + * measurement type, with attributes from the + * &enum nl80211_peer_measurement_resp inside. + * + * @NUM_NL80211_PMSR_PEER_ATTRS: internal + * @NL80211_PMSR_PEER_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_peer_attrs { + __NL80211_PMSR_PEER_ATTR_INVALID, + + NL80211_PMSR_PEER_ATTR_ADDR, + NL80211_PMSR_PEER_ATTR_CHAN, + NL80211_PMSR_PEER_ATTR_REQ, + NL80211_PMSR_PEER_ATTR_RESP, + + /* keep last */ + NUM_NL80211_PMSR_PEER_ATTRS, + NL80211_PMSR_PEER_ATTR_MAX = NUM_NL80211_PMSR_PEER_ATTRS - 1, +}; + +/** + * enum nl80211_peer_measurement_attrs - peer measurement attributes + * @__NL80211_PMSR_ATTR_INVALID: invalid + * + * @NL80211_PMSR_ATTR_MAX_PEERS: u32 attribute used for capability + * advertisement only, indicates the maximum number of peers + * measurements can be done with in a single request + * @NL80211_PMSR_ATTR_REPORT_AP_TSF: flag attribute in capability + * indicating that the connected AP's TSF can be reported in + * measurement results + * @NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR: flag attribute in capability + * indicating that MAC address randomization is supported. + * @NL80211_PMSR_ATTR_TYPE_CAPA: capabilities reported by the device, + * this contains a nesting indexed by measurement type, and + * type-specific capabilities inside, which are from the enums + * named nl80211_peer_measurement_<type>_capa. + * @NL80211_PMSR_ATTR_PEERS: nested attribute, the nesting index is + * meaningless, just a list of peers to measure with, with the + * sub-attributes taken from + * &enum nl80211_peer_measurement_peer_attrs. + * + * @NUM_NL80211_PMSR_ATTR: internal + * @NL80211_PMSR_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_attrs { + __NL80211_PMSR_ATTR_INVALID, + + NL80211_PMSR_ATTR_MAX_PEERS, + NL80211_PMSR_ATTR_REPORT_AP_TSF, + NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR, + NL80211_PMSR_ATTR_TYPE_CAPA, + NL80211_PMSR_ATTR_PEERS, + + /* keep last */ + NUM_NL80211_PMSR_ATTR, + NL80211_PMSR_ATTR_MAX = NUM_NL80211_PMSR_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_capa - FTM capabilities + * @__NL80211_PMSR_FTM_CAPA_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_CAPA_ATTR_ASAP: flag attribute indicating ASAP mode + * is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP: flag attribute indicating non-ASAP + * mode is supported + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI: flag attribute indicating if LCI + * data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC: flag attribute indicating if civic + * location data can be requested during the measurement + * @NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES: u32 bitmap attribute of bits + * from &enum nl80211_preamble. + * @NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS: bitmap of values from + * &enum nl80211_chan_width indicating the supported channel + * bandwidths for FTM. Note that a higher channel bandwidth may be + * configured to allow for other measurements types with different + * bandwidth requirement in the same measurement. + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT: u32 attribute indicating + * the maximum bursts exponent that can be used (if not present anything + * is valid) + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating + * the maximum FTMs per burst (if not present anything is valid) + * + * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal + * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_capa { + __NL80211_PMSR_FTM_CAPA_ATTR_INVALID, + + NL80211_PMSR_FTM_CAPA_ATTR_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI, + NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC, + NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES, + NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, + NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, + + /* keep last */ + NUM_NL80211_PMSR_FTM_CAPA_ATTR, + NL80211_PMSR_FTM_CAPA_ATTR_MAX = NUM_NL80211_PMSR_FTM_CAPA_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_req - FTM request attributes + * @__NL80211_PMSR_FTM_REQ_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_REQ_ATTR_ASAP: ASAP mode requested (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE: preamble type (see + * &enum nl80211_preamble), optional for DMG (u32) + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in + * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element" + * (u8, 0-15, optional with default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units + * of 100ms (u16, optional with default 0) + * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016 + * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with + * default 15 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames + * requested per burst + * (u8, 0-31, optional with default 0 i.e. "no preference") + * @NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES: number of FTMR frame retries + * (u8, default 3) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag) + * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data + * (flag) + * + * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal + * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_req { + __NL80211_PMSR_FTM_REQ_ATTR_INVALID, + + NL80211_PMSR_FTM_REQ_ATTR_ASAP, + NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE, + NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD, + NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI, + NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC, + + /* keep last */ + NUM_NL80211_PMSR_FTM_REQ_ATTR, + NL80211_PMSR_FTM_REQ_ATTR_MAX = NUM_NL80211_PMSR_FTM_REQ_ATTR - 1 +}; + +/** + * enum nl80211_peer_measurement_ftm_failure_reasons - FTM failure reasons + * @NL80211_PMSR_FTM_FAILURE_UNSPECIFIED: unspecified failure, not used + * @NL80211_PMSR_FTM_FAILURE_NO_RESPONSE: no response from the FTM responder + * @NL80211_PMSR_FTM_FAILURE_REJECTED: FTM responder rejected measurement + * @NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL: we already know the peer is + * on a different channel, so can't measure (if we didn't know, we'd + * try and get no response) + * @NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE: peer can't actually do FTM + * @NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP: invalid T1/T4 timestamps + * received + * @NL80211_PMSR_FTM_FAILURE_PEER_BUSY: peer reports busy, you may retry + * later (see %NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME) + * @NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS: parameters were changed + * by the peer and are no longer supported + */ +enum nl80211_peer_measurement_ftm_failure_reasons { + NL80211_PMSR_FTM_FAILURE_UNSPECIFIED, + NL80211_PMSR_FTM_FAILURE_NO_RESPONSE, + NL80211_PMSR_FTM_FAILURE_REJECTED, + NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL, + NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE, + NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP, + NL80211_PMSR_FTM_FAILURE_PEER_BUSY, + NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS, +}; + +/** + * enum nl80211_peer_measurement_ftm_resp - FTM response attributes + * @__NL80211_PMSR_FTM_RESP_ATTR_INVALID: invalid + * + * @NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON: FTM-specific failure reason + * (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX: optional, if bursts are reported + * as separate results then it will be the burst index 0...(N-1) and + * the top level will indicate partial results (u32) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames + * transmitted (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames + * that were acknowleged (u32, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the + * busy peer (u32, seconds) + * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent + * used by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION: actual burst duration used by + * the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST: actual FTMs per burst used + * by the responder (similar to request, u8) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG: average RSSI across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD: RSSI spread across all FTM action + * frames (optional, s32, 1/2 dBm) + * @NL80211_PMSR_FTM_RESP_ATTR_TX_RATE: bitrate we used for the response to the + * FTM action frame (optional, nested, using &enum nl80211_rate_info + * attributes) + * @NL80211_PMSR_FTM_RESP_ATTR_RX_RATE: bitrate the responder used for the FTM + * action frame (optional, nested, using &enum nl80211_rate_info attrs) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG: average RTT (s64, picoseconds, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE: RTT variance (u64, ps^2, note that + * standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD: RTT spread (u64, picoseconds, + * optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG: average distance (s64, mm, optional + * but one of RTT/DIST must be present) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note + * that standard deviation is the square root of variance, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional) + * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 8. + * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer + * (binary, optional); + * this is the contents of the Measurement Report Element (802.11-2016 + * 9.4.2.22.1) starting with the Measurement Token, with Measurement + * Type 11. + * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only + * + * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal + * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number + */ +enum nl80211_peer_measurement_ftm_resp { + __NL80211_PMSR_FTM_RESP_ATTR_INVALID, + + NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON, + NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS, + NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES, + NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME, + NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP, + NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION, + NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_TX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RX_RATE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG, + NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG, + NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE, + NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD, + NL80211_PMSR_FTM_RESP_ATTR_LCI, + NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC, + NL80211_PMSR_FTM_RESP_ATTR_PAD, + + /* keep last */ + NUM_NL80211_PMSR_FTM_RESP_ATTR, + NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1 +}; + +/** + * enum nl80211_obss_pd_attributes - OBSS packet detection attributes + * @__NL80211_HE_OBSS_PD_ATTR_INVALID: Invalid + * + * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset. + * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset. + * + * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal + * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute. + */ +enum nl80211_obss_pd_attributes { + __NL80211_HE_OBSS_PD_ATTR_INVALID, + + NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET, + NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET, + + /* keep last */ + __NL80211_HE_OBSS_PD_ATTR_LAST, + NL80211_HE_OBSS_PD_ATTR_MAX = __NL80211_HE_OBSS_PD_ATTR_LAST - 1, +}; + + +#endif /* __LINUX_NL80211_H */ diff --git a/src/shared/local-addresses.c b/src/shared/local-addresses.c new file mode 100644 index 0000000..2c860f7 --- /dev/null +++ b/src/shared/local-addresses.c @@ -0,0 +1,315 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-netlink.h" + +#include "alloc-util.h" +#include "local-addresses.h" +#include "macro.h" +#include "netlink-util.h" +#include "sort-util.h" + +static int address_compare(const struct local_address *a, const struct local_address *b) { + int r; + + /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */ + + if (a->family == AF_INET && b->family == AF_INET6) + return -1; + if (a->family == AF_INET6 && b->family == AF_INET) + return 1; + + r = CMP(a->scope, b->scope); + if (r != 0) + return r; + + r = CMP(a->metric, b->metric); + if (r != 0) + return r; + + r = CMP(a->ifindex, b->ifindex); + if (r != 0) + return r; + + return memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family)); +} + +int local_addresses(sd_netlink *context, int ifindex, int af, struct local_address **ret) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_free_ struct local_address *list = NULL; + size_t n_list = 0, n_allocated = 0; + sd_netlink_message *m; + int r; + + if (context) + rtnl = sd_netlink_ref(context); + else { + r = sd_netlink_open(&rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, af); + if (r < 0) + return r; + + r = sd_netlink_call(rtnl, req, 0, &reply); + if (r < 0) + return r; + + for (m = reply; m; m = sd_netlink_message_next(m)) { + struct local_address *a; + unsigned char flags; + uint16_t type; + int ifi, family; + + r = sd_netlink_message_get_errno(m); + if (r < 0) + return r; + + r = sd_netlink_message_get_type(m, &type); + if (r < 0) + return r; + if (type != RTM_NEWADDR) + continue; + + r = sd_rtnl_message_addr_get_ifindex(m, &ifi); + if (r < 0) + return r; + if (ifindex > 0 && ifi != ifindex) + continue; + + r = sd_rtnl_message_addr_get_family(m, &family); + if (r < 0) + return r; + if (af != AF_UNSPEC && af != family) + continue; + + r = sd_rtnl_message_addr_get_flags(m, &flags); + if (r < 0) + return r; + if (flags & IFA_F_DEPRECATED) + continue; + + if (!GREEDY_REALLOC0(list, n_allocated, n_list+1)) + return -ENOMEM; + + a = list + n_list; + + r = sd_rtnl_message_addr_get_scope(m, &a->scope); + if (r < 0) + return r; + + if (ifindex == 0 && IN_SET(a->scope, RT_SCOPE_HOST, RT_SCOPE_NOWHERE)) + continue; + + switch (family) { + + case AF_INET: + r = sd_netlink_message_read_in_addr(m, IFA_LOCAL, &a->address.in); + if (r < 0) { + r = sd_netlink_message_read_in_addr(m, IFA_ADDRESS, &a->address.in); + if (r < 0) + continue; + } + break; + + case AF_INET6: + r = sd_netlink_message_read_in6_addr(m, IFA_LOCAL, &a->address.in6); + if (r < 0) { + r = sd_netlink_message_read_in6_addr(m, IFA_ADDRESS, &a->address.in6); + if (r < 0) + continue; + } + break; + + default: + continue; + } + + a->ifindex = ifi; + a->family = family; + + n_list++; + }; + + if (ret) { + typesafe_qsort(list, n_list, address_compare); + *ret = TAKE_PTR(list); + } + + return (int) n_list; +} + +static int add_local_gateway( + struct local_address **list, + size_t *n_list, + size_t *n_allocated, + int af, + int ifindex, + uint32_t metric, + const RouteVia *via) { + + assert(list); + assert(n_list); + assert(n_allocated); + assert(via); + + if (af != AF_UNSPEC && af != via->family) + return 0; + + if (!GREEDY_REALLOC(*list, *n_allocated, *n_list + 1)) + return -ENOMEM; + + (*list)[(*n_list)++] = (struct local_address) { + .ifindex = ifindex, + .metric = metric, + .family = via->family, + .address = via->address, + }; + + return 0; +} + +int local_gateways(sd_netlink *context, int ifindex, int af, struct local_address **ret) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL; + _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL; + _cleanup_free_ struct local_address *list = NULL; + size_t n_list = 0, n_allocated = 0; + int r; + + if (context) + rtnl = sd_netlink_ref(context); + else { + r = sd_netlink_open(&rtnl); + if (r < 0) + return r; + } + + r = sd_rtnl_message_new_route(rtnl, &req, RTM_GETROUTE, af, RTPROT_UNSPEC); + if (r < 0) + return r; + + r = sd_netlink_message_request_dump(req, true); + if (r < 0) + return r; + + r = sd_netlink_call(rtnl, req, 0, &reply); + if (r < 0) + return r; + + for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) { + _cleanup_ordered_set_free_free_ OrderedSet *multipath_routes = NULL; + _cleanup_free_ void *rta_multipath = NULL; + union in_addr_union gateway; + uint16_t type; + unsigned char dst_len, src_len, table; + uint32_t ifi, metric = 0; + size_t rta_len; + int family; + RouteVia via; + + r = sd_netlink_message_get_errno(m); + if (r < 0) + return r; + + r = sd_netlink_message_get_type(m, &type); + if (r < 0) + return r; + if (type != RTM_NEWROUTE) + continue; + + /* We only care for default routes */ + r = sd_rtnl_message_route_get_dst_prefixlen(m, &dst_len); + if (r < 0) + return r; + if (dst_len != 0) + continue; + + r = sd_rtnl_message_route_get_src_prefixlen(m, &src_len); + if (r < 0) + return r; + if (src_len != 0) + continue; + + r = sd_rtnl_message_route_get_table(m, &table); + if (r < 0) + return r; + if (table != RT_TABLE_MAIN) + continue; + + r = sd_netlink_message_read_u32(m, RTA_PRIORITY, &metric); + if (r < 0 && r != -ENODATA) + return r; + + r = sd_rtnl_message_route_get_family(m, &family); + if (r < 0) + return r; + if (!IN_SET(family, AF_INET, AF_INET6)) + continue; + + r = sd_netlink_message_read_u32(m, RTA_OIF, &ifi); + if (r < 0 && r != -ENODATA) + return r; + if (r >= 0) { + if (ifi <= 0) + return -EINVAL; + if (ifindex > 0 && (int) ifi != ifindex) + continue; + + r = netlink_message_read_in_addr_union(m, RTA_GATEWAY, family, &gateway); + if (r < 0 && r != -ENODATA) + return r; + if (r >= 0) { + via.family = family; + via.address = gateway; + r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via); + if (r < 0) + return r; + + continue; + } + + if (family != AF_INET) + continue; + + r = sd_netlink_message_read(m, RTA_VIA, sizeof(via), &via); + if (r < 0 && r != -ENODATA) + return r; + if (r >= 0) { + r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via); + if (r < 0) + return r; + + continue; + } + } + + r = sd_netlink_message_read_data(m, RTA_MULTIPATH, &rta_len, &rta_multipath); + if (r < 0 && r != -ENODATA) + return r; + if (r >= 0) { + MultipathRoute *mr; + + r = rtattr_read_nexthop(rta_multipath, rta_len, family, &multipath_routes); + if (r < 0) + return r; + + ORDERED_SET_FOREACH(mr, multipath_routes) { + if (ifindex > 0 && mr->ifindex != ifindex) + continue; + + r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &mr->gateway); + if (r < 0) + return r; + } + } + } + + if (ret) { + typesafe_qsort(list, n_list, address_compare); + *ret = TAKE_PTR(list); + } + + return (int) n_list; +} diff --git a/src/shared/local-addresses.h b/src/shared/local-addresses.h new file mode 100644 index 0000000..c633995 --- /dev/null +++ b/src/shared/local-addresses.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-netlink.h" + +#include "in-addr-util.h" + +struct local_address { + int family, ifindex; + unsigned char scope; + uint32_t metric; + union in_addr_union address; +}; + +int local_addresses(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret); + +int local_gateways(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret); diff --git a/src/shared/lockfile-util.c b/src/shared/lockfile-util.c new file mode 100644 index 0000000..6f059ab --- /dev/null +++ b/src/shared/lockfile-util.c @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/file.h> +#include <sys/stat.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "lockfile-util.h" +#include "macro.h" +#include "missing_fcntl.h" +#include "path-util.h" + +int make_lock_file(const char *p, int operation, LockFile *ret) { + _cleanup_close_ int fd = -1; + _cleanup_free_ char *t = NULL; + int r; + + /* + * We use UNPOSIX locks if they are available. They have nice + * semantics, and are mostly compatible with NFS. However, + * they are only available on new kernels. When we detect we + * are running on an older kernel, then we fall back to good + * old BSD locks. They also have nice semantics, but are + * slightly problematic on NFS, where they are upgraded to + * POSIX locks, even though locally they are orthogonal to + * POSIX locks. + */ + + t = strdup(p); + if (!t) + return -ENOMEM; + + for (;;) { + struct flock fl = { + .l_type = (operation & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK, + .l_whence = SEEK_SET, + }; + struct stat st; + + fd = open(p, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600); + if (fd < 0) + return -errno; + + r = fcntl(fd, (operation & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl); + if (r < 0) { + + /* If the kernel is too old, use good old BSD locks */ + if (errno == EINVAL) + r = flock(fd, operation); + + if (r < 0) + return errno == EAGAIN ? -EBUSY : -errno; + } + + /* If we acquired the lock, let's check if the file + * still exists in the file system. If not, then the + * previous exclusive owner removed it and then closed + * it. In such a case our acquired lock is worthless, + * hence try again. */ + + r = fstat(fd, &st); + if (r < 0) + return -errno; + if (st.st_nlink > 0) + break; + + fd = safe_close(fd); + } + + ret->path = t; + ret->fd = fd; + ret->operation = operation; + + fd = -1; + t = NULL; + + return r; +} + +int make_lock_file_for(const char *p, int operation, LockFile *ret) { + const char *fn; + char *t; + + assert(p); + assert(ret); + + fn = basename(p); + if (!filename_is_valid(fn)) + return -EINVAL; + + t = newa(char, strlen(p) + 2 + 4 + 1); + stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), fn), ".lck"); + + return make_lock_file(t, operation, ret); +} + +void release_lock_file(LockFile *f) { + int r; + + if (!f) + return; + + if (f->path) { + + /* If we are the exclusive owner we can safely delete + * the lock file itself. If we are not the exclusive + * owner, we can try becoming it. */ + + if (f->fd >= 0 && + (f->operation & ~LOCK_NB) == LOCK_SH) { + static const struct flock fl = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + }; + + r = fcntl(f->fd, F_OFD_SETLK, &fl); + if (r < 0 && errno == EINVAL) + r = flock(f->fd, LOCK_EX|LOCK_NB); + + if (r >= 0) + f->operation = LOCK_EX|LOCK_NB; + } + + if ((f->operation & ~LOCK_NB) == LOCK_EX) + unlink_noerrno(f->path); + + f->path = mfree(f->path); + } + + f->fd = safe_close(f->fd); + f->operation = 0; +} diff --git a/src/shared/lockfile-util.h b/src/shared/lockfile-util.h new file mode 100644 index 0000000..3606327 --- /dev/null +++ b/src/shared/lockfile-util.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +typedef struct LockFile { + char *path; + int fd; + int operation; +} LockFile; + +int make_lock_file(const char *p, int operation, LockFile *ret); +int make_lock_file_for(const char *p, int operation, LockFile *ret); +void release_lock_file(LockFile *f); + +#define LOCK_FILE_INIT { .fd = -1, .path = NULL } diff --git a/src/shared/log-link.h b/src/shared/log-link.h new file mode 100644 index 0000000..bb692e0 --- /dev/null +++ b/src/shared/log-link.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "log.h" + +/* + * The following macros append INTERFACE= to the message. + * The macros require a struct named 'Link' which contains 'char *ifname': + * + * typedef struct Link { + * char *ifname; + * } Link; + * + * See, network/networkd-link.h for example. + */ + +#define log_link_full_errno(link, level, error, ...) \ + ({ \ + const Link *_l = (link); \ + (_l && _l->ifname) ? log_object_internal(level, error, PROJECT_FILE, __LINE__, __func__, "INTERFACE=", _l->ifname, NULL, NULL, ##__VA_ARGS__) : \ + log_internal(level, error, PROJECT_FILE, __LINE__, __func__, ##__VA_ARGS__); \ + }) \ + +#define log_link_full(link, level, ...) (void) log_link_full_errno(link, level, 0, __VA_ARGS__) + +#define log_link_debug(link, ...) log_link_full_errno(link, LOG_DEBUG, 0, __VA_ARGS__) +#define log_link_info(link, ...) log_link_full(link, LOG_INFO, __VA_ARGS__) +#define log_link_notice(link, ...) log_link_full(link, LOG_NOTICE, __VA_ARGS__) +#define log_link_warning(link, ...) log_link_full(link, LOG_WARNING, __VA_ARGS__) +#define log_link_error(link, ...) log_link_full(link, LOG_ERR, __VA_ARGS__) + +#define log_link_debug_errno(link, error, ...) log_link_full_errno(link, LOG_DEBUG, error, __VA_ARGS__) +#define log_link_info_errno(link, error, ...) log_link_full_errno(link, LOG_INFO, error, __VA_ARGS__) +#define log_link_notice_errno(link, error, ...) log_link_full_errno(link, LOG_NOTICE, error, __VA_ARGS__) +#define log_link_warning_errno(link, error, ...) log_link_full_errno(link, LOG_WARNING, error, __VA_ARGS__) +#define log_link_error_errno(link, error, ...) log_link_full_errno(link, LOG_ERR, error, __VA_ARGS__) + +#define LOG_LINK_MESSAGE(link, fmt, ...) "MESSAGE=%s: " fmt, (link)->ifname, ##__VA_ARGS__ +#define LOG_LINK_INTERFACE(link) "INTERFACE=%s", (link)->ifname diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c new file mode 100644 index 0000000..840f221 --- /dev/null +++ b/src/shared/logs-show.c @@ -0,0 +1,1672 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <syslog.h> +#include <unistd.h> + +#include "sd-id128.h" +#include "sd-journal.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "hashmap.h" +#include "hostname-util.h" +#include "id128-util.h" +#include "io-util.h" +#include "journal-internal.h" +#include "journal-util.h" +#include "json.h" +#include "locale-util.h" +#include "log.h" +#include "logs-show.h" +#include "macro.h" +#include "namespace-util.h" +#include "output-mode.h" +#include "parse-util.h" +#include "pretty-print.h" +#include "process-util.h" +#include "sparse-endian.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "time-util.h" +#include "utf8.h" +#include "util.h" +#include "web-util.h" + +/* up to three lines (each up to 100 characters) or 300 characters, whichever is less */ +#define PRINT_LINE_THRESHOLD 3 +#define PRINT_CHAR_THRESHOLD 300 + +#define JSON_THRESHOLD 4096U + +static int print_catalog(FILE *f, sd_journal *j) { + _cleanup_free_ char *t = NULL, *z = NULL; + const char *newline, *prefix; + int r; + + assert(j); + + r = sd_journal_get_catalog(j, &t); + if (r == -ENOENT) + return 0; + if (r < 0) + return log_error_errno(r, "Failed to find catalog entry: %m"); + + if (is_locale_utf8()) + prefix = strjoina(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), special_glyph(SPECIAL_GLYPH_LIGHT_SHADE)); + else + prefix = "--"; + + if (colors_enabled()) + newline = strjoina(ANSI_NORMAL "\n" ANSI_GREY, prefix, ANSI_NORMAL " " ANSI_GREEN); + else + newline = strjoina("\n", prefix, " "); + + z = strreplace(strstrip(t), "\n", newline); + if (!z) + return log_oom(); + + if (colors_enabled()) + fprintf(f, ANSI_GREY "%s" ANSI_NORMAL " " ANSI_GREEN, prefix); + else + fprintf(f, "%s ", prefix); + + fputs(z, f); + + if (colors_enabled()) + fputs(ANSI_NORMAL "\n", f); + else + fputc('\n', f); + + return 1; +} + +static int url_from_catalog(sd_journal *j, char **ret) { + _cleanup_free_ char *t = NULL, *url = NULL; + const char *weblink; + int r; + + assert(j); + assert(ret); + + r = sd_journal_get_catalog(j, &t); + if (r == -ENOENT) + goto notfound; + if (r < 0) + return log_error_errno(r, "Failed to find catalog entry: %m"); + + weblink = startswith(t, "Documentation:"); + if (!weblink) { + weblink = strstr(t + 1, "\nDocumentation:"); + if (!weblink) + goto notfound; + + weblink += 15; + } + + /* Skip whitespace to value */ + weblink += strspn(weblink, " \t"); + + /* Cut out till next whitespace/newline */ + url = strndup(weblink, strcspn(weblink, WHITESPACE)); + if (!url) + return log_oom(); + + if (!documentation_url_is_valid(url)) + goto notfound; + + *ret = TAKE_PTR(url); + return 1; + +notfound: + *ret = NULL; + return 0; +} + +static int parse_field(const void *data, size_t length, const char *field, size_t field_len, char **target, size_t *target_len) { + size_t nl; + char *buf; + + assert(data); + assert(field); + assert(target); + + if (length < field_len) + return 0; + + if (memcmp(data, field, field_len)) + return 0; + + nl = length - field_len; + + buf = newdup_suffix0(char, (const char*) data + field_len, nl); + if (!buf) + return log_oom(); + + free(*target); + *target = buf; + + if (target_len) + *target_len = nl; + + return 1; +} + +typedef struct ParseFieldVec { + const char *field; + size_t field_len; + char **target; + size_t *target_len; +} ParseFieldVec; + +#define PARSE_FIELD_VEC_ENTRY(_field, _target, _target_len) { \ + .field = _field, \ + .field_len = strlen(_field), \ + .target = _target, \ + .target_len = _target_len \ + } + +static int parse_fieldv(const void *data, size_t length, const ParseFieldVec *fields, unsigned n_fields) { + unsigned i; + + for (i = 0; i < n_fields; i++) { + const ParseFieldVec *f = &fields[i]; + int r; + + r = parse_field(data, length, f->field, f->field_len, f->target, f->target_len); + if (r < 0) + return r; + else if (r > 0) + break; + } + + return 0; +} + +static int field_set_test(const Set *fields, const char *name, size_t n) { + char *s; + + if (!fields) + return 1; + + s = strndupa(name, n); + return set_contains(fields, s); +} + +static bool shall_print(const char *p, size_t l, OutputFlags flags) { + assert(p); + + if (flags & OUTPUT_SHOW_ALL) + return true; + + if (l >= PRINT_CHAR_THRESHOLD) + return false; + + if (!utf8_is_printable(p, l)) + return false; + + return true; +} + +static bool print_multiline( + FILE *f, + unsigned prefix, + unsigned n_columns, + OutputFlags flags, + int priority, + bool audit, + const char* message, + size_t message_len, + size_t highlight[2]) { + + const char *color_on = "", *color_off = "", *highlight_on = ""; + const char *pos, *end; + bool ellipsized = false; + int line = 0; + + if (flags & OUTPUT_COLOR) { + get_log_colors(priority, &color_on, &color_off, &highlight_on); + + if (audit && strempty(color_on)) { + color_on = ANSI_BLUE; + color_off = ANSI_NORMAL; + } + } + + /* A special case: make sure that we print a newline when + the message is empty. */ + if (message_len == 0) + fputs("\n", f); + + for (pos = message; + pos < message + message_len; + pos = end + 1, line++) { + bool continuation = line > 0; + bool tail_line; + int len; + for (end = pos; end < message + message_len && *end != '\n'; end++) + ; + len = end - pos; + assert(len >= 0); + + /* We need to figure out when we are showing not-last line, *and* + * will skip subsequent lines. In that case, we will put the dots + * at the end of the line, instead of putting dots in the middle + * or not at all. + */ + tail_line = + line + 1 == PRINT_LINE_THRESHOLD || + end + 1 >= message + PRINT_CHAR_THRESHOLD; + + if (flags & (OUTPUT_FULL_WIDTH | OUTPUT_SHOW_ALL) || + (prefix + len + 1 < n_columns && !tail_line)) { + if (highlight && + (size_t) (pos - message) <= highlight[0] && + highlight[0] < (size_t) len) { + + fprintf(f, "%*s%s%.*s", + continuation * prefix, "", + color_on, (int) highlight[0], pos); + fprintf(f, "%s%.*s", + highlight_on, + (int) (MIN((size_t) len, highlight[1]) - highlight[0]), + pos + highlight[0]); + if ((size_t) len > highlight[1]) + fprintf(f, "%s%.*s", + color_on, + (int) (len - highlight[1]), + pos + highlight[1]); + fprintf(f, "%s\n", color_off); + + } else + fprintf(f, "%*s%s%.*s%s\n", + continuation * prefix, "", + color_on, len, pos, color_off); + continue; + } + + /* Beyond this point, ellipsization will happen. */ + ellipsized = true; + + if (prefix < n_columns && n_columns - prefix >= 3) { + if (n_columns - prefix > (unsigned) len + 3) + fprintf(f, "%*s%s%.*s...%s\n", + continuation * prefix, "", + color_on, len, pos, color_off); + else { + _cleanup_free_ char *e; + + e = ellipsize_mem(pos, len, n_columns - prefix, + tail_line ? 100 : 90); + if (!e) + fprintf(f, "%*s%s%.*s%s\n", + continuation * prefix, "", + color_on, len, pos, color_off); + else + fprintf(f, "%*s%s%s%s\n", + continuation * prefix, "", + color_on, e, color_off); + } + } else + fputs("...\n", f); + + if (tail_line) + break; + } + + return ellipsized; +} + +static int output_timestamp_monotonic(FILE *f, sd_journal *j, const char *monotonic) { + sd_id128_t boot_id; + uint64_t t; + int r; + + assert(f); + assert(j); + + r = -ENXIO; + if (monotonic) + r = safe_atou64(monotonic, &t); + if (r < 0) + r = sd_journal_get_monotonic_usec(j, &t, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get monotonic timestamp: %m"); + + fprintf(f, "[%5"PRI_USEC".%06"PRI_USEC"]", t / USEC_PER_SEC, t % USEC_PER_SEC); + return 1 + 5 + 1 + 6 + 1; +} + +static int output_timestamp_realtime(FILE *f, sd_journal *j, OutputMode mode, OutputFlags flags, const char *realtime) { + char buf[MAX(FORMAT_TIMESTAMP_MAX, 64U)]; + uint64_t x; + int r; + + assert(f); + assert(j); + + if (realtime) + r = safe_atou64(realtime, &x); + if (!realtime || r < 0 || !VALID_REALTIME(x)) + r = sd_journal_get_realtime_usec(j, &x); + if (r < 0) + return log_error_errno(r, "Failed to get realtime timestamp: %m"); + + if (IN_SET(mode, OUTPUT_SHORT_FULL, OUTPUT_WITH_UNIT)) { + const char *k; + + if (flags & OUTPUT_UTC) + k = format_timestamp_style(buf, sizeof(buf), x, TIMESTAMP_UTC); + else + k = format_timestamp(buf, sizeof(buf), x); + if (!k) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format timestamp: %" PRIu64, x); + + } else { + struct tm tm; + time_t t; + + t = (time_t) (x / USEC_PER_SEC); + + switch (mode) { + + case OUTPUT_SHORT_UNIX: + xsprintf(buf, "%10"PRI_TIME".%06"PRIu64, t, x % USEC_PER_SEC); + break; + + case OUTPUT_SHORT_ISO: + if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S%z", + localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format ISO time"); + break; + + case OUTPUT_SHORT_ISO_PRECISE: { + char usec[7]; + + /* No usec in strftime, so we leave space and copy over */ + if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S.xxxxxx%z", + localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format ISO-precise time"); + xsprintf(usec, "%06"PRI_USEC, x % USEC_PER_SEC); + memcpy(buf + 20, usec, 6); + break; + } + case OUTPUT_SHORT: + case OUTPUT_SHORT_PRECISE: + + if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S", + localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format syslog time"); + + if (mode == OUTPUT_SHORT_PRECISE) { + size_t k; + + assert(sizeof(buf) > strlen(buf)); + k = sizeof(buf) - strlen(buf); + + r = snprintf(buf + strlen(buf), k, ".%06"PRIu64, x % USEC_PER_SEC); + if (r <= 0 || (size_t) r >= k) /* too long? */ + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format precise time"); + } + break; + + default: + assert_not_reached("Unknown time format"); + } + } + + fputs(buf, f); + return (int) strlen(buf); +} + +static int output_short( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + const Set *output_fields, + const size_t highlight[2]) { + + int r; + const void *data; + size_t length, n = 0; + _cleanup_free_ char *hostname = NULL, *identifier = NULL, *comm = NULL, *pid = NULL, *fake_pid = NULL, + *message = NULL, *realtime = NULL, *monotonic = NULL, *priority = NULL, *transport = NULL, + *config_file = NULL, *unit = NULL, *user_unit = NULL, *documentation_url = NULL; + size_t hostname_len = 0, identifier_len = 0, comm_len = 0, pid_len = 0, fake_pid_len = 0, message_len = 0, + realtime_len = 0, monotonic_len = 0, priority_len = 0, transport_len = 0, config_file_len = 0, + unit_len = 0, user_unit_len = 0, documentation_url_len = 0; + int p = LOG_INFO; + bool ellipsized = false, audit; + const ParseFieldVec fields[] = { + PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len), + PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len), + PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len), + PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len), + PARSE_FIELD_VEC_ENTRY("_TRANSPORT=", &transport, &transport_len), + PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len), + PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len), + PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len), + PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, &realtime_len), + PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, &monotonic_len), + PARSE_FIELD_VEC_ENTRY("CONFIG_FILE=", &config_file, &config_file_len), + PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len), + PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len), + PARSE_FIELD_VEC_ENTRY("DOCUMENTATION=", &documentation_url, &documentation_url_len), + }; + size_t highlight_shifted[] = {highlight ? highlight[0] : 0, highlight ? highlight[1] : 0}; + + assert(f); + assert(j); + + /* Set the threshold to one bigger than the actual print + * threshold, so that if the line is actually longer than what + * we're willing to print, ellipsization will occur. This way + * we won't output a misleading line without any indication of + * truncation. + */ + sd_journal_set_data_threshold(j, flags & (OUTPUT_SHOW_ALL|OUTPUT_FULL_WIDTH) ? 0 : PRINT_CHAR_THRESHOLD + 1); + + JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { + r = parse_fieldv(data, length, fields, ELEMENTSOF(fields)); + if (r < 0) + return r; + } + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to get journal fields: %m"); + + if (!message) { + log_debug("Skipping message without MESSAGE= field."); + return 0; + } + + if (!(flags & OUTPUT_SHOW_ALL)) + strip_tab_ansi(&message, &message_len, highlight_shifted); + + if (priority_len == 1 && *priority >= '0' && *priority <= '7') + p = *priority - '0'; + + audit = streq_ptr(transport, "audit"); + + if (mode == OUTPUT_SHORT_MONOTONIC) + r = output_timestamp_monotonic(f, j, monotonic); + else + r = output_timestamp_realtime(f, j, mode, flags, realtime); + if (r < 0) + return r; + n += r; + + if (flags & OUTPUT_NO_HOSTNAME) { + /* Suppress display of the hostname if this is requested. */ + hostname = mfree(hostname); + hostname_len = 0; + } + + if (hostname && shall_print(hostname, hostname_len, flags)) { + fprintf(f, " %.*s", (int) hostname_len, hostname); + n += hostname_len + 1; + } + + if (mode == OUTPUT_WITH_UNIT && ((unit && shall_print(unit, unit_len, flags)) || + (user_unit && shall_print(user_unit, user_unit_len, flags)))) { + if (unit) { + fprintf(f, " %.*s", (int) unit_len, unit); + n += unit_len + 1; + } + if (user_unit) { + if (unit) + fprintf(f, "/%.*s", (int) user_unit_len, user_unit); + else + fprintf(f, " %.*s", (int) user_unit_len, user_unit); + n += unit_len + 1; + } + } else if (identifier && shall_print(identifier, identifier_len, flags)) { + fprintf(f, " %.*s", (int) identifier_len, identifier); + n += identifier_len + 1; + } else if (comm && shall_print(comm, comm_len, flags)) { + fprintf(f, " %.*s", (int) comm_len, comm); + n += comm_len + 1; + } else + fputs(" unknown", f); + + if (pid && shall_print(pid, pid_len, flags)) { + fprintf(f, "[%.*s]", (int) pid_len, pid); + n += pid_len + 2; + } else if (fake_pid && shall_print(fake_pid, fake_pid_len, flags)) { + fprintf(f, "[%.*s]", (int) fake_pid_len, fake_pid); + n += fake_pid_len + 2; + } + + fputs(": ", f); + + if (urlify_enabled()) { + _cleanup_free_ char *c = NULL; + + /* Insert a hyperlink to a documentation URL before the message. Note that we don't make the + * whole message a hyperlink, since otherwise the whole screen might end up being just + * hyperlinks. Moreover, we want to be able to highlight parts of the message (such as the + * config file, see below) hence let's keep the documentation URL link separate. */ + + if (documentation_url && shall_print(documentation_url, documentation_url_len, flags)) { + c = strndup(documentation_url, documentation_url_len); + if (!c) + return log_oom(); + + if (!documentation_url_is_valid(c)) /* Eat up invalid links */ + c = mfree(c); + } + + if (!c) + (void) url_from_catalog(j, &c); /* Acquire from catalog if not embedded in log message itself */ + + if (c) { + _cleanup_free_ char *urlified = NULL; + + if (terminal_urlify(c, special_glyph(SPECIAL_GLYPH_EXTERNAL_LINK), &urlified) >= 0) { + fputs(urlified, f); + fputc(' ', f); + } + } + } + + if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(message, message_len)) { + char bytes[FORMAT_BYTES_MAX]; + fprintf(f, "[%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len)); + } else { + + /* URLify config_file string in message, if the message starts with it. + * Skip URLification if the highlighted pattern overlaps. */ + if (config_file && + message_len >= config_file_len && + memcmp(message, config_file, config_file_len) == 0 && + (message_len == config_file_len || IN_SET(message[config_file_len], ':', ' ')) && + (!highlight || highlight_shifted[0] == 0 || highlight_shifted[0] > config_file_len)) { + + _cleanup_free_ char *t = NULL, *urlified = NULL; + + t = strndup(config_file, config_file_len); + if (t && terminal_urlify_path(t, NULL, &urlified) >= 0) { + size_t urlified_len = strlen(urlified); + size_t shift = urlified_len - config_file_len; + char *joined; + + joined = realloc(urlified, message_len + shift); + if (joined) { + memcpy(joined + urlified_len, message + config_file_len, message_len - config_file_len); + free_and_replace(message, joined); + TAKE_PTR(urlified); + message_len += shift; + if (highlight) { + highlight_shifted[0] += shift; + highlight_shifted[1] += shift; + } + } + } + } + + ellipsized |= + print_multiline(f, n + 2, n_columns, flags, p, audit, + message, message_len, + highlight_shifted); + } + + if (flags & OUTPUT_CATALOG) + (void) print_catalog(f, j); + + return ellipsized; +} + +static int output_verbose( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + const Set *output_fields, + const size_t highlight[2]) { + + const void *data; + size_t length; + _cleanup_free_ char *cursor = NULL; + uint64_t realtime = 0; + char ts[FORMAT_TIMESTAMP_MAX + 7]; + const char *timestamp; + int r; + + assert(f); + assert(j); + + sd_journal_set_data_threshold(j, 0); + + r = sd_journal_get_data(j, "_SOURCE_REALTIME_TIMESTAMP", &data, &length); + if (r == -ENOENT) + log_debug("Source realtime timestamp not found"); + else if (r < 0) + return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get source realtime timestamp: %m"); + else { + _cleanup_free_ char *value = NULL; + + r = parse_field(data, length, "_SOURCE_REALTIME_TIMESTAMP=", + STRLEN("_SOURCE_REALTIME_TIMESTAMP="), &value, + NULL); + if (r < 0) + return r; + assert(r > 0); + + r = safe_atou64(value, &realtime); + if (r < 0) + log_debug_errno(r, "Failed to parse realtime timestamp: %m"); + } + + if (r < 0) { + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) + return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get realtime timestamp: %m"); + } + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) + return log_error_errno(r, "Failed to get cursor: %m"); + + timestamp = format_timestamp_style(ts, sizeof ts, realtime, + flags & OUTPUT_UTC ? TIMESTAMP_US_UTC : TIMESTAMP_US); + fprintf(f, "%s [%s]\n", + timestamp ?: "(no timestamp)", + cursor); + + JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { + const char *c, *p; + int fieldlen; + const char *on = "", *off = ""; + _cleanup_free_ char *urlified = NULL; + size_t valuelen; + + c = memchr(data, '=', length); + if (!c) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field."); + + fieldlen = c - (const char*) data; + if (!journal_field_valid(data, fieldlen, true)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field."); + + r = field_set_test(output_fields, data, fieldlen); + if (r < 0) + return r; + if (r == 0) + continue; + + valuelen = length - 1 - fieldlen; + + if ((flags & OUTPUT_COLOR) && (p = startswith(data, "MESSAGE="))) { + on = ANSI_HIGHLIGHT; + off = ANSI_NORMAL; + } else if ((p = startswith(data, "CONFIG_FILE="))) { + if (terminal_urlify_path(p, NULL, &urlified) >= 0) { + p = urlified; + valuelen = strlen(urlified); + } + } else + p = c + 1; + + if ((flags & OUTPUT_SHOW_ALL) || + (((length < PRINT_CHAR_THRESHOLD) || flags & OUTPUT_FULL_WIDTH) + && utf8_is_printable(data, length))) { + fprintf(f, " %s%.*s=", on, fieldlen, (const char*)data); + print_multiline(f, 4 + fieldlen + 1, 0, OUTPUT_FULL_WIDTH, 0, false, + p, valuelen, + NULL); + fputs(off, f); + } else { + char bytes[FORMAT_BYTES_MAX]; + + fprintf(f, " %s%.*s=[%s blob data]%s\n", + on, + (int) (c - (const char*) data), + (const char*) data, + format_bytes(bytes, sizeof(bytes), length - (c - (const char *) data) - 1), + off); + } + } + + if (r < 0) + return r; + + if (flags & OUTPUT_CATALOG) + (void) print_catalog(f, j); + + return 0; +} + +static int output_export( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + const Set *output_fields, + const size_t highlight[2]) { + + sd_id128_t boot_id; + char sid[SD_ID128_STRING_MAX]; + int r; + usec_t realtime, monotonic; + _cleanup_free_ char *cursor = NULL; + const void *data; + size_t length; + + assert(j); + + sd_journal_set_data_threshold(j, 0); + + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) + return log_error_errno(r, "Failed to get realtime timestamp: %m"); + + r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get monotonic timestamp: %m"); + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) + return log_error_errno(r, "Failed to get cursor: %m"); + + fprintf(f, + "__CURSOR=%s\n" + "__REALTIME_TIMESTAMP="USEC_FMT"\n" + "__MONOTONIC_TIMESTAMP="USEC_FMT"\n" + "_BOOT_ID=%s\n", + cursor, + realtime, + monotonic, + sd_id128_to_string(boot_id, sid)); + + JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { + size_t fieldlen; + const char *c; + + /* We already printed the boot id from the data in the header, hence let's suppress it here */ + if (memory_startswith(data, length, "_BOOT_ID=")) + continue; + + c = memchr(data, '=', length); + if (!c) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field."); + + fieldlen = c - (const char*) data; + if (!journal_field_valid(data, fieldlen, true)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field."); + + r = field_set_test(output_fields, data, fieldlen); + if (r < 0) + return r; + if (!r) + continue; + + if (utf8_is_printable_newline(data, length, false)) + fwrite(data, length, 1, f); + else { + uint64_t le64; + + fwrite(data, fieldlen, 1, f); + fputc('\n', f); + le64 = htole64(length - fieldlen - 1); + fwrite(&le64, sizeof(le64), 1, f); + fwrite(c + 1, length - fieldlen - 1, 1, f); + } + + fputc('\n', f); + } + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + + if (r < 0) + return r; + + fputc('\n', f); + + return 0; +} + +void json_escape( + FILE *f, + const char* p, + size_t l, + OutputFlags flags) { + + assert(f); + assert(p); + + if (!(flags & OUTPUT_SHOW_ALL) && l >= JSON_THRESHOLD) + fputs("null", f); + + else if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(p, l)) { + bool not_first = false; + + fputs("[ ", f); + + while (l > 0) { + if (not_first) + fprintf(f, ", %u", (uint8_t) *p); + else { + not_first = true; + fprintf(f, "%u", (uint8_t) *p); + } + + p++; + l--; + } + + fputs(" ]", f); + } else { + fputc('"', f); + + while (l > 0) { + if (IN_SET(*p, '"', '\\')) { + fputc('\\', f); + fputc(*p, f); + } else if (*p == '\n') + fputs("\\n", f); + else if ((uint8_t) *p < ' ') + fprintf(f, "\\u%04x", (uint8_t) *p); + else + fputc(*p, f); + + p++; + l--; + } + + fputc('"', f); + } +} + +struct json_data { + JsonVariant* name; + size_t n_values; + JsonVariant* values[]; +}; + +static int update_json_data( + Hashmap *h, + OutputFlags flags, + const char *name, + const void *value, + size_t size) { + + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + struct json_data *d; + int r; + + if (!(flags & OUTPUT_SHOW_ALL) && strlen(name) + 1 + size >= JSON_THRESHOLD) + r = json_variant_new_null(&v); + else if (utf8_is_printable(value, size)) + r = json_variant_new_stringn(&v, value, size); + else + r = json_variant_new_array_bytes(&v, value, size); + if (r < 0) + return log_error_errno(r, "Failed to allocate JSON data: %m"); + + d = hashmap_get(h, name); + if (d) { + struct json_data *w; + + w = realloc(d, offsetof(struct json_data, values) + sizeof(JsonVariant*) * (d->n_values + 1)); + if (!w) + return log_oom(); + + d = w; + assert_se(hashmap_update(h, json_variant_string(d->name), d) >= 0); + } else { + _cleanup_(json_variant_unrefp) JsonVariant *n = NULL; + + r = json_variant_new_string(&n, name); + if (r < 0) + return log_error_errno(r, "Failed to allocate JSON name variant: %m"); + + d = malloc0(offsetof(struct json_data, values) + sizeof(JsonVariant*)); + if (!d) + return log_oom(); + + r = hashmap_put(h, json_variant_string(n), d); + if (r < 0) { + free(d); + return log_error_errno(r, "Failed to insert JSON name into hashmap: %m"); + } + + d->name = TAKE_PTR(n); + } + + d->values[d->n_values++] = TAKE_PTR(v); + return 0; +} + +static int update_json_data_split( + Hashmap *h, + OutputFlags flags, + const Set *output_fields, + const void *data, + size_t size) { + + size_t fieldlen; + const char *eq; + char *name; + + assert(h); + assert(data || size == 0); + + if (memory_startswith(data, size, "_BOOT_ID=")) + return 0; + + eq = memchr(data, '=', MIN(size, JSON_THRESHOLD)); + if (!eq) + return 0; + + fieldlen = eq - (const char*) data; + if (!journal_field_valid(data, fieldlen, true)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field."); + + name = strndupa(data, fieldlen); + if (output_fields && !set_contains(output_fields, name)) + return 0; + + return update_json_data(h, flags, name, eq + 1, size - fieldlen - 1); +} + +static int output_json( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + const Set *output_fields, + const size_t highlight[2]) { + + char sid[SD_ID128_STRING_MAX], usecbuf[DECIMAL_STR_MAX(usec_t)]; + _cleanup_(json_variant_unrefp) JsonVariant *object = NULL; + _cleanup_free_ char *cursor = NULL; + uint64_t realtime, monotonic; + JsonVariant **array = NULL; + struct json_data *d; + sd_id128_t boot_id; + Hashmap *h = NULL; + size_t n = 0; + int r; + + assert(j); + + (void) sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD); + + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) + return log_error_errno(r, "Failed to get realtime timestamp: %m"); + + r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get monotonic timestamp: %m"); + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) + return log_error_errno(r, "Failed to get cursor: %m"); + + h = hashmap_new(&string_hash_ops); + if (!h) + return log_oom(); + + r = update_json_data(h, flags, "__CURSOR", cursor, strlen(cursor)); + if (r < 0) + goto finish; + + xsprintf(usecbuf, USEC_FMT, realtime); + r = update_json_data(h, flags, "__REALTIME_TIMESTAMP", usecbuf, strlen(usecbuf)); + if (r < 0) + goto finish; + + xsprintf(usecbuf, USEC_FMT, monotonic); + r = update_json_data(h, flags, "__MONOTONIC_TIMESTAMP", usecbuf, strlen(usecbuf)); + if (r < 0) + goto finish; + + sd_id128_to_string(boot_id, sid); + r = update_json_data(h, flags, "_BOOT_ID", sid, strlen(sid)); + if (r < 0) + goto finish; + + for (;;) { + const void *data; + size_t size; + + r = sd_journal_enumerate_data(j, &data, &size); + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + r = 0; + goto finish; + } + if (r < 0) { + log_error_errno(r, "Failed to read journal: %m"); + goto finish; + } + if (r == 0) + break; + + r = update_json_data_split(h, flags, output_fields, data, size); + if (r < 0) + goto finish; + } + + array = new(JsonVariant*, hashmap_size(h)*2); + if (!array) { + r = log_oom(); + goto finish; + } + + HASHMAP_FOREACH(d, h) { + assert(d->n_values > 0); + + array[n++] = json_variant_ref(d->name); + + if (d->n_values == 1) + array[n++] = json_variant_ref(d->values[0]); + else { + _cleanup_(json_variant_unrefp) JsonVariant *q = NULL; + + r = json_variant_new_array(&q, d->values, d->n_values); + if (r < 0) { + log_error_errno(r, "Failed to create JSON array: %m"); + goto finish; + } + + array[n++] = TAKE_PTR(q); + } + } + + r = json_variant_new_object(&object, array, n); + if (r < 0) { + log_error_errno(r, "Failed to allocate JSON object: %m"); + goto finish; + } + + json_variant_dump(object, + output_mode_to_json_format_flags(mode) | + (FLAGS_SET(flags, OUTPUT_COLOR) ? JSON_FORMAT_COLOR : 0), + f, NULL); + + r = 0; + +finish: + while ((d = hashmap_steal_first(h))) { + size_t k; + + json_variant_unref(d->name); + for (k = 0; k < d->n_values; k++) + json_variant_unref(d->values[k]); + + free(d); + } + + hashmap_free(h); + + json_variant_unref_many(array, n); + free(array); + + return r; +} + +static int output_cat_field( + FILE *f, + sd_journal *j, + OutputFlags flags, + int prio, + const char *field, + const size_t highlight[2]) { + + const char *color_on = "", *color_off = "", *highlight_on = ""; + const void *data; + size_t l, fl; + int r; + + if (FLAGS_SET(flags, OUTPUT_COLOR)) + get_log_colors(prio, &color_on, &color_off, &highlight_on); + + r = sd_journal_get_data(j, field, &data, &l); + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + if (r == -ENOENT) /* An entry without the requested field */ + return 0; + if (r < 0) + return log_error_errno(r, "Failed to get data: %m"); + + fl = strlen(field); + assert(l >= fl + 1); + assert(((char*) data)[fl] == '='); + + data = (const uint8_t*) data + fl + 1; + l -= fl + 1; + + if (FLAGS_SET(flags, OUTPUT_COLOR)) { + if (highlight) { + assert(highlight[0] <= highlight[1]); + assert(highlight[1] <= l); + + fputs(color_on, f); + fwrite((const char*) data, 1, highlight[0], f); + fputs(highlight_on, f); + fwrite((const char*) data + highlight[0], 1, highlight[1] - highlight[0], f); + fputs(color_on, f); + fwrite((const char*) data + highlight[1], 1, l - highlight[1], f); + fputs(color_off, f); + } else { + fputs(color_on, f); + fwrite((const char*) data, 1, l, f); + fputs(color_off, f); + } + } else + fwrite((const char*) data, 1, l, f); + + fputc('\n', f); + return 0; +} + +static int output_cat( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + const Set *output_fields, + const size_t highlight[2]) { + + int r, prio = LOG_INFO; + const char *field; + + assert(j); + assert(f); + + (void) sd_journal_set_data_threshold(j, 0); + + if (FLAGS_SET(flags, OUTPUT_COLOR)) { + const void *data; + size_t l; + + /* Determine priority of this entry, so that we can color it nicely */ + + r = sd_journal_get_data(j, "PRIORITY", &data, &l); + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + if (r < 0) { + if (r != -ENOENT) + return log_error_errno(r, "Failed to get data: %m"); + + /* An entry without PRIORITY */ + } else if (l == 10 && memcmp(data, "PRIORITY=", 9) == 0) { + char c = ((char*) data)[9]; + + if (c >= '0' && c <= '7') + prio = c - '0'; + } + } + + if (set_isempty(output_fields)) + return output_cat_field(f, j, flags, prio, "MESSAGE", highlight); + + SET_FOREACH(field, output_fields) { + r = output_cat_field(f, j, flags, prio, field, streq(field, "MESSAGE") ? highlight : NULL); + if (r < 0) + return r; + } + + return 0; +} + +static int (*output_funcs[_OUTPUT_MODE_MAX])( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + const Set *output_fields, + const size_t highlight[2]) = { + + [OUTPUT_SHORT] = output_short, + [OUTPUT_SHORT_ISO] = output_short, + [OUTPUT_SHORT_ISO_PRECISE] = output_short, + [OUTPUT_SHORT_PRECISE] = output_short, + [OUTPUT_SHORT_MONOTONIC] = output_short, + [OUTPUT_SHORT_UNIX] = output_short, + [OUTPUT_SHORT_FULL] = output_short, + [OUTPUT_VERBOSE] = output_verbose, + [OUTPUT_EXPORT] = output_export, + [OUTPUT_JSON] = output_json, + [OUTPUT_JSON_PRETTY] = output_json, + [OUTPUT_JSON_SSE] = output_json, + [OUTPUT_JSON_SEQ] = output_json, + [OUTPUT_CAT] = output_cat, + [OUTPUT_WITH_UNIT] = output_short, +}; + +int show_journal_entry( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + char **output_fields, + const size_t highlight[2], + bool *ellipsized) { + + _cleanup_set_free_ Set *fields = NULL; + int r; + + assert(mode >= 0); + assert(mode < _OUTPUT_MODE_MAX); + + if (n_columns <= 0) + n_columns = columns(); + + r = set_put_strdupv(&fields, output_fields); + if (r < 0) + return r; + + r = output_funcs[mode](f, j, mode, n_columns, flags, fields, highlight); + + if (ellipsized && r > 0) + *ellipsized = true; + + return r; +} + +static int maybe_print_begin_newline(FILE *f, OutputFlags *flags) { + assert(f); + assert(flags); + + if (!(*flags & OUTPUT_BEGIN_NEWLINE)) + return 0; + + /* Print a beginning new line if that's request, but only once + * on the first line we print. */ + + fputc('\n', f); + *flags &= ~OUTPUT_BEGIN_NEWLINE; + return 0; +} + +int show_journal( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + OutputFlags flags, + bool *ellipsized) { + + int r; + unsigned line = 0; + bool need_seek = false; + int warn_cutoff = flags & OUTPUT_WARN_CUTOFF; + + assert(j); + assert(mode >= 0); + assert(mode < _OUTPUT_MODE_MAX); + + if (how_many == (unsigned) -1) + need_seek = true; + else { + /* Seek to end */ + r = sd_journal_seek_tail(j); + if (r < 0) + return log_error_errno(r, "Failed to seek to tail: %m"); + + r = sd_journal_previous_skip(j, how_many); + if (r < 0) + return log_error_errno(r, "Failed to skip previous: %m"); + } + + for (;;) { + usec_t usec; + + if (need_seek) { + r = sd_journal_next(j); + if (r < 0) + return log_error_errno(r, "Failed to iterate through journal: %m"); + } + + if (r == 0) + break; + + need_seek = true; + + if (not_before > 0) { + r = sd_journal_get_monotonic_usec(j, &usec, NULL); + + /* -ESTALE is returned if the timestamp is not from this boot */ + if (r == -ESTALE) + continue; + else if (r < 0) + return log_error_errno(r, "Failed to get journal time: %m"); + + if (usec < not_before) + continue; + } + + line++; + maybe_print_begin_newline(f, &flags); + + r = show_journal_entry(f, j, mode, n_columns, flags, NULL, NULL, ellipsized); + if (r < 0) + return r; + } + + if (warn_cutoff && line < how_many && not_before > 0) { + sd_id128_t boot_id; + usec_t cutoff = 0; + + /* Check whether the cutoff line is too early */ + + r = sd_id128_get_boot(&boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get boot id: %m"); + + r = sd_journal_get_cutoff_monotonic_usec(j, boot_id, &cutoff, NULL); + if (r < 0) + return log_error_errno(r, "Failed to get journal cutoff time: %m"); + + if (r > 0 && not_before < cutoff) { + maybe_print_begin_newline(f, &flags); + + /* If we logged *something* and no permission error happened, than we can reliably + * emit the warning about rotation. If we didn't log anything and access errors + * happened, emit hint about permissions. Otherwise, give a generic message, since we + * can't diagnose the issue. */ + + bool noaccess = journal_access_blocked(j); + + if (line == 0 && noaccess) + fprintf(f, "Warning: some journal files were not opened due to insufficient permissions."); + else if (!noaccess) + fprintf(f, "Warning: journal has been rotated since unit was started, output may be incomplete.\n"); + else + fprintf(f, "Warning: journal has been rotated since unit was started and some journal " + "files were not opened due to insufficient permissions, output may be incomplete.\n"); + } + + warn_cutoff = false; + } + + return 0; +} + +int add_matches_for_unit(sd_journal *j, const char *unit) { + const char *m1, *m2, *m3, *m4; + int r; + + assert(j); + assert(unit); + + m1 = strjoina("_SYSTEMD_UNIT=", unit); + m2 = strjoina("COREDUMP_UNIT=", unit); + m3 = strjoina("UNIT=", unit); + m4 = strjoina("OBJECT_SYSTEMD_UNIT=", unit); + + (void)( + /* Look for messages from the service itself */ + (r = sd_journal_add_match(j, m1, 0)) || + + /* Look for coredumps of the service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1", 0)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) || + (r = sd_journal_add_match(j, m2, 0)) || + + /* Look for messages from PID 1 about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, "_PID=1", 0)) || + (r = sd_journal_add_match(j, m3, 0)) || + + /* Look for messages from authorized daemons about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) || + (r = sd_journal_add_match(j, m4, 0)) + ); + + if (r == 0 && endswith(unit, ".slice")) { + const char *m5; + + m5 = strjoina("_SYSTEMD_SLICE=", unit); + + /* Show all messages belonging to a slice */ + (void)( + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m5, 0)) + ); + } + + return r; +} + +int add_matches_for_user_unit(sd_journal *j, const char *unit, uid_t uid) { + int r; + char *m1, *m2, *m3, *m4; + char muid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)]; + + assert(j); + assert(unit); + + m1 = strjoina("_SYSTEMD_USER_UNIT=", unit); + m2 = strjoina("USER_UNIT=", unit); + m3 = strjoina("COREDUMP_USER_UNIT=", unit); + m4 = strjoina("OBJECT_SYSTEMD_USER_UNIT=", unit); + sprintf(muid, "_UID="UID_FMT, uid); + + (void) ( + /* Look for messages from the user service itself */ + (r = sd_journal_add_match(j, m1, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + + /* Look for messages from systemd about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m2, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + + /* Look for coredumps of the service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m3, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) || + + /* Look for messages from authorized daemons about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m4, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) + ); + + if (r == 0 && endswith(unit, ".slice")) { + const char *m5; + + m5 = strjoina("_SYSTEMD_SLICE=", unit); + + /* Show all messages belonging to a slice */ + (void)( + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m5, 0)) || + (r = sd_journal_add_match(j, muid, 0)) + ); + } + + return r; +} + +static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) { + _cleanup_close_pair_ int pair[2] = { -1, -1 }; + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1; + char buf[ID128_UUID_STRING_MAX]; + pid_t pid, child; + ssize_t k; + int r; + + assert(machine); + assert(boot_id); + + if (!machine_name_is_valid(machine)) + return -EINVAL; + + r = container_get_leader(machine, &pid); + if (r < 0) + return r; + + r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, NULL, &rootfd); + if (r < 0) + return r; + + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0) + return -errno; + + r = namespace_fork("(sd-bootidns)", "(sd-bootid)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidnsfd, mntnsfd, -1, -1, rootfd, &child); + if (r < 0) + return r; + if (r == 0) { + int fd; + + pair[0] = safe_close(pair[0]); + + fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + _exit(EXIT_FAILURE); + + r = loop_read_exact(fd, buf, 36, false); + safe_close(fd); + if (r < 0) + _exit(EXIT_FAILURE); + + k = send(pair[1], buf, 36, MSG_NOSIGNAL); + if (k != 36) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-bootidns)", child, 0); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) + return -EIO; + + k = recv(pair[0], buf, 36, 0); + if (k != 36) + return -EIO; + + buf[36] = 0; + r = sd_id128_from_string(buf, boot_id); + if (r < 0) + return r; + + return 0; +} + +int add_match_this_boot(sd_journal *j, const char *machine) { + char match[9+32+1] = "_BOOT_ID="; + sd_id128_t boot_id; + int r; + + assert(j); + + if (machine) { + r = get_boot_id_for_machine(machine, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get boot id of container %s: %m", machine); + } else { + r = sd_id128_get_boot(&boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get boot id: %m"); + } + + sd_id128_to_string(boot_id, match + 9); + r = sd_journal_add_match(j, match, strlen(match)); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + + r = sd_journal_add_conjunction(j); + if (r < 0) + return log_error_errno(r, "Failed to add conjunction: %m"); + + return 0; +} + +int show_journal_by_unit( + FILE *f, + const char *unit, + const char *log_namespace, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + uid_t uid, + OutputFlags flags, + int journal_open_flags, + bool system_unit, + bool *ellipsized) { + + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + int r; + + assert(mode >= 0); + assert(mode < _OUTPUT_MODE_MAX); + assert(unit); + + if (how_many <= 0) + return 0; + + r = sd_journal_open_namespace(&j, log_namespace, journal_open_flags | SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE); + if (r < 0) + return log_error_errno(r, "Failed to open journal: %m"); + + r = add_match_this_boot(j, NULL); + if (r < 0) + return r; + + if (system_unit) + r = add_matches_for_unit(j, unit); + else + r = add_matches_for_user_unit(j, unit, uid); + if (r < 0) + return log_error_errno(r, "Failed to add unit matches: %m"); + + if (DEBUG_LOGGING) { + _cleanup_free_ char *filter; + + filter = journal_make_match_string(j); + if (!filter) + return log_oom(); + + log_debug("Journal filter: %s", filter); + } + + return show_journal(f, j, mode, n_columns, not_before, how_many, flags, ellipsized); +} diff --git a/src/shared/logs-show.h b/src/shared/logs-show.h new file mode 100644 index 0000000..71ebe13 --- /dev/null +++ b/src/shared/logs-show.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> + +#include "sd-journal.h" + +#include "macro.h" +#include "output-mode.h" +#include "time-util.h" +#include "util.h" + +int show_journal_entry( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + char **output_fields, + const size_t highlight[2], + bool *ellipsized); +int show_journal( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + OutputFlags flags, + bool *ellipsized); + +int add_match_this_boot(sd_journal *j, const char *machine); + +int add_matches_for_unit( + sd_journal *j, + const char *unit); + +int add_matches_for_user_unit( + sd_journal *j, + const char *unit, + uid_t uid); + +int show_journal_by_unit( + FILE *f, + const char *unit, + const char *namespace, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + uid_t uid, + OutputFlags flags, + int journal_open_flags, + bool system_unit, + bool *ellipsized); + +void json_escape( + FILE *f, + const char* p, + size_t l, + OutputFlags flags); diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c new file mode 100644 index 0000000..84f415a --- /dev/null +++ b/src/shared/loop-util.c @@ -0,0 +1,722 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_VALGRIND_MEMCHECK_H +#include <valgrind/memcheck.h> +#endif + +#include <errno.h> +#include <fcntl.h> +#include <linux/blkpg.h> +#include <linux/fs.h> +#include <linux/loop.h> +#include <sys/file.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "sd-device.h" + +#include "alloc-util.h" +#include "blockdev-util.h" +#include "device-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "loop-util.h" +#include "missing_loop.h" +#include "parse-util.h" +#include "random-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "tmpfile-util.h" + +static void cleanup_clear_loop_close(int *fd) { + if (*fd < 0) + return; + + (void) ioctl(*fd, LOOP_CLR_FD); + (void) safe_close(*fd); +} + +static int loop_is_bound(int fd) { + struct loop_info64 info; + + assert(fd >= 0); + + if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) { + if (errno == ENXIO) + return false; /* not bound! */ + + return -errno; + } + + return true; /* bound! */ +} + +static int device_has_block_children(sd_device *d) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + const char *main_sn, *main_ss; + sd_device *q; + int r; + + assert(d); + + /* Checks if the specified device currently has block device children (i.e. partition block + * devices). */ + + r = sd_device_get_sysname(d, &main_sn); + if (r < 0) + return r; + + r = sd_device_get_subsystem(d, &main_ss); + if (r < 0) + return r; + + if (!streq(main_ss, "block")) + return -EINVAL; + + r = sd_device_enumerator_new(&e); + if (r < 0) + return r; + + r = sd_device_enumerator_allow_uninitialized(e); + if (r < 0) + return r; + + r = sd_device_enumerator_add_match_parent(e, d); + if (r < 0) + return r; + + FOREACH_DEVICE(e, q) { + const char *ss, *sn; + + r = sd_device_get_subsystem(q, &ss); + if (r < 0) + continue; + + if (!streq(ss, "block")) + continue; + + r = sd_device_get_sysname(q, &sn); + if (r < 0) + continue; + + if (streq(sn, main_sn)) + continue; + + return 1; /* we have block device children */ + } + + return 0; +} + +static int loop_configure( + int fd, + int nr, + const struct loop_config *c, + bool *try_loop_configure) { + + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + _cleanup_free_ char *sysname = NULL; + _cleanup_close_ int lock_fd = -1; + int r; + + assert(fd >= 0); + assert(nr >= 0); + assert(c); + assert(try_loop_configure); + + if (asprintf(&sysname, "loop%i", nr) < 0) + return -ENOMEM; + + r = sd_device_new_from_subsystem_sysname(&d, "block", sysname); + if (r < 0) + return r; + + /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened + * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on + * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure + * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a + * long time udev would possibly never run on it again, even though the fd is unlocked, simply + * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to + * automatically release the lock, after we are done. */ + lock_fd = fd_reopen(fd, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); + if (lock_fd < 0) + return lock_fd; + if (flock(lock_fd, LOCK_EX) < 0) + return -errno; + + /* Let's see if the device is really detached, i.e. currently has no associated partition block + * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that + * superficially is detached but still has partition block devices associated for it. They only go + * away when the device is reattached. (Yes, LOOP_CLR_FD doesn't work then, because officially + * nothing is attached and LOOP_CTL_REMOVE doesn't either, since it doesn't care about partition + * block devices. */ + r = device_has_block_children(d); + if (r < 0) + return r; + if (r > 0) { + r = loop_is_bound(fd); + if (r < 0) + return r; + if (r > 0) + return -EBUSY; + + return -EUCLEAN; /* Bound but children? Tell caller to reattach something so that the + * partition block devices are gone too. */ + } + + if (*try_loop_configure) { + if (ioctl(fd, LOOP_CONFIGURE, c) < 0) { + /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other + * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL + * rather than ENOTTY on loopback block devices. They should fix that in the kernel, + * but in the meantime we accept both here. */ + if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL) + return -errno; + + *try_loop_configure = false; + } else { + bool good = true; + + if (c->info.lo_sizelimit != 0) { + /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the + * block device. If it's used, let's immediately check if it had the desired + * effect hence. And if not use classic LOOP_SET_STATUS64. */ + uint64_t z; + + if (ioctl(fd, BLKGETSIZE64, &z) < 0) { + r = -errno; + goto fail; + } + + if (z != c->info.lo_sizelimit) { + log_debug("LOOP_CONFIGURE is broken, doesn't honour .lo_sizelimit. Falling back to LOOP_SET_STATUS64."); + good = false; + } + } + + if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) { + /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag + * into the block device. Let's hence verify if things work correctly here + * before returning. */ + + r = blockdev_partscan_enabled(fd); + if (r < 0) + goto fail; + if (r == 0) { + log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64."); + good = false; + } + } + + if (!good) { + /* LOOP_CONFIGURE doesn't work. Remember that. */ + *try_loop_configure = false; + + /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD, + * because LOOP_CLR_FD is async: if the operation cannot be executed right + * away it just sets the autoclear flag on the device. This means there's a + * good chance we cannot actually reuse the loopback device right-away. Hence + * let's assume it's busy, avoid the trouble and let the calling loop call us + * again with a new, likely unused device. */ + r = -EBUSY; + goto fail; + } + + return 0; + } + } + + /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64 + * ioctl can return EAGAIN in case we change the lo_offset field, if someone else is accessing the + * block device while we try to reconfigure it. This is a pretty common case, since udev might + * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways: + * first, let's take the BSD lock to ensure that udev will not step in between the point in + * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on + * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms + * needlessly if we are just racing against udev. The latter is protection against all other cases, + * i.e. peers that do not take the BSD lock. */ + + if (ioctl(fd, LOOP_SET_FD, c->fd) < 0) + return -errno; + + for (unsigned n_attempts = 0;;) { + if (ioctl(fd, LOOP_SET_STATUS64, &c->info) >= 0) + break; + if (errno != EAGAIN || ++n_attempts >= 64) { + r = log_debug_errno(errno, "Failed to configure loopback device: %m"); + goto fail; + } + + /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more + * failed attempts we see */ + (void) usleep(UINT64_C(10) * USEC_PER_MSEC + + random_u64() % (UINT64_C(240) * USEC_PER_MSEC * n_attempts/64)); + } + + return 0; + +fail: + (void) ioctl(fd, LOOP_CLR_FD); + return r; +} + +static int attach_empty_file(int loop, int nr) { + _cleanup_close_ int fd = -1; + + /* So here's the thing: on various kernels (5.8 at least) loop block devices might enter a state + * where they are detached but nonetheless have partitions, when used heavily. Accessing these + * partitions results in immediatey IO errors. There's no pretty way to get rid of them + * again. Neither LOOP_CLR_FD nor LOOP_CTL_REMOVE suffice (see above). What does work is to + * reassociate them with a new fd however. This is what we do here hence: we associate the devices + * with an empty file (i.e. an image that definitely has no partitions). We then immediately clear it + * again. This suffices to make the partitions go away. Ugly but appears to work. */ + + log_debug("Found unattached loopback block device /dev/loop%i with partitions. Attaching empty file to remove them.", nr); + + fd = open_tmpfile_unlinkable(NULL, O_RDONLY); + if (fd < 0) + return fd; + + if (flock(loop, LOCK_EX) < 0) + return -errno; + + if (ioctl(loop, LOOP_SET_FD, fd) < 0) + return -errno; + + if (ioctl(loop, LOOP_SET_STATUS64, &(struct loop_info64) { + .lo_flags = LO_FLAGS_READ_ONLY| + LO_FLAGS_AUTOCLEAR| + LO_FLAGS_PARTSCAN, /* enable partscan, so that the partitions really go away */ + }) < 0) + return -errno; + + if (ioctl(loop, LOOP_CLR_FD) < 0) + return -errno; + + /* The caller is expected to immediately close the loopback device after this, so that the BSD lock + * is released, and udev sees the changes. */ + return 0; +} + +int loop_device_make( + int fd, + int open_flags, + uint64_t offset, + uint64_t size, + uint32_t loop_flags, + LoopDevice **ret) { + + _cleanup_free_ char *loopdev = NULL; + bool try_loop_configure = true; + struct loop_config config; + LoopDevice *d = NULL; + struct stat st; + int nr = -1, r; + + assert(fd >= 0); + assert(ret); + assert(IN_SET(open_flags, O_RDWR, O_RDONLY)); + + if (fstat(fd, &st) < 0) + return -errno; + + if (S_ISBLK(st.st_mode)) { + if (ioctl(fd, LOOP_GET_STATUS64, &config.info) >= 0) { + /* Oh! This is a loopback device? That's interesting! */ + +#if HAVE_VALGRIND_MEMCHECK_H + /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */ + VALGRIND_MAKE_MEM_DEFINED(&config.info, sizeof(config.info)); +#endif + nr = config.info.lo_number; + + if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) + return -ENOMEM; + } + + if (offset == 0 && IN_SET(size, 0, UINT64_MAX)) { + _cleanup_close_ int copy = -1; + + /* If this is already a block device, store a copy of the fd as it is */ + + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + d = new(LoopDevice, 1); + if (!d) + return -ENOMEM; + *d = (LoopDevice) { + .fd = TAKE_FD(copy), + .nr = nr, + .node = TAKE_PTR(loopdev), + .relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */ + }; + + *ret = d; + return d->fd; + } + } else { + r = stat_verify_regular(&st); + if (r < 0) + return r; + } + + _cleanup_close_ int control = -1; + _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -1; + + control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); + if (control < 0) + return -errno; + + config = (struct loop_config) { + .fd = fd, + .info = { + /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */ + .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR, + .lo_offset = offset, + .lo_sizelimit = size == UINT64_MAX ? 0 : size, + }, + }; + + /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might + * be gone already, taken by somebody else racing against us. */ + for (unsigned n_attempts = 0;;) { + _cleanup_close_ int loop = -1; + + nr = ioctl(control, LOOP_CTL_GET_FREE); + if (nr < 0) + return -errno; + + if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) + return -ENOMEM; + + loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags); + if (loop < 0) { + /* Somebody might've gotten the same number from the kernel, used the device, + * and called LOOP_CTL_REMOVE on it. Let's retry with a new number. */ + if (!IN_SET(errno, ENOENT, ENXIO)) + return -errno; + } else { + r = loop_configure(loop, nr, &config, &try_loop_configure); + if (r >= 0) { + loop_with_fd = TAKE_FD(loop); + break; + } + if (r == -EUCLEAN) { + /* Make left-over partition disappear hack (see above) */ + r = attach_empty_file(loop, nr); + if (r < 0 && r != -EBUSY) + return r; + } else if (r != -EBUSY) + return r; + } + + if (++n_attempts >= 64) /* Give up eventually */ + return -EBUSY; + + loopdev = mfree(loopdev); + + /* Wait some random time, to make collision less likely. Let's pick a random time in the + * range 0ms…250ms, linearly scaled by the number of failed attempts. */ + (void) usleep(random_u64() % (UINT64_C(10) * USEC_PER_MSEC + + UINT64_C(240) * USEC_PER_MSEC * n_attempts/64)); + } + + d = new(LoopDevice, 1); + if (!d) + return -ENOMEM; + *d = (LoopDevice) { + .fd = TAKE_FD(loop_with_fd), + .node = TAKE_PTR(loopdev), + .nr = nr, + }; + + *ret = d; + return 0; +} + +int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret) { + _cleanup_close_ int fd = -1; + int r; + + assert(path); + assert(ret); + assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY)); + + /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying + * read-only if we cannot. */ + + fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|(open_flags >= 0 ? open_flags : O_RDWR)); + if (fd < 0) { + r = -errno; + + /* Retry read-only? */ + if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS)) + return r; + + fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY); + if (fd < 0) + return r; /* Propagate original error */ + + open_flags = O_RDONLY; + } else if (open_flags < 0) + open_flags = O_RDWR; + + return loop_device_make(fd, open_flags, 0, 0, loop_flags, ret); +} + +LoopDevice* loop_device_unref(LoopDevice *d) { + if (!d) + return NULL; + + if (d->fd >= 0) { + /* Implicitly sync the device, since otherwise in-flight blocks might not get written */ + if (fsync(d->fd) < 0) + log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m"); + + if (d->nr >= 0 && !d->relinquished) { + if (ioctl(d->fd, LOOP_CLR_FD) < 0) + log_debug_errno(errno, "Failed to clear loop device: %m"); + + } + + safe_close(d->fd); + } + + if (d->nr >= 0 && !d->relinquished) { + _cleanup_close_ int control = -1; + + control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); + if (control < 0) + log_warning_errno(errno, + "Failed to open loop control device, cannot remove loop device %s: %m", + strna(d->node)); + else + for (unsigned n_attempts = 0;;) { + if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0) + break; + if (errno != EBUSY || ++n_attempts >= 64) { + log_warning_errno(errno, "Failed to remove device %s: %m", strna(d->node)); + break; + } + (void) usleep(50 * USEC_PER_MSEC); + } + } + + free(d->node); + return mfree(d); +} + +void loop_device_relinquish(LoopDevice *d) { + assert(d); + + /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel + * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */ + + d->relinquished = true; +} + +int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) { + _cleanup_close_ int loop_fd = -1; + _cleanup_free_ char *p = NULL; + struct loop_info64 info; + struct stat st; + LoopDevice *d; + int nr; + + assert(loop_path); + assert(ret); + + loop_fd = open(loop_path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags); + if (loop_fd < 0) + return -errno; + + if (fstat(loop_fd, &st) < 0) + return -errno; + if (!S_ISBLK(st.st_mode)) + return -ENOTBLK; + + if (ioctl(loop_fd, LOOP_GET_STATUS64, &info) >= 0) { +#if HAVE_VALGRIND_MEMCHECK_H + /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */ + VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info)); +#endif + nr = info.lo_number; + } else + nr = -1; + + p = strdup(loop_path); + if (!p) + return -ENOMEM; + + d = new(LoopDevice, 1); + if (!d) + return -ENOMEM; + + *d = (LoopDevice) { + .fd = TAKE_FD(loop_fd), + .nr = nr, + .node = TAKE_PTR(p), + .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */ + }; + + *ret = d; + return d->fd; +} + +static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) { + char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1]; + _cleanup_free_ char *whole = NULL, *buffer = NULL; + uint64_t current_offset, current_size, partno; + _cleanup_close_ int whole_fd = -1; + struct stat st; + dev_t devno; + int r; + + assert(partition_fd >= 0); + + /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual + * loopback device), and changes the offset, if needed. This is a fancy wrapper around + * BLKPG_RESIZE_PARTITION. */ + + if (fstat(partition_fd, &st) < 0) + return -errno; + + assert(S_ISBLK(st.st_mode)); + + xsprintf(sysfs, "/sys/dev/block/%u:%u/partition", major(st.st_rdev), minor(st.st_rdev)); + r = read_one_line_file(sysfs, &buffer); + if (r == -ENOENT) /* not a partition, cannot resize */ + return -ENOTTY; + if (r < 0) + return r; + r = safe_atou64(buffer, &partno); + if (r < 0) + return r; + + xsprintf(sysfs, "/sys/dev/block/%u:%u/start", major(st.st_rdev), minor(st.st_rdev)); + + buffer = mfree(buffer); + r = read_one_line_file(sysfs, &buffer); + if (r < 0) + return r; + r = safe_atou64(buffer, ¤t_offset); + if (r < 0) + return r; + if (current_offset > UINT64_MAX/512U) + return -EINVAL; + current_offset *= 512U; + + if (ioctl(partition_fd, BLKGETSIZE64, ¤t_size) < 0) + return -EINVAL; + + if (size == UINT64_MAX && offset == UINT64_MAX) + return 0; + if (current_size == size && current_offset == offset) + return 0; + + xsprintf(sysfs, "/sys/dev/block/%u:%u/../dev", major(st.st_rdev), minor(st.st_rdev)); + + buffer = mfree(buffer); + r = read_one_line_file(sysfs, &buffer); + if (r < 0) + return r; + r = parse_dev(buffer, &devno); + if (r < 0) + return r; + + r = device_path_make_major_minor(S_IFBLK, devno, &whole); + if (r < 0) + return r; + + whole_fd = open(whole, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); + if (whole_fd < 0) + return -errno; + + struct blkpg_partition bp = { + .pno = partno, + .start = offset == UINT64_MAX ? current_offset : offset, + .length = size == UINT64_MAX ? current_size : size, + }; + + struct blkpg_ioctl_arg ba = { + .op = BLKPG_RESIZE_PARTITION, + .data = &bp, + .datalen = sizeof(bp), + }; + + if (ioctl(whole_fd, BLKPG, &ba) < 0) + return -errno; + + return 0; +} + +int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) { + struct loop_info64 info; + assert(d); + + /* Changes the offset/start of the loop device relative to the beginning of the underlying file or + * block device. If this loop device actually refers to a partition and not a loopback device, we'll + * try to adjust the partition offsets instead. + * + * If either offset or size is UINT64_MAX we won't change that parameter. */ + + if (d->fd < 0) + return -EBADF; + + if (d->nr < 0) /* not a loopback device */ + return resize_partition(d->fd, offset, size); + + if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0) + return -errno; + +#if HAVE_VALGRIND_MEMCHECK_H + /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */ + VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info)); +#endif + + if (size == UINT64_MAX && offset == UINT64_MAX) + return 0; + if (info.lo_sizelimit == size && info.lo_offset == offset) + return 0; + + if (size != UINT64_MAX) + info.lo_sizelimit = size; + if (offset != UINT64_MAX) + info.lo_offset = offset; + + if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0) + return -errno; + + return 0; +} + +int loop_device_flock(LoopDevice *d, int operation) { + assert(d); + + if (d->fd < 0) + return -EBADF; + + if (flock(d->fd, operation) < 0) + return -errno; + + return 0; +} + +int loop_device_sync(LoopDevice *d) { + assert(d); + + /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that + * we can check the return value though. */ + + if (d->fd < 0) + return -EBADF; + + if (fsync(d->fd) < 0) + return -errno; + + return 0; +} diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h new file mode 100644 index 0000000..9538dae --- /dev/null +++ b/src/shared/loop-util.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" + +typedef struct LoopDevice LoopDevice; + +/* Some helpers for setting up loopback block devices */ + +struct LoopDevice { + int fd; + int nr; + char *node; + bool relinquished; +}; + +int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t loop_flags, LoopDevice **ret); +int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret); +int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret); + +LoopDevice* loop_device_unref(LoopDevice *d); +DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref); + +void loop_device_relinquish(LoopDevice *d); + +int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size); + +int loop_device_flock(LoopDevice *d, int operation); +int loop_device_sync(LoopDevice *d); diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c new file mode 100644 index 0000000..671a56b --- /dev/null +++ b/src/shared/machine-image.c @@ -0,0 +1,1274 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/fs.h> +#include <linux/loop.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/file.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "btrfs-util.h" +#include "chattr-util.h" +#include "copy.h" +#include "dirent-util.h" +#include "dissect-image.h" +#include "env-file.h" +#include "env-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include "hostname-util.h" +#include "id128-util.h" +#include "lockfile-util.h" +#include "log.h" +#include "loop-util.h" +#include "machine-image.h" +#include "macro.h" +#include "mkdir.h" +#include "nulstr-util.h" +#include "os-util.h" +#include "path-util.h" +#include "rm-rf.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "utf8.h" +#include "xattr-util.h" + +static const char* const image_search_path[_IMAGE_CLASS_MAX] = { + [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */ + "/run/machines\0" /* and here too */ + "/var/lib/machines\0" /* the main place for images */ + "/var/lib/container\0" /* legacy */ + "/usr/local/lib/machines\0" + "/usr/lib/machines\0", + + [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */ + "/run/portables\0" /* and here too */ + "/var/lib/portables\0" /* the main place for images */ + "/usr/local/lib/portables\0" + "/usr/lib/portables\0", +}; + +static Image *image_free(Image *i) { + assert(i); + + free(i->name); + free(i->path); + + free(i->hostname); + strv_free(i->machine_info); + strv_free(i->os_release); + + return mfree(i); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free); +DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func, + Image, image_unref); + +static char **image_settings_path(Image *image) { + _cleanup_strv_free_ char **l = NULL; + const char *fn, *s; + unsigned i = 0; + + assert(image); + + l = new0(char*, 4); + if (!l) + return NULL; + + fn = strjoina(image->name, ".nspawn"); + + FOREACH_STRING(s, "/etc/systemd/nspawn", "/run/systemd/nspawn") { + l[i] = path_join(s, fn); + if (!l[i]) + return NULL; + + i++; + } + + l[i] = file_in_same_dir(image->path, fn); + if (!l[i]) + return NULL; + + return TAKE_PTR(l); +} + +static char *image_roothash_path(Image *image) { + const char *fn; + + assert(image); + + fn = strjoina(image->name, ".roothash"); + + return file_in_same_dir(image->path, fn); +} + +static int image_new( + ImageType t, + const char *pretty, + const char *path, + const char *filename, + bool read_only, + usec_t crtime, + usec_t mtime, + Image **ret) { + + _cleanup_(image_unrefp) Image *i = NULL; + + assert(t >= 0); + assert(t < _IMAGE_TYPE_MAX); + assert(pretty); + assert(filename); + assert(ret); + + i = new0(Image, 1); + if (!i) + return -ENOMEM; + + i->n_ref = 1; + i->type = t; + i->read_only = read_only; + i->crtime = crtime; + i->mtime = mtime; + i->usage = i->usage_exclusive = (uint64_t) -1; + i->limit = i->limit_exclusive = (uint64_t) -1; + + i->name = strdup(pretty); + if (!i->name) + return -ENOMEM; + + i->path = path_join(path, filename); + if (!i->path) + return -ENOMEM; + + path_simplify(i->path, false); + + *ret = TAKE_PTR(i); + + return 0; +} + +static int extract_pretty(const char *path, const char *suffix, char **ret) { + _cleanup_free_ char *name = NULL; + const char *p; + size_t n; + + assert(path); + assert(ret); + + p = last_path_component(path); + n = strcspn(p, "/"); + + name = strndup(p, n); + if (!name) + return -ENOMEM; + + if (suffix) { + char *e; + + e = endswith(name, suffix); + if (!e) + return -EINVAL; + + *e = 0; + } + + if (!image_name_is_valid(name)) + return -EINVAL; + + *ret = TAKE_PTR(name); + return 0; +} + +static int image_make( + const char *pretty, + int dfd, + const char *path, + const char *filename, + const struct stat *st, + Image **ret) { + + _cleanup_free_ char *pretty_buffer = NULL, *parent = NULL; + struct stat stbuf; + bool read_only; + int r; + + assert(dfd >= 0 || dfd == AT_FDCWD); + assert(path || dfd == AT_FDCWD); + assert(filename); + + /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block + * devices into /var/lib/machines/, and treat them normally. + * + * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we + * recognize. */ + + if (!st) { + if (fstatat(dfd, filename, &stbuf, 0) < 0) + return -errno; + + st = &stbuf; + } + + if (!path) { + if (dfd == AT_FDCWD) + (void) safe_getcwd(&parent); + else + (void) fd_get_path(dfd, &parent); + } + + read_only = + (path && path_startswith(path, "/usr")) || + (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS); + + if (S_ISDIR(st->st_mode)) { + _cleanup_close_ int fd = -1; + unsigned file_attr = 0; + + if (!ret) + return 0; + + if (!pretty) { + r = extract_pretty(filename, NULL, &pretty_buffer); + if (r < 0) + return r; + + pretty = pretty_buffer; + } + + fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY); + if (fd < 0) + return -errno; + + /* btrfs subvolumes have inode 256 */ + if (st->st_ino == 256) { + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (r) { + BtrfsSubvolInfo info; + + /* It's a btrfs subvolume */ + + r = btrfs_subvol_get_info_fd(fd, 0, &info); + if (r < 0) + return r; + + r = image_new(IMAGE_SUBVOLUME, + pretty, + path, + filename, + info.read_only || read_only, + info.otime, + 0, + ret); + if (r < 0) + return r; + + if (btrfs_quota_scan_ongoing(fd) == 0) { + BtrfsQuotaInfo quota; + + r = btrfs_subvol_get_subtree_quota_fd(fd, 0, "a); + if (r >= 0) { + (*ret)->usage = quota.referenced; + (*ret)->usage_exclusive = quota.exclusive; + + (*ret)->limit = quota.referenced_max; + (*ret)->limit_exclusive = quota.exclusive_max; + } + } + + return 0; + } + } + + /* If the IMMUTABLE bit is set, we consider the + * directory read-only. Since the ioctl is not + * supported everywhere we ignore failures. */ + (void) read_attr_fd(fd, &file_attr); + + /* It's just a normal directory. */ + r = image_new(IMAGE_DIRECTORY, + pretty, + path, + filename, + read_only || (file_attr & FS_IMMUTABLE_FL), + 0, + 0, + ret); + if (r < 0) + return r; + + return 0; + + } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) { + usec_t crtime = 0; + + /* It's a RAW disk image */ + + if (!ret) + return 0; + + (void) fd_getcrtime_at(dfd, filename, &crtime, 0); + + if (!pretty) { + r = extract_pretty(filename, ".raw", &pretty_buffer); + if (r < 0) + return r; + + pretty = pretty_buffer; + } + + r = image_new(IMAGE_RAW, + pretty, + path, + filename, + !(st->st_mode & 0222) || read_only, + crtime, + timespec_load(&st->st_mtim), + ret); + if (r < 0) + return r; + + (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512; + (*ret)->limit = (*ret)->limit_exclusive = st->st_size; + + return 0; + + } else if (S_ISBLK(st->st_mode)) { + _cleanup_close_ int block_fd = -1; + uint64_t size = UINT64_MAX; + + /* A block device */ + + if (!ret) + return 0; + + if (!pretty) { + r = extract_pretty(filename, NULL, &pretty_buffer); + if (r < 0) + return r; + + pretty = pretty_buffer; + } + + block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY); + if (block_fd < 0) + log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path ?: strnull(parent), filename); + else { + /* Refresh stat data after opening the node */ + if (fstat(block_fd, &stbuf) < 0) + return -errno; + st = &stbuf; + + if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */ + return -ENOTTY; + + if (!read_only) { + int state = 0; + + if (ioctl(block_fd, BLKROGET, &state) < 0) + log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path ?: strnull(parent), filename); + else if (state) + read_only = true; + } + + if (ioctl(block_fd, BLKGETSIZE64, &size) < 0) + log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path ?: strnull(parent), filename); + + block_fd = safe_close(block_fd); + } + + r = image_new(IMAGE_BLOCK, + pretty, + path, + filename, + !(st->st_mode & 0222) || read_only, + 0, + 0, + ret); + if (r < 0) + return r; + + if (!IN_SET(size, 0, UINT64_MAX)) + (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size; + + return 0; + } + + return -EMEDIUMTYPE; +} + +int image_find(ImageClass class, const char *name, Image **ret) { + const char *path; + int r; + + assert(class >= 0); + assert(class < _IMAGE_CLASS_MAX); + assert(name); + + /* There are no images with invalid names */ + if (!image_name_is_valid(name)) + return -ENOENT; + + NULSTR_FOREACH(path, image_search_path[class]) { + _cleanup_closedir_ DIR *d = NULL; + struct stat st; + + d = opendir(path); + if (!d) { + if (errno == ENOENT) + continue; + + return -errno; + } + + /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to + * symlink block devices into the search path */ + if (fstatat(dirfd(d), name, &st, 0) < 0) { + _cleanup_free_ char *raw = NULL; + + if (errno != ENOENT) + return -errno; + + raw = strjoin(name, ".raw"); + if (!raw) + return -ENOMEM; + + if (fstatat(dirfd(d), raw, &st, 0) < 0) { + + if (errno == ENOENT) + continue; + + return -errno; + } + + if (!S_ISREG(st.st_mode)) + continue; + + r = image_make(name, dirfd(d), path, raw, &st, ret); + + } else { + if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode)) + continue; + + r = image_make(name, dirfd(d), path, name, &st, ret); + } + if (IN_SET(r, -ENOENT, -EMEDIUMTYPE)) + continue; + if (r < 0) + return r; + + if (ret) + (*ret)->discoverable = true; + + return 1; + } + + if (class == IMAGE_MACHINE && streq(name, ".host")) { + r = image_make(".host", AT_FDCWD, NULL, "/", NULL, ret); + if (r < 0) + return r; + + if (ret) + (*ret)->discoverable = true; + + return r; + } + + return -ENOENT; +}; + +int image_from_path(const char *path, Image **ret) { + + /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether + * the image is in the image search path. And if it is we don't know if the path we used is actually not + * overridden by another, different image earlier in the search path */ + + if (path_equal(path, "/")) + return image_make(".host", AT_FDCWD, NULL, "/", NULL, ret); + + return image_make(NULL, AT_FDCWD, NULL, path, NULL, ret); +} + +int image_find_harder(ImageClass class, const char *name_or_path, Image **ret) { + if (image_name_is_valid(name_or_path)) + return image_find(class, name_or_path, ret); + + return image_from_path(name_or_path, ret); +} + +int image_discover(ImageClass class, Hashmap *h) { + const char *path; + int r; + + assert(class >= 0); + assert(class < _IMAGE_CLASS_MAX); + assert(h); + + NULSTR_FOREACH(path, image_search_path[class]) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + d = opendir(path); + if (!d) { + if (errno == ENOENT) + continue; + + return -errno; + } + + FOREACH_DIRENT_ALL(de, d, return -errno) { + _cleanup_(image_unrefp) Image *image = NULL; + _cleanup_free_ char *truncated = NULL; + const char *pretty; + struct stat st; + + if (dot_or_dot_dot(de->d_name)) + continue; + + /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people + * to symlink block devices into the search path */ + if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) { + if (errno == ENOENT) + continue; + + return -errno; + } + + if (S_ISREG(st.st_mode)) { + const char *e; + + e = endswith(de->d_name, ".raw"); + if (!e) + continue; + + truncated = strndup(de->d_name, e - de->d_name); + if (!truncated) + return -ENOMEM; + + pretty = truncated; + } else if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode)) + pretty = de->d_name; + else + continue; + + if (!image_name_is_valid(pretty)) + continue; + + if (hashmap_contains(h, pretty)) + continue; + + r = image_make(pretty, dirfd(d), path, de->d_name, &st, &image); + if (IN_SET(r, -ENOENT, -EMEDIUMTYPE)) + continue; + if (r < 0) + return r; + + image->discoverable = true; + + r = hashmap_put(h, image->name, image); + if (r < 0) + return r; + + image = NULL; + } + } + + if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) { + _cleanup_(image_unrefp) Image *image = NULL; + + r = image_make(".host", AT_FDCWD, NULL, "/", NULL, &image); + if (r < 0) + return r; + + image->discoverable = true; + + r = hashmap_put(h, image->name, image); + if (r < 0) + return r; + + image = NULL; + } + + return 0; +} + +int image_remove(Image *i) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; + _cleanup_strv_free_ char **settings = NULL; + _cleanup_free_ char *roothash = NULL; + char **j; + int r; + + assert(i); + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + settings = image_settings_path(i); + if (!settings) + return -ENOMEM; + + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + + /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the + * big guns */ + if (unlink(i->path) < 0) { + r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); + if (r < 0) + return r; + } + + break; + + case IMAGE_DIRECTORY: + /* Allow deletion of read-only directories */ + (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL); + r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); + if (r < 0) + return r; + + break; + + case IMAGE_BLOCK: + + /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node + * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink + * the thing (it's most likely a symlink after all). */ + + if (path_startswith(i->path, "/dev")) + break; + + _fallthrough_; + case IMAGE_RAW: + if (unlink(i->path) < 0) + return -errno; + break; + + default: + return -EOPNOTSUPP; + } + + STRV_FOREACH(j, settings) { + if (unlink(*j) < 0 && errno != ENOENT) + log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j); + } + + if (unlink(roothash) < 0 && errno != ENOENT) + log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash); + + return 0; +} + +static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) { + _cleanup_free_ char *rs = NULL; + const char *fn; + + fn = strjoina(new_name, suffix); + + rs = file_in_same_dir(path, fn); + if (!rs) + return -ENOMEM; + + return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs); +} + +int image_rename(Image *i, const char *new_name) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT; + _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL; + _cleanup_strv_free_ char **settings = NULL; + unsigned file_attr = 0; + char **j; + int r; + + assert(i); + + if (!image_name_is_valid(new_name)) + return -EINVAL; + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + settings = image_settings_path(i); + if (!settings) + return -ENOMEM; + + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + /* Make sure nobody takes the new name, between the time we + * checked it is currently unused in all search paths, and the + * time we take possession of it */ + r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); + if (r < 0) + return r; + + r = image_find(IMAGE_MACHINE, new_name, NULL); + if (r >= 0) + return -EEXIST; + if (r != -ENOENT) + return r; + + switch (i->type) { + + case IMAGE_DIRECTORY: + /* Turn of the immutable bit while we rename the image, so that we can rename it */ + (void) read_attr_path(i->path, &file_attr); + + if (file_attr & FS_IMMUTABLE_FL) + (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL); + + _fallthrough_; + case IMAGE_SUBVOLUME: + new_path = file_in_same_dir(i->path, new_name); + break; + + case IMAGE_BLOCK: + + /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */ + if (path_startswith(i->path, "/dev")) + return -EROFS; + + new_path = file_in_same_dir(i->path, new_name); + break; + + case IMAGE_RAW: { + const char *fn; + + fn = strjoina(new_name, ".raw"); + new_path = file_in_same_dir(i->path, fn); + break; + } + + default: + return -EOPNOTSUPP; + } + + if (!new_path) + return -ENOMEM; + + nn = strdup(new_name); + if (!nn) + return -ENOMEM; + + r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path); + if (r < 0) + return r; + + /* Restore the immutable bit, if it was set before */ + if (file_attr & FS_IMMUTABLE_FL) + (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL); + + free_and_replace(i->path, new_path); + free_and_replace(i->name, nn); + + STRV_FOREACH(j, settings) { + r = rename_auxiliary_file(*j, new_name, ".nspawn"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j); + } + + r = rename_auxiliary_file(roothash, new_name, ".roothash"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash); + + return 0; +} + +static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) { + _cleanup_free_ char *rs = NULL; + const char *fn; + + fn = strjoina(new_name, suffix); + + rs = file_in_same_dir(path, fn); + if (!rs) + return -ENOMEM; + + return copy_file_atomic(path, rs, 0664, 0, 0, COPY_REFLINK); +} + +int image_clone(Image *i, const char *new_name, bool read_only) { + _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT; + _cleanup_strv_free_ char **settings = NULL; + _cleanup_free_ char *roothash = NULL; + const char *new_path; + char **j; + int r; + + assert(i); + + if (!image_name_is_valid(new_name)) + return -EINVAL; + + settings = image_settings_path(i); + if (!settings) + return -ENOMEM; + + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + + /* Make sure nobody takes the new name, between the time we + * checked it is currently unused in all search paths, and the + * time we take possession of it */ + r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); + if (r < 0) + return r; + + r = image_find(IMAGE_MACHINE, new_name, NULL); + if (r >= 0) + return -EEXIST; + if (r != -ENOENT) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + case IMAGE_DIRECTORY: + /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain + * directory. */ + + new_path = strjoina("/var/lib/machines/", new_name); + + r = btrfs_subvol_snapshot(i->path, new_path, + (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | + BTRFS_SNAPSHOT_FALLBACK_COPY | + BTRFS_SNAPSHOT_FALLBACK_DIRECTORY | + BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE | + BTRFS_SNAPSHOT_RECURSIVE | + BTRFS_SNAPSHOT_QUOTA); + if (r >= 0) + /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ + (void) btrfs_subvol_auto_qgroup(new_path, 0, true); + + break; + + case IMAGE_RAW: + new_path = strjoina("/var/lib/machines/", new_name, ".raw"); + + r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME); + break; + + case IMAGE_BLOCK: + default: + return -EOPNOTSUPP; + } + + if (r < 0) + return r; + + STRV_FOREACH(j, settings) { + r = clone_auxiliary_file(*j, new_name, ".nspawn"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j); + } + + r = clone_auxiliary_file(roothash, new_name, ".roothash"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash); + + return 0; +} + +int image_read_only(Image *i, bool b) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; + int r; + + assert(i); + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + + /* Note that we set the flag only on the top-level + * subvolume of the image. */ + + r = btrfs_subvol_set_read_only(i->path, b); + if (r < 0) + return r; + + break; + + case IMAGE_DIRECTORY: + /* For simple directory trees we cannot use the access + mode of the top-level directory, since it has an + effect on the container itself. However, we can + use the "immutable" flag, to at least make the + top-level directory read-only. It's not as good as + a read-only subvolume, but at least something, and + we can read the value back. */ + + r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL); + if (r < 0) + return r; + + break; + + case IMAGE_RAW: { + struct stat st; + + if (stat(i->path, &st) < 0) + return -errno; + + if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0) + return -errno; + + /* If the images is now read-only, it's a good time to + * defrag it, given that no write patterns will + * fragment it again. */ + if (b) + (void) btrfs_defrag(i->path); + break; + } + + case IMAGE_BLOCK: { + _cleanup_close_ int fd = -1; + struct stat st; + int state = b; + + fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY); + if (fd < 0) + return -errno; + + if (fstat(fd, &st) < 0) + return -errno; + if (!S_ISBLK(st.st_mode)) + return -ENOTTY; + + if (ioctl(fd, BLKROSET, &state) < 0) + return -errno; + + break; + } + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) { + _cleanup_free_ char *p = NULL; + LockFile t = LOCK_FILE_INIT; + struct stat st; + bool exclusive; + int r; + + assert(path); + assert(global); + assert(local); + + /* Locks an image path. This actually creates two locks: one "local" one, next to the image path + * itself, which might be shared via NFS. And another "global" one, in /run, that uses the + * device/inode number. This has the benefit that we can even lock a tree that is a mount point, + * correctly. */ + + if (!path_is_absolute(path)) + return -EINVAL; + + switch (operation & (LOCK_SH|LOCK_EX)) { + case LOCK_SH: + exclusive = false; + break; + case LOCK_EX: + exclusive = true; + break; + default: + return -EINVAL; + } + + if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) { + *local = *global = (LockFile) LOCK_FILE_INIT; + return 0; + } + + /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are + * running off it after all, and we don't want any images to manipulate the host image. We make an + * exception for shared locks however: we allow those (and make them NOPs since there's no point in + * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well, + * since it means changes made to the host might propagate to the container as they happen (and a + * shared lock kinda suggests that no changes happen at all while it is in place), but it's too + * useful not to allow read-only containers off the host root, hence let's support this, and trust + * the user to do the right thing with this. */ + if (path_equal(path, "/")) { + if (exclusive) + return -EBUSY; + + *local = *global = (LockFile) LOCK_FILE_INIT; + return 0; + } + + if (stat(path, &st) >= 0) { + if (S_ISBLK(st.st_mode)) + r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev)); + else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode)) + r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino); + else + return -ENOTTY; + + if (r < 0) + return -ENOMEM; + } + + /* For block devices we don't need the "local" lock, as the major/minor lock above should be + * sufficient, since block devices are host local anyway. */ + if (!path_startswith(path, "/dev/")) { + r = make_lock_file_for(path, operation, &t); + if (r < 0) { + if (!exclusive && r == -EROFS) + log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path); + else + return r; + } + } + + if (p) { + (void) mkdir_p("/run/systemd/nspawn/locks", 0700); + + r = make_lock_file(p, operation, global); + if (r < 0) { + release_lock_file(&t); + return r; + } + } else + *global = (LockFile) LOCK_FILE_INIT; + + *local = t; + return 0; +} + +int image_set_limit(Image *i, uint64_t referenced_max) { + assert(i); + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + if (i->type != IMAGE_SUBVOLUME) + return -EOPNOTSUPP; + + /* We set the quota both for the subvolume as well as for the + * subtree. The latter is mostly for historical reasons, since + * we didn't use to have a concept of subtree quota, and hence + * only modified the subvolume quota. */ + + (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max); + (void) btrfs_subvol_auto_qgroup(i->path, 0, true); + return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max); +} + +int image_read_metadata(Image *i) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; + int r; + + assert(i); + + r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + case IMAGE_DIRECTORY: { + _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL; + sd_id128_t machine_id = SD_ID128_NULL; + _cleanup_free_ char *hostname = NULL; + _cleanup_free_ char *path = NULL; + + r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name); + else if (r >= 0) { + r = read_etc_hostname(path, &hostname); + if (r < 0) + log_debug_errno(errno, "Failed to read /etc/hostname of image %s: %m", i->name); + } + + path = mfree(path); + + r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name); + else if (r >= 0) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + log_debug_errno(errno, "Failed to open %s: %m", path); + else { + r = id128_read_fd(fd, ID128_PLAIN, &machine_id); + if (r < 0) + log_debug_errno(r, "Image %s contains invalid machine ID.", i->name); + } + } + + path = mfree(path); + + r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name); + else if (r >= 0) { + r = load_env_file_pairs(NULL, path, &machine_info); + if (r < 0) + log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name); + } + + r = load_os_release_pairs(i->path, &os_release); + if (r < 0) + log_debug_errno(r, "Failed to read os-release in image, ignoring: %m"); + + free_and_replace(i->hostname, hostname); + i->machine_id = machine_id; + strv_free_and_replace(i->machine_info, machine_info); + strv_free_and_replace(i->os_release, os_release); + + break; + } + + case IMAGE_RAW: + case IMAGE_BLOCK: { + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; + + r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, &d); + if (r < 0) + return r; + + r = dissect_image(d->fd, NULL, NULL, DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_RELAX_VAR_CHECK, &m); + if (r < 0) + return r; + + r = dissected_image_acquire_metadata(m); + if (r < 0) + return r; + + free_and_replace(i->hostname, m->hostname); + i->machine_id = m->machine_id; + strv_free_and_replace(i->machine_info, m->machine_info); + strv_free_and_replace(i->os_release, m->os_release); + + break; + } + + default: + return -EOPNOTSUPP; + } + + i->metadata_valid = true; + + return 0; +} + +int image_name_lock(const char *name, int operation, LockFile *ret) { + assert(name); + assert(ret); + + /* Locks an image name, regardless of the precise path used. */ + + if (!image_name_is_valid(name)) + return -EINVAL; + + if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) { + *ret = (LockFile) LOCK_FILE_INIT; + return 0; + } + + if (streq(name, ".host")) + return -EBUSY; + + const char *p = strjoina("/run/systemd/nspawn/locks/name-", name); + (void) mkdir_p("/run/systemd/nspawn/locks", 0700); + return make_lock_file(p, operation, ret); +} + +bool image_name_is_valid(const char *s) { + if (!filename_is_valid(s)) + return false; + + if (string_has_cc(s, NULL)) + return false; + + if (!utf8_is_valid(s)) + return false; + + /* Temporary files for atomically creating new files */ + if (startswith(s, ".#")) + return false; + + return true; +} + +bool image_in_search_path(ImageClass class, const char *image) { + const char *path; + + assert(image); + + NULSTR_FOREACH(path, image_search_path[class]) { + const char *p; + size_t k; + + p = path_startswith(image, path); + if (!p) + continue; + + /* Make sure there's a filename following */ + k = strcspn(p, "/"); + if (k == 0) + continue; + + p += k; + + /* Accept trailing slashes */ + if (p[strspn(p, "/")] == 0) + return true; + + } + + return false; +} + +static const char* const image_type_table[_IMAGE_TYPE_MAX] = { + [IMAGE_DIRECTORY] = "directory", + [IMAGE_SUBVOLUME] = "subvolume", + [IMAGE_RAW] = "raw", + [IMAGE_BLOCK] = "block", +}; + +DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType); diff --git a/src/shared/machine-image.h b/src/shared/machine-image.h new file mode 100644 index 0000000..95a8f5c --- /dev/null +++ b/src/shared/machine-image.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include "sd-id128.h" + +#include "hashmap.h" +#include "lockfile-util.h" +#include "macro.h" +#include "path-util.h" +#include "string-util.h" +#include "time-util.h" + +typedef enum ImageClass { + IMAGE_MACHINE, + IMAGE_PORTABLE, + _IMAGE_CLASS_MAX, + _IMAGE_CLASS_INVALID = -1 +} ImageClass; + +typedef enum ImageType { + IMAGE_DIRECTORY, + IMAGE_SUBVOLUME, + IMAGE_RAW, + IMAGE_BLOCK, + _IMAGE_TYPE_MAX, + _IMAGE_TYPE_INVALID = -1 +} ImageType; + +typedef struct Image { + unsigned n_ref; + + ImageType type; + char *name; + char *path; + bool read_only; + + usec_t crtime; + usec_t mtime; + + uint64_t usage; + uint64_t usage_exclusive; + uint64_t limit; + uint64_t limit_exclusive; + + char *hostname; + sd_id128_t machine_id; + char **machine_info; + char **os_release; + + bool metadata_valid:1; + bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */ + + void *userdata; +} Image; + +Image *image_unref(Image *i); +Image *image_ref(Image *i); + +DEFINE_TRIVIAL_CLEANUP_FUNC(Image*, image_unref); + +int image_find(ImageClass class, const char *name, Image **ret); +int image_from_path(const char *path, Image **ret); +int image_find_harder(ImageClass class, const char *name_or_path, Image **ret); +int image_discover(ImageClass class, Hashmap *map); + +int image_remove(Image *i); +int image_rename(Image *i, const char *new_name); +int image_clone(Image *i, const char *new_name, bool read_only); +int image_read_only(Image *i, bool b); + +const char* image_type_to_string(ImageType t) _const_; +ImageType image_type_from_string(const char *s) _pure_; + +bool image_name_is_valid(const char *s) _pure_; + +int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local); +int image_name_lock(const char *name, int operation, LockFile *ret); + +int image_set_limit(Image *i, uint64_t referenced_max); + +int image_read_metadata(Image *i); + +bool image_in_search_path(ImageClass class, const char *image); + +static inline bool IMAGE_IS_HIDDEN(const struct Image *i) { + assert(i); + + return i->name && i->name[0] == '.'; +} + +static inline bool IMAGE_IS_VENDOR(const struct Image *i) { + assert(i); + + return i->path && path_startswith(i->path, "/usr"); +} + +static inline bool IMAGE_IS_HOST(const struct Image *i) { + assert(i); + + if (i->name && streq(i->name, ".host")) + return true; + + if (i->path && path_equal(i->path, "/")) + return true; + + return false; +} + +extern const struct hash_ops image_hash_ops; diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c new file mode 100644 index 0000000..1f0b0b4 --- /dev/null +++ b/src/shared/machine-pool.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "btrfs-util.h" +#include "label.h" +#include "machine-pool.h" +#include "missing_magic.h" +#include "stat-util.h" + +static int check_btrfs(void) { + struct statfs sfs; + + if (statfs("/var/lib/machines", &sfs) < 0) { + if (errno != ENOENT) + return -errno; + + if (statfs("/var/lib", &sfs) < 0) + return -errno; + } + + return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC); +} + +int setup_machine_directory(sd_bus_error *error) { + int r; + + r = check_btrfs(); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m"); + if (r == 0) + return 0; + + (void) btrfs_subvol_make_label("/var/lib/machines"); + + r = btrfs_quota_enable("/var/lib/machines", true); + if (r < 0) + log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m"); + + r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true); + if (r < 0) + log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m"); + + return 1; +} diff --git a/src/shared/machine-pool.h b/src/shared/machine-pool.h new file mode 100644 index 0000000..3f528ab --- /dev/null +++ b/src/shared/machine-pool.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdint.h> + +#include "sd-bus.h" + +int setup_machine_directory(sd_bus_error *error); diff --git a/src/shared/macvlan-util.c b/src/shared/macvlan-util.c new file mode 100644 index 0000000..11dffe9 --- /dev/null +++ b/src/shared/macvlan-util.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "conf-parser.h" +#include "macvlan-util.h" +#include "string-table.h" + +static const char* const macvlan_mode_table[_NETDEV_MACVLAN_MODE_MAX] = { + [NETDEV_MACVLAN_MODE_PRIVATE] = "private", + [NETDEV_MACVLAN_MODE_VEPA] = "vepa", + [NETDEV_MACVLAN_MODE_BRIDGE] = "bridge", + [NETDEV_MACVLAN_MODE_PASSTHRU] = "passthru", + [NETDEV_MACVLAN_MODE_SOURCE] = "source", +}; + +DEFINE_STRING_TABLE_LOOKUP(macvlan_mode, MacVlanMode); diff --git a/src/shared/macvlan-util.h b/src/shared/macvlan-util.h new file mode 100644 index 0000000..0d3a5f4 --- /dev/null +++ b/src/shared/macvlan-util.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <linux/if_link.h> + +typedef enum MacVlanMode { + NETDEV_MACVLAN_MODE_PRIVATE = MACVLAN_MODE_PRIVATE, + NETDEV_MACVLAN_MODE_VEPA = MACVLAN_MODE_VEPA, + NETDEV_MACVLAN_MODE_BRIDGE = MACVLAN_MODE_BRIDGE, + NETDEV_MACVLAN_MODE_PASSTHRU = MACVLAN_MODE_PASSTHRU, + NETDEV_MACVLAN_MODE_SOURCE = MACVLAN_MODE_SOURCE, + _NETDEV_MACVLAN_MODE_MAX, + _NETDEV_MACVLAN_MODE_INVALID = -1 +} MacVlanMode; + +const char *macvlan_mode_to_string(MacVlanMode d) _const_; +MacVlanMode macvlan_mode_from_string(const char *d) _pure_; diff --git a/src/shared/main-func.h b/src/shared/main-func.h new file mode 100644 index 0000000..05cdffe --- /dev/null +++ b/src/shared/main-func.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdlib.h> + +#include "sd-daemon.h" + +#include "pager.h" +#include "selinux-util.h" +#include "spawn-ask-password-agent.h" +#include "spawn-polkit-agent.h" +#include "static-destruct.h" +#include "util.h" + +#define _DEFINE_MAIN_FUNCTION(intro, impl, ret) \ + int main(int argc, char *argv[]) { \ + int r; \ + save_argc_argv(argc, argv); \ + intro; \ + r = impl; \ + if (r < 0) \ + (void) sd_notifyf(0, "ERRNO=%i", -r); \ + ask_password_agent_close(); \ + polkit_agent_close(); \ + pager_close(); \ + mac_selinux_finish(); \ + static_destruct(); \ + return ret; \ + } + +/* Negative return values from impl are mapped to EXIT_FAILURE, and + * everything else means success! */ +#define DEFINE_MAIN_FUNCTION(impl) \ + _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : EXIT_SUCCESS) + +/* Zero is mapped to EXIT_SUCCESS, negative values are mapped to EXIT_FAILURE, + * and positive values are propagated. + * Note: "true" means failure! */ +#define DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(impl) \ + _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : r) diff --git a/src/shared/meson.build b/src/shared/meson.build new file mode 100644 index 0000000..f30fe44 --- /dev/null +++ b/src/shared/meson.build @@ -0,0 +1,399 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +shared_sources = files(''' + acl-util.h + acpi-fpdt.c + acpi-fpdt.h + apparmor-util.c + apparmor-util.h + ask-password-api.c + ask-password-api.h + barrier.c + barrier.h + base-filesystem.c + base-filesystem.h + binfmt-util.c + binfmt-util.h + bitmap.c + bitmap.h + blkid-util.h + bond-util.c + bond-util.h + boot-timestamps.c + boot-timestamps.h + bootspec.c + bootspec.h + bpf-program.c + bpf-program.h + bridge-util.c + bridge-util.h + bus-get-properties.c + bus-get-properties.h + bus-locator.c + bus-locator.h + bus-log-control-api.c + bus-log-control-api.h + bus-map-properties.c + bus-map-properties.h + bus-message-util.c + bus-message-util.h + bus-object.c + bus-object.h + bus-polkit.c + bus-polkit.h + bus-print-properties.c + bus-print-properties.h + bus-unit-procs.c + bus-unit-procs.h + bus-unit-util.c + bus-unit-util.h + bus-util.c + bus-util.h + bus-wait-for-jobs.c + bus-wait-for-jobs.h + bus-wait-for-units.c + bus-wait-for-units.h + calendarspec.c + calendarspec.h + cgroup-setup.c + cgroup-setup.h + cgroup-show.c + cgroup-show.h + chown-recursive.c + chown-recursive.h + clean-ipc.c + clean-ipc.h + clock-util.c + clock-util.h + condition.c + condition.h + conf-parser.c + conf-parser.h + coredump-util.c + coredump-util.h + cpu-set-util.c + cpu-set-util.h + cryptsetup-util.c + cryptsetup-util.h + daemon-util.h + dev-setup.c + dev-setup.h + dissect-image.c + dissect-image.h + dm-util.c + dm-util.h + dns-domain.c + dns-domain.h + dropin.c + dropin.h + efi-loader.c + efi-loader.h + enable-mempool.c + env-file-label.c + env-file-label.h + ethtool-util.c + ethtool-util.h + exec-util.c + exec-util.h + exit-status.c + exit-status.h + fdset.c + fdset.h + fileio-label.c + fileio-label.h + firewall-util.h + format-table.c + format-table.h + fsck-util.h + fstab-util.c + fstab-util.h + generator.c + generator.h + geneve-util.c + geneve-util.h + gpt.c + gpt.h + group-record.c + group-record.h + id128-print.c + id128-print.h + idn-util.c + idn-util.h + ima-util.c + ima-util.h + import-util.c + import-util.h + initreq.h + install-printf.c + install-printf.h + install.c + install.h + ipvlan-util.c + ipvlan-util.h + ip-protocol-list.c + ip-protocol-list.h + journal-importer.c + journal-importer.h + journal-util.c + journal-util.h + json-internal.h + json.c + json.h + libcrypt-util.c + libcrypt-util.h + libmount-util.h + linux/auto_dev-ioctl.h + linux/bpf.h + linux/bpf_common.h + linux/bpf_insn.h + linux/dm-ioctl.h + linux/ethtool.h + local-addresses.c + local-addresses.h + lockfile-util.c + lockfile-util.h + log-link.h + logs-show.c + logs-show.h + loop-util.c + loop-util.h + machine-image.c + machine-image.h + machine-pool.c + machine-pool.h + macvlan-util.c + macvlan-util.h + main-func.h + mkfs-util.c + mkfs-util.h + module-util.h + mount-util.c + mount-util.h + netif-naming-scheme.c + netif-naming-scheme.h + nscd-flush.c + nscd-flush.h + nsflags.c + nsflags.h + numa-util.c + numa-util.h + openssl-util.h + os-util.c + os-util.h + output-mode.c + output-mode.h + pager.c + pager.h + pe-header.h + pkcs11-util.c + pkcs11-util.h + pretty-print.c + pretty-print.h + psi-util.c + psi-util.h + ptyfwd.c + ptyfwd.h + pwquality-util.c + pwquality-util.h + qrcode-util.c + qrcode-util.h + reboot-util.c + reboot-util.h + resize-fs.c + resize-fs.h + resolve-util.c + resolve-util.h + seccomp-util.h + securebits-util.c + securebits-util.h + serialize.c + serialize.h + service-util.c + service-util.h + sleep-config.c + sleep-config.h + socket-netlink.c + socket-netlink.h + spawn-ask-password-agent.c + spawn-ask-password-agent.h + spawn-polkit-agent.c + spawn-polkit-agent.h + specifier.c + specifier.h + switch-root.c + switch-root.h + sysctl-util.c + sysctl-util.h + tmpfile-util-label.c + tmpfile-util-label.h + tomoyo-util.c + tomoyo-util.h + udev-util.c + udev-util.h + uid-range.c + uid-range.h + unit-file.c + unit-file.h + user-record-nss.c + user-record-nss.h + user-record-show.c + user-record-show.h + user-record.c + user-record.h + userdb.c + userdb.h + utmp-wtmp.h + varlink.c + varlink.h + verbs.c + verbs.h + vlan-util.c + vlan-util.h + volatile-util.c + volatile-util.h + watchdog.c + watchdog.h + web-util.c + web-util.h + wifi-util.c + wifi-util.h + xml.c + xml.h +'''.split()) + +if get_option('tests') != 'false' + shared_sources += files('tests.c', 'tests.h') +endif + +test_tables_h = files('test-tables.h') +shared_sources += test_tables_h + +generate_syscall_list = find_program('generate-syscall-list.py') +fname = 'syscall-list.h' +syscall_list_h = custom_target( + fname, + input : 'syscall-names.text', + output : fname, + command : [generate_syscall_list, + '@INPUT@'], + capture : true) + +if conf.get('HAVE_ACL') == 1 + shared_sources += files('acl-util.c') +endif + +if conf.get('ENABLE_UTMP') == 1 + shared_sources += files('utmp-wtmp.c') +endif + +if conf.get('HAVE_SECCOMP') == 1 + shared_sources += files('seccomp-util.c') + shared_sources += syscall_list_h +endif + +if conf.get('HAVE_LIBIPTC') == 1 + shared_sources += files('firewall-util.c') +endif + +if conf.get('HAVE_KMOD') == 1 + shared_sources += files('module-util.c') +endif + +if conf.get('HAVE_PAM') == 1 + shared_sources += files(''' + pam-util.c + pam-util.h +'''.split()) +endif + +generate_ip_protocol_list = find_program('generate-ip-protocol-list.sh') +ip_protocol_list_txt = custom_target( + 'ip-protocol-list.txt', + output : 'ip-protocol-list.txt', + command : [generate_ip_protocol_list, cpp], + capture : true) + +fname = 'ip-protocol-from-name.gperf' +gperf_file = custom_target( + fname, + input : ip_protocol_list_txt, + output : fname, + command : [generate_gperfs, 'ip_protocol', 'IPPROTO_', '@INPUT@'], + capture : true) + +fname = 'ip-protocol-from-name.h' +target1 = custom_target( + fname, + input : gperf_file, + output : fname, + command : [gperf, + '-L', 'ANSI-C', '-t', '--ignore-case', + '-N', 'lookup_ip_protocol', + '-H', 'hash_ip_protocol_name', + '-p', '-C', + '@INPUT@'], + capture : true) + +fname = 'ip-protocol-to-name.h' +awkscript = 'ip-protocol-to-name.awk' +target2 = custom_target( + fname, + input : [awkscript, ip_protocol_list_txt], + output : fname, + command : [awk, '-f', '@INPUT0@', '@INPUT1@'], + capture : true) + +shared_generated_gperf_headers = [target1, target2] +shared_sources += shared_generated_gperf_headers + +libshared_name = 'systemd-shared-@0@'.format(meson.project_version()) + +libshared_deps = [threads, + libacl, + libblkid, + libcap, + libcrypt, + libgcrypt, + libiptc, + libkmod, + liblz4, + libmount, + libopenssl, + libp11kit, + libpam, + librt, + libseccomp, + libselinux, + libzstd, + libxz] + +libshared_sym_path = '@0@/libshared.sym'.format(meson.current_source_dir()) + +libshared_static = static_library( + libshared_name, + shared_sources, + include_directories : includes, + dependencies : libshared_deps, + c_args : ['-fvisibility=default']) + +libshared = shared_library( + libshared_name, + libudev_sources, + include_directories : includes, + link_args : ['-shared', + '-Wl,--version-script=' + libshared_sym_path], + link_whole : [libshared_static, + libbasic, + libbasic_gcrypt, + libsystemd_static, + libjournal_client], + c_args : ['-fvisibility=default'], + dependencies : libshared_deps, + install : true, + install_dir : rootlibexecdir) + +############################################################ + +run_target( + 'syscall-names-update', + command : [syscall_names_update_sh, meson.current_source_dir()]) diff --git a/src/shared/mkfs-util.c b/src/shared/mkfs-util.c new file mode 100644 index 0000000..ce10e60 --- /dev/null +++ b/src/shared/mkfs-util.c @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "id128-util.h" +#include "mkfs-util.h" +#include "path-util.h" +#include "process-util.h" +#include "stdio-util.h" +#include "string-util.h" + +int mkfs_exists(const char *fstype) { + const char *mkfs; + int r; + + assert(fstype); + + if (STR_IN_SET(fstype, "auto", "swap")) /* these aren't real file system types, refuse early */ + return -EINVAL; + + mkfs = strjoina("mkfs.", fstype); + if (!filename_is_valid(mkfs)) /* refuse file system types with slashes and similar */ + return -EINVAL; + + r = find_executable(mkfs, NULL); + if (r == -ENOENT) + return false; + if (r < 0) + return r; + + return true; +} + +int make_filesystem( + const char *node, + const char *fstype, + const char *label, + sd_id128_t uuid, + bool discard) { + + _cleanup_free_ char *mkfs = NULL; + int r; + + assert(node); + assert(fstype); + assert(label); + + if (streq(fstype, "swap")) { + r = find_executable("mkswap", &mkfs); + if (r == -ENOENT) + return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkswap binary not available."); + if (r < 0) + return log_error_errno(r, "Failed to determine whether mkswap binary exists: %m"); + } else { + r = mkfs_exists(fstype); + if (r < 0) + return log_error_errno(r, "Failed to determine whether mkfs binary for %s exists: %m", fstype); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for %s is not available.", fstype); + + mkfs = strjoin("mkfs.", fstype); + if (!mkfs) + return log_oom(); + } + + r = safe_fork("(mkfs)", FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR, NULL); + if (r < 0) + return r; + if (r == 0) { + char suuid[ID128_UUID_STRING_MAX]; + + /* Child */ + id128_to_uuid_string(uuid, suuid); + + if (streq(fstype, "ext4")) + (void) execlp(mkfs, mkfs, + "-L", label, + "-U", suuid, + "-I", "256", + "-O", "has_journal", + "-m", "0", + "-E", discard ? "lazy_itable_init=1,discard" : "lazy_itable_init=1,nodiscard", + node, NULL); + + else if (streq(fstype, "btrfs")) { + if (discard) + (void) execlp(mkfs, mkfs, "-L", label, "-U", suuid, node, NULL); + else + (void) execlp(mkfs, mkfs, "-L", label, "-U", suuid, "--nodiscard", node, NULL); + + } else if (streq(fstype, "xfs")) { + const char *j; + + j = strjoina("uuid=", suuid); + if (discard) + (void) execlp(mkfs, mkfs, "-L", label, "-m", j, "-m", "reflink=1", node, NULL); + else + (void) execlp(mkfs, mkfs, "-L", label, "-m", j, "-m", "reflink=1", "-K", node, NULL); + + } else if (streq(fstype, "vfat")) { + char mangled_label[8 + 3 + 1], vol_id[8 + 1]; + + /* Classic FAT only allows 11 character uppercase labels */ + strncpy(mangled_label, label, sizeof(mangled_label)-1); + mangled_label[sizeof(mangled_label)-1] = 0; + ascii_strupper(mangled_label); + + xsprintf(vol_id, "%08" PRIx32, + ((uint32_t) uuid.bytes[0] << 24) | + ((uint32_t) uuid.bytes[1] << 16) | + ((uint32_t) uuid.bytes[2] << 8) | + ((uint32_t) uuid.bytes[3])); /* Take first 32 byte of UUID */ + + (void) execlp(mkfs, mkfs, + "-i", vol_id, + "-n", mangled_label, + "-F", "32", /* yes, we force FAT32 here */ + node, NULL); + + } else if (streq(fstype, "swap")) { + + (void) execlp(mkfs, mkfs, + "-L", label, + "-U", suuid, + node, NULL); + + } else + /* Generic fallback for all other file systems */ + (void) execlp(mkfs, mkfs, node, NULL); + + log_error_errno(errno, "Failed to execute %s: %m", mkfs); + + _exit(EXIT_FAILURE); + } + + return 0; +} diff --git a/src/shared/mkfs-util.h b/src/shared/mkfs-util.h new file mode 100644 index 0000000..7647afb --- /dev/null +++ b/src/shared/mkfs-util.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-id128.h" + +int mkfs_exists(const char *fstype); + +int make_filesystem(const char *node, const char *fstype, const char *label, sd_id128_t uuid, bool discard); diff --git a/src/shared/module-util.c b/src/shared/module-util.c new file mode 100644 index 0000000..587e636 --- /dev/null +++ b/src/shared/module-util.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "module-util.h" + +int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose) { + const int probe_flags = KMOD_PROBE_APPLY_BLACKLIST; + struct kmod_list *itr; + _cleanup_(kmod_module_unref_listp) struct kmod_list *modlist = NULL; + int r; + + /* verbose==true means we should log at non-debug level if we + * fail to find or load the module. */ + + log_debug("Loading module: %s", module); + + r = kmod_module_new_from_lookup(ctx, module, &modlist); + if (r < 0) + return log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r, + "Failed to look up module alias '%s': %m", module); + + if (!modlist) { + log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r, + "Failed to find module '%s'", module); + return -ENOENT; + } + + kmod_list_foreach(itr, modlist) { + _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL; + int state, err; + + mod = kmod_module_get_module(itr); + state = kmod_module_get_initstate(mod); + + switch (state) { + case KMOD_MODULE_BUILTIN: + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "Module '%s' is built in", kmod_module_get_name(mod)); + break; + + case KMOD_MODULE_LIVE: + log_debug("Module '%s' is already loaded", kmod_module_get_name(mod)); + break; + + default: + err = kmod_module_probe_insert_module(mod, probe_flags, + NULL, NULL, NULL, NULL); + if (err == 0) + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "Inserted module '%s'", kmod_module_get_name(mod)); + else if (err == KMOD_PROBE_APPLY_BLACKLIST) + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "Module '%s' is deny-listed", kmod_module_get_name(mod)); + else { + assert(err < 0); + + log_full_errno(!verbose ? LOG_DEBUG : + err == -ENODEV ? LOG_NOTICE : + err == -ENOENT ? LOG_WARNING : + LOG_ERR, + err, + "Failed to insert module '%s': %m", + kmod_module_get_name(mod)); + if (!IN_SET(err, -ENODEV, -ENOENT)) + r = err; + } + } + } + + return r; +} diff --git a/src/shared/module-util.h b/src/shared/module-util.h new file mode 100644 index 0000000..4db8c5f --- /dev/null +++ b/src/shared/module-util.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <libkmod.h> + +#include "macro.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_ctx*, kmod_unref); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_module*, kmod_module_unref); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_list*, kmod_module_unref_list); + +int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose); diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c new file mode 100644 index 0000000..b19b384 --- /dev/null +++ b/src/shared/mount-util.c @@ -0,0 +1,744 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hashmap.h" +#include "libmount-util.h" +#include "mount-util.h" +#include "mountpoint-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "set.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" + +int mount_fd(const char *source, + int target_fd, + const char *filesystemtype, + unsigned long mountflags, + const void *data) { + + char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)]; + + xsprintf(path, "/proc/self/fd/%i", target_fd); + if (mount(source, path, filesystemtype, mountflags, data) < 0) { + if (errno != ENOENT) + return -errno; + + /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't + * mounted. Check for the latter to generate better error messages. */ + if (proc_mounted() == 0) + return -ENOSYS; + + return -ENOENT; + } + + return 0; +} + +int mount_nofollow( + const char *source, + const char *target, + const char *filesystemtype, + unsigned long mountflags, + const void *data) { + + _cleanup_close_ int fd = -1; + + /* In almost all cases we want to manipulate the mount table without following symlinks, hence + * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is + * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early + * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the + * fs to mount) we can only use traditional mount() directly. + * + * Note that this disables following only for the final component of the target, i.e symlinks within + * the path of the target are honoured, as are symlinks in the source path everywhere. */ + + fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return mount_fd(source, fd, filesystemtype, mountflags, data); +} + +int umount_recursive(const char *prefix, int flags) { + int n = 0, r; + bool again; + + /* Try to umount everything recursively below a + * directory. Also, take care of stacked mounts, and keep + * unmounting them until they are gone. */ + + do { + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; + + again = false; + + r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter); + if (r < 0) + return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m"); + + for (;;) { + struct libmnt_fs *fs; + const char *path; + + r = mnt_table_next_fs(table, iter, &fs); + if (r == 1) + break; + if (r < 0) + return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m"); + + path = mnt_fs_get_target(fs); + if (!path) + continue; + + if (!path_startswith(path, prefix)) + continue; + + if (umount2(path, flags | UMOUNT_NOFOLLOW) < 0) { + log_debug_errno(errno, "Failed to umount %s, ignoring: %m", path); + continue; + } + + log_debug("Successfully unmounted %s", path); + + again = true; + n++; + + break; + } + } while (again); + + return n; +} + +static int get_mount_flags( + struct libmnt_table *table, + const char *path, + unsigned long *ret) { + + _cleanup_close_ int fd = -1; + struct libmnt_fs *fs; + struct statvfs buf; + const char *opts; + int r; + + /* Get the mount flags for the mountpoint at "path" from "table". We have a fallback using statvfs() + * in place (which provides us with mostly the same info), but it's just a fallback, since using it + * means triggering autofs or NFS mounts, which we'd rather avoid needlessly. + * + * This generally doesn't follow symlinks. */ + + fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD); + if (!fs) { + log_debug("Could not find '%s' in mount table, ignoring.", path); + goto fallback; + } + + opts = mnt_fs_get_vfs_options(fs); + if (!opts) { + *ret = 0; + return 0; + } + + r = mnt_optstr_get_flags(opts, ret, mnt_get_builtin_optmap(MNT_LINUX_MAP)); + if (r != 0) { + log_debug_errno(r, "Could not get flags for '%s', ignoring: %m", path); + goto fallback; + } + + /* MS_RELATIME is default and trying to set it in an unprivileged container causes EPERM */ + *ret &= ~MS_RELATIME; + return 0; + +fallback: + fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); + if (fd < 0) + return -errno; + + if (fstatvfs(fd, &buf) < 0) + return -errno; + + /* The statvfs() flags and the mount flags mostly have the same values, but for some cases do + * not. Hence map the flags manually. (Strictly speaking, ST_RELATIME/MS_RELATIME is the most + * prominent one that doesn't match, but that's the one we mask away anyway, see above.) */ + + *ret = + FLAGS_SET(buf.f_flag, ST_RDONLY) * MS_RDONLY | + FLAGS_SET(buf.f_flag, ST_NODEV) * MS_NODEV | + FLAGS_SET(buf.f_flag, ST_NOEXEC) * MS_NOEXEC | + FLAGS_SET(buf.f_flag, ST_NOSUID) * MS_NOSUID | + FLAGS_SET(buf.f_flag, ST_NOATIME) * MS_NOATIME | + FLAGS_SET(buf.f_flag, ST_NODIRATIME) * MS_NODIRATIME; + + return 0; +} + +/* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it + * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */ +int bind_remount_recursive_with_mountinfo( + const char *prefix, + unsigned long new_flags, + unsigned long flags_mask, + char **deny_list, + FILE *proc_self_mountinfo) { + + _cleanup_set_free_free_ Set *done = NULL; + _cleanup_free_ char *simplified = NULL; + int r; + + assert(prefix); + assert(proc_self_mountinfo); + + /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already + * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write + * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to + * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each + * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We + * do not have any effect on future submounts that might get propagated, they might be writable. This includes + * future submounts that have been triggered via autofs. + * + * If the "deny_list" parameter is specified it may contain a list of subtrees to exclude from the + * remount operation. Note that we'll ignore the deny list for the top-level path. */ + + simplified = strdup(prefix); + if (!simplified) + return -ENOMEM; + + path_simplify(simplified, false); + + done = set_new(&path_hash_ops); + if (!done) + return -ENOMEM; + + for (;;) { + _cleanup_set_free_free_ Set *todo = NULL; + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; + bool top_autofs = false; + char *x; + unsigned long orig_flags; + + todo = set_new(&path_hash_ops); + if (!todo) + return -ENOMEM; + + rewind(proc_self_mountinfo); + + r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter); + if (r < 0) + return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m"); + + for (;;) { + struct libmnt_fs *fs; + const char *path, *type; + + r = mnt_table_next_fs(table, iter, &fs); + if (r == 1) + break; + if (r < 0) + return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m"); + + path = mnt_fs_get_target(fs); + type = mnt_fs_get_fstype(fs); + if (!path || !type) + continue; + + if (!path_startswith(path, simplified)) + continue; + + /* Ignore this mount if it is deny-listed, but only if it isn't the top-level mount + * we shall operate on. */ + if (!path_equal(path, simplified)) { + bool deny_listed = false; + char **i; + + STRV_FOREACH(i, deny_list) { + if (path_equal(*i, simplified)) + continue; + + if (!path_startswith(*i, simplified)) + continue; + + if (path_startswith(path, *i)) { + deny_listed = true; + log_debug("Not remounting %s deny-listed by %s, called for %s", + path, *i, simplified); + break; + } + } + if (deny_listed) + continue; + } + + /* Let's ignore autofs mounts. If they aren't + * triggered yet, we want to avoid triggering + * them, as we don't make any guarantees for + * future submounts anyway. If they are + * already triggered, then we will find + * another entry for this. */ + if (streq(type, "autofs")) { + top_autofs = top_autofs || path_equal(path, simplified); + continue; + } + + if (!set_contains(done, path)) { + r = set_put_strdup(&todo, path); + if (r < 0) + return r; + } + } + + /* If we have no submounts to process anymore and if + * the root is either already done, or an autofs, we + * are done */ + if (set_isempty(todo) && + (top_autofs || set_contains(done, simplified))) + return 0; + + if (!set_contains(done, simplified) && + !set_contains(todo, simplified)) { + /* The prefix directory itself is not yet a mount, make it one. */ + r = mount_nofollow(simplified, simplified, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return r; + + orig_flags = 0; + (void) get_mount_flags(table, simplified, &orig_flags); + + r = mount_nofollow(NULL, simplified, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL); + if (r < 0) + return r; + + log_debug("Made top-level directory %s a mount point.", prefix); + + r = set_put_strdup(&done, simplified); + if (r < 0) + return r; + } + + while ((x = set_steal_first(todo))) { + + r = set_consume(done, x); + if (IN_SET(r, 0, -EEXIST)) + continue; + if (r < 0) + return r; + + /* Deal with mount points that are obstructed by a later mount */ + r = path_is_mount_point(x, NULL, 0); + if (IN_SET(r, 0, -ENOENT)) + continue; + if (r < 0) { + if (!ERRNO_IS_PRIVILEGE(r)) + return r; + + /* Even if root user invoke this, submounts under private FUSE or NFS mount points + * may not be acceessed. E.g., + * + * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt + * $ bindfs --no-allow-other ~/mnt ~/mnt + * + * Then, root user cannot access the mount point ~/mnt/mnt. + * In such cases, the submounts are ignored, as we have no way to manage them. */ + log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x); + continue; + } + + /* Try to reuse the original flag set */ + orig_flags = 0; + (void) get_mount_flags(table, x, &orig_flags); + + r = mount_nofollow(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL); + if (r < 0) + return r; + + log_debug("Remounted %s read-only.", x); + } + } +} + +int bind_remount_recursive( + const char *prefix, + unsigned long new_flags, + unsigned long flags_mask, + char **deny_list) { + + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + int r; + + r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo); + if (r < 0) + return r; + + return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, proc_self_mountinfo); +} + +int bind_remount_one_with_mountinfo( + const char *path, + unsigned long new_flags, + unsigned long flags_mask, + FILE *proc_self_mountinfo) { + + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + unsigned long orig_flags = 0; + int r; + + assert(path); + assert(proc_self_mountinfo); + + rewind(proc_self_mountinfo); + + table = mnt_new_table(); + if (!table) + return -ENOMEM; + + r = mnt_table_parse_stream(table, proc_self_mountinfo, "/proc/self/mountinfo"); + if (r < 0) + return r; + + /* Try to reuse the original flag set */ + (void) get_mount_flags(table, path, &orig_flags); + + r = mount_nofollow(NULL, path, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL); + if (r < 0) + return r; + + return 0; +} + +int mount_move_root(const char *path) { + assert(path); + + if (chdir(path) < 0) + return -errno; + + if (mount(path, "/", NULL, MS_MOVE, NULL) < 0) + return -errno; + + if (chroot(".") < 0) + return -errno; + + if (chdir("/") < 0) + return -errno; + + return 0; +} + +int repeat_unmount(const char *path, int flags) { + bool done = false; + + assert(path); + + /* If there are multiple mounts on a mount point, this + * removes them all */ + + for (;;) { + if (umount2(path, flags) < 0) { + + if (errno == EINVAL) + return done; + + return -errno; + } + + done = true; + } +} + +int mode_to_inaccessible_node( + const char *runtime_dir, + mode_t mode, + char **ret) { + + /* This function maps a node type to a corresponding inaccessible file node. These nodes are created + * during early boot by PID 1. In some cases we lacked the privs to create the character and block + * devices (maybe because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a + * devices policy that excludes device nodes with major and minor of 0), but that's fine, in that + * case we use an AF_UNIX file node instead, which is not the same, but close enough for most + * uses. And most importantly, the kernel allows bind mounts from socket nodes to any non-directory + * file nodes, and that's the most important thing that matters. + * + * Note that the runtime directory argument shall be the top-level runtime directory, i.e. /run/ if + * we operate in system context and $XDG_RUNTIME_DIR if we operate in user context. */ + + _cleanup_free_ char *d = NULL; + const char *node = NULL; + + assert(ret); + + if (!runtime_dir) + runtime_dir = "/run"; + + switch(mode & S_IFMT) { + case S_IFREG: + node = "/systemd/inaccessible/reg"; + break; + + case S_IFDIR: + node = "/systemd/inaccessible/dir"; + break; + + case S_IFCHR: + node = "/systemd/inaccessible/chr"; + break; + + case S_IFBLK: + node = "/systemd/inaccessible/blk"; + break; + + case S_IFIFO: + node = "/systemd/inaccessible/fifo"; + break; + + case S_IFSOCK: + node = "/systemd/inaccessible/sock"; + break; + } + if (!node) + return -EINVAL; + + d = path_join(runtime_dir, node); + if (!d) + return -ENOMEM; + + /* On new kernels unprivileged users are permitted to create 0:0 char device nodes (because they also + * act as whiteout inode for overlayfs), but no other char or block device nodes. On old kernels no + * device node whatsoever may be created by unprivileged processes. Hence, if the caller asks for the + * inaccessible block device node let's see if the block device node actually exists, and if not, + * fall back to the character device node. From there fall back to the socket device node. This means + * in the best case we'll get the right device node type — but if not we'll hopefully at least get a + * device node at all. */ + + if (S_ISBLK(mode) && + access(d, F_OK) < 0 && errno == ENOENT) { + free(d); + d = path_join(runtime_dir, "/systemd/inaccessible/chr"); + if (!d) + return -ENOMEM; + } + + if (IN_SET(mode & S_IFMT, S_IFBLK, S_IFCHR) && + access(d, F_OK) < 0 && errno == ENOENT) { + free(d); + d = path_join(runtime_dir, "/systemd/inaccessible/sock"); + if (!d) + return -ENOMEM; + } + + *ret = TAKE_PTR(d); + return 0; +} + +#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "") +static char* mount_flags_to_string(long unsigned flags) { + char *x; + _cleanup_free_ char *y = NULL; + long unsigned overflow; + + overflow = flags & ~(MS_RDONLY | + MS_NOSUID | + MS_NODEV | + MS_NOEXEC | + MS_SYNCHRONOUS | + MS_REMOUNT | + MS_MANDLOCK | + MS_DIRSYNC | + MS_NOATIME | + MS_NODIRATIME | + MS_BIND | + MS_MOVE | + MS_REC | + MS_SILENT | + MS_POSIXACL | + MS_UNBINDABLE | + MS_PRIVATE | + MS_SLAVE | + MS_SHARED | + MS_RELATIME | + MS_KERNMOUNT | + MS_I_VERSION | + MS_STRICTATIME | + MS_LAZYTIME); + + if (flags == 0 || overflow != 0) + if (asprintf(&y, "%lx", overflow) < 0) + return NULL; + + x = strjoin(FLAG(MS_RDONLY), + FLAG(MS_NOSUID), + FLAG(MS_NODEV), + FLAG(MS_NOEXEC), + FLAG(MS_SYNCHRONOUS), + FLAG(MS_REMOUNT), + FLAG(MS_MANDLOCK), + FLAG(MS_DIRSYNC), + FLAG(MS_NOATIME), + FLAG(MS_NODIRATIME), + FLAG(MS_BIND), + FLAG(MS_MOVE), + FLAG(MS_REC), + FLAG(MS_SILENT), + FLAG(MS_POSIXACL), + FLAG(MS_UNBINDABLE), + FLAG(MS_PRIVATE), + FLAG(MS_SLAVE), + FLAG(MS_SHARED), + FLAG(MS_RELATIME), + FLAG(MS_KERNMOUNT), + FLAG(MS_I_VERSION), + FLAG(MS_STRICTATIME), + FLAG(MS_LAZYTIME), + y); + if (!x) + return NULL; + if (!y) + x[strlen(x) - 1] = '\0'; /* truncate the last | */ + return x; +} + +int mount_verbose_full( + int error_log_level, + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options, + bool follow_symlink) { + + _cleanup_free_ char *fl = NULL, *o = NULL; + unsigned long f; + int r; + + r = mount_option_mangle(options, flags, &f, &o); + if (r < 0) + return log_full_errno(error_log_level, r, + "Failed to mangle mount options %s: %m", + strempty(options)); + + fl = mount_flags_to_string(f); + + if ((f & MS_REMOUNT) && !what && !type) + log_debug("Remounting %s (%s \"%s\")...", + where, strnull(fl), strempty(o)); + else if (!what && !type) + log_debug("Mounting %s (%s \"%s\")...", + where, strnull(fl), strempty(o)); + else if ((f & MS_BIND) && !type) + log_debug("Bind-mounting %s on %s (%s \"%s\")...", + what, where, strnull(fl), strempty(o)); + else if (f & MS_MOVE) + log_debug("Moving mount %s → %s (%s \"%s\")...", + what, where, strnull(fl), strempty(o)); + else + log_debug("Mounting %s (%s) on %s (%s \"%s\")...", + strna(what), strna(type), where, strnull(fl), strempty(o)); + + if (follow_symlink) + r = mount(what, where, type, f, o) < 0 ? -errno : 0; + else + r = mount_nofollow(what, where, type, f, o); + if (r < 0) + return log_full_errno(error_log_level, r, + "Failed to mount %s (type %s) on %s (%s \"%s\"): %m", + strna(what), strna(type), where, strnull(fl), strempty(o)); + return 0; +} + +int umount_verbose( + int error_log_level, + const char *what, + int flags) { + + assert(what); + + log_debug("Umounting %s...", what); + + if (umount2(what, flags) < 0) + return log_full_errno(error_log_level, errno, + "Failed to unmount %s: %m", what); + + return 0; +} + +int mount_option_mangle( + const char *options, + unsigned long mount_flags, + unsigned long *ret_mount_flags, + char **ret_remaining_options) { + + const struct libmnt_optmap *map; + _cleanup_free_ char *ret = NULL; + const char *p; + int r; + + /* This extracts mount flags from the mount options, and store + * non-mount-flag options to '*ret_remaining_options'. + * E.g., + * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000" + * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and + * "size=1630748k,mode=700,uid=1000,gid=1000". + * See more examples in test-mount-utils.c. + * + * Note that if 'options' does not contain any non-mount-flag options, + * then '*ret_remaining_options' is set to NULL instead of empty string. + * Note that this does not check validity of options stored in + * '*ret_remaining_options'. + * Note that if 'options' is NULL, then this just copies 'mount_flags' + * to '*ret_mount_flags'. */ + + assert(ret_mount_flags); + assert(ret_remaining_options); + + map = mnt_get_builtin_optmap(MNT_LINUX_MAP); + if (!map) + return -EINVAL; + + p = options; + for (;;) { + _cleanup_free_ char *word = NULL; + const struct libmnt_optmap *ent; + + r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE); + if (r < 0) + return r; + if (r == 0) + break; + + for (ent = map; ent->name; ent++) { + /* All entries in MNT_LINUX_MAP do not take any argument. + * Thus, ent->name does not contain "=" or "[=]". */ + if (!streq(word, ent->name)) + continue; + + if (!(ent->mask & MNT_INVERT)) + mount_flags |= ent->id; + else if (mount_flags & ent->id) + mount_flags ^= ent->id; + + break; + } + + /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */ + if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL)) + return -ENOMEM; + } + + *ret_mount_flags = mount_flags; + *ret_remaining_options = TAKE_PTR(ret); + + return 0; +} diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h new file mode 100644 index 0000000..6202008 --- /dev/null +++ b/src/shared/mount-util.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <mntent.h> +#include <stdio.h> +#include <unistd.h> + +#include "errno-util.h" +#include "macro.h" + +/* 4MB for contents of regular files, 64k inodes for directories, symbolic links and device specials, using + * large storage array systems as a baseline */ +#define TMPFS_LIMITS_DEV ",size=4m,nr_inodes=64k" + +/* Very little, if any use expected */ +#define TMPFS_LIMITS_EMPTY_OR_ALMOST ",size=4m,nr_inodes=1k" +#define TMPFS_LIMITS_SYS TMPFS_LIMITS_EMPTY_OR_ALMOST +#define TMPFS_LIMITS_SYS_FS_CGROUP TMPFS_LIMITS_EMPTY_OR_ALMOST + +/* On an extremely small device with only 256MB of RAM, 20% of RAM should be enough for the re-execution of + * PID1 because 16MB of free space is required. */ +#define TMPFS_LIMITS_RUN ",size=20%,nr_inodes=800k" + +/* The limit used for various nested tmpfs mounts, in paricular for guests started by systemd-nspawn. + * 10% of RAM (using 16GB of RAM as a baseline) translates to 400k inodes (assuming 4k each) and 25% + * translates to 1M inodes. + * (On the host, /tmp is configured through a .mount unit file.) */ +#define NESTED_TMPFS_LIMITS ",size=10%,nr_inodes=400k" + +/* More space for volatile root and /var */ +#define TMPFS_LIMITS_VAR ",size=25%,nr_inodes=1m" +#define TMPFS_LIMITS_ROOTFS TMPFS_LIMITS_VAR +#define TMPFS_LIMITS_VOLATILE_STATE TMPFS_LIMITS_VAR + +int mount_fd(const char *source, int target_fd, const char *filesystemtype, unsigned long mountflags, const void *data); +int mount_nofollow(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data); + +int repeat_unmount(const char *path, int flags); +int umount_recursive(const char *target, int flags); +int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list); +int bind_remount_recursive_with_mountinfo(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list, FILE *proc_self_mountinfo); +int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo); + +int mount_move_root(const char *path); + +DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, endmntent); +#define _cleanup_endmntent_ _cleanup_(endmntentp) + +int mount_verbose_full( + int error_log_level, + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options, + bool follow_symlink); + +static inline int mount_follow_verbose( + int error_log_level, + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options) { + return mount_verbose_full(error_log_level, what, where, type, flags, options, true); +} + +static inline int mount_nofollow_verbose( + int error_log_level, + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options) { + return mount_verbose_full(error_log_level, what, where, type, flags, options, false); +} + +int umount_verbose( + int error_log_level, + const char *where, + int flags); + +int mount_option_mangle( + const char *options, + unsigned long mount_flags, + unsigned long *ret_mount_flags, + char **ret_remaining_options); + +int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest); + +/* Useful for usage with _cleanup_(), unmounts, removes a directory and frees the pointer */ +static inline char* umount_and_rmdir_and_free(char *p) { + PROTECT_ERRNO; + (void) umount_recursive(p, 0); + (void) rmdir(p); + free(p); + return NULL; +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_rmdir_and_free); diff --git a/src/shared/netif-naming-scheme.c b/src/shared/netif-naming-scheme.c new file mode 100644 index 0000000..df520ab --- /dev/null +++ b/src/shared/netif-naming-scheme.c @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "netif-naming-scheme.h" +#include "proc-cmdline.h" +#include "string-util.h" + +static const NamingScheme naming_schemes[] = { + { "v238", NAMING_V238 }, + { "v239", NAMING_V239 }, + { "v240", NAMING_V240 }, + { "v241", NAMING_V241 }, + { "v243", NAMING_V243 }, + { "v245", NAMING_V245 }, + { "v247", NAMING_V247 }, + /* … add more schemes here, as the logic to name devices is updated … */ +}; + +static const NamingScheme* naming_scheme_from_name(const char *name) { + size_t i; + + if (streq(name, "latest")) + return naming_schemes + ELEMENTSOF(naming_schemes) - 1; + + for (i = 0; i < ELEMENTSOF(naming_schemes); i++) + if (streq(naming_schemes[i].name, name)) + return naming_schemes + i; + + return NULL; +} + +const NamingScheme* naming_scheme(void) { + static const NamingScheme *cache = NULL; + _cleanup_free_ char *buffer = NULL; + const char *e, *k; + + if (cache) + return cache; + + /* Acquire setting from the kernel command line */ + (void) proc_cmdline_get_key("net.naming-scheme", 0, &buffer); + + /* Also acquire it from an env var */ + e = getenv("NET_NAMING_SCHEME"); + if (e) { + if (*e == ':') { + /* If prefixed with ':' the kernel cmdline takes precedence */ + k = buffer ?: e + 1; + } else + k = e; /* Otherwise the env var takes precedence */ + } else + k = buffer; + + if (k) { + cache = naming_scheme_from_name(k); + if (cache) { + log_info("Using interface naming scheme '%s'.", cache->name); + return cache; + } + + log_warning("Unknown interface naming scheme '%s' requested, ignoring.", k); + } + + cache = naming_scheme_from_name(DEFAULT_NET_NAMING_SCHEME); + assert(cache); + log_info("Using default interface naming scheme '%s'.", cache->name); + + return cache; +} diff --git a/src/shared/netif-naming-scheme.h b/src/shared/netif-naming-scheme.h new file mode 100644 index 0000000..503a74e --- /dev/null +++ b/src/shared/netif-naming-scheme.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +/* So here's the deal: net_id is supposed to be an exercise in providing stable names for network devices. However, we + * also want to keep updating the naming scheme used in future versions of net_id. These two goals of course are + * contradictory: on one hand we want things to not change and on the other hand we want them to improve. Our way out + * of this dilemma is to introduce the "naming scheme" concept: each time we improve the naming logic we define a new + * flag for it. Then, we keep a list of schemes, each identified by a name associated with the flags it implements. Via + * a kernel command line and environment variable we then allow the user to pick the scheme they want us to follow: + * installers could "freeze" the used scheme at the moment of installation this way. + * + * Developers: each time you tweak the naming logic here, define a new flag below, and condition the tweak with + * it. Each time we do a release we'll then add a new scheme entry and include all newly defined flags. + * + * Note that this is only half a solution to the problem though: not only udev/net_id gets updated all the time, the + * kernel gets too. And thus a kernel that previously didn't expose some sysfs attribute we look for might eventually + * do, and thus affect our naming scheme too. Thus, enforcing a naming scheme will make interfacing more stable across + * OS versions, but not fully stabilize them. */ +typedef enum NamingSchemeFlags { + /* First, the individual features */ + NAMING_SR_IOV_V = 1 << 0, /* Use "v" suffix for SR-IOV, see 609948c7043a */ + NAMING_NPAR_ARI = 1 << 1, /* Use NPAR "ARI", see 6bc04997b6ea */ + NAMING_INFINIBAND = 1 << 2, /* Use "ib" prefix for infiniband, see 938d30aa98df */ + NAMING_ZERO_ACPI_INDEX = 1 << 3, /* Use zero acpi_index field, see d81186ef4f6a */ + NAMING_ALLOW_RERENAMES = 1 << 4, /* Allow re-renaming of devices, see #9006 */ + NAMING_STABLE_VIRTUAL_MACS = 1 << 5, /* Use device name to generate MAC, see 6d3646406560 */ + NAMING_NETDEVSIM = 1 << 6, /* Generate names for netdevsim devices, see eaa9d507d855 */ + NAMING_LABEL_NOPREFIX = 1 << 7, /* Don't prepend ID_NET_LABEL_ONBOARD with interface type prefix */ + NAMING_NSPAWN_LONG_HASH = 1 << 8, /* Shorten nspawn interfaces by including 24bit hash, instead of simple truncation */ + NAMING_BRIDGE_NO_SLOT = 1 << 9, /* Don't use PCI hotplug slot information if the corresponding device is a PCI bridge */ + + /* And now the masks that combine the features above */ + NAMING_V238 = 0, + NAMING_V239 = NAMING_V238 | NAMING_SR_IOV_V | NAMING_NPAR_ARI, + NAMING_V240 = NAMING_V239 | NAMING_INFINIBAND | NAMING_ZERO_ACPI_INDEX | NAMING_ALLOW_RERENAMES, + NAMING_V241 = NAMING_V240 | NAMING_STABLE_VIRTUAL_MACS, + NAMING_V243 = NAMING_V241 | NAMING_NETDEVSIM | NAMING_LABEL_NOPREFIX, + NAMING_V245 = NAMING_V243 | NAMING_NSPAWN_LONG_HASH, + NAMING_V247 = NAMING_V245 | NAMING_BRIDGE_NO_SLOT, + + _NAMING_SCHEME_FLAGS_INVALID = -1, +} NamingSchemeFlags; + +typedef struct NamingScheme { + const char *name; + NamingSchemeFlags flags; +} NamingScheme; + +const NamingScheme* naming_scheme(void); + +static inline bool naming_scheme_has(NamingSchemeFlags flags) { + return FLAGS_SET(naming_scheme()->flags, flags); +} diff --git a/src/shared/nscd-flush.c b/src/shared/nscd-flush.c new file mode 100644 index 0000000..dfc47c4 --- /dev/null +++ b/src/shared/nscd-flush.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include <sys/poll.h> + +#include "fd-util.h" +#include "io-util.h" +#include "nscd-flush.h" +#include "socket-util.h" +#include "strv.h" +#include "time-util.h" + +#define NSCD_FLUSH_CACHE_TIMEOUT_USEC (5*USEC_PER_SEC) + +struct nscdInvalidateRequest { + int32_t version; + int32_t type; /* in glibc this is an enum. We don't replicate this here 1:1. Also, wtf, how unportable is that + * even? */ + int32_t key_len; + char dbname[]; +}; + +static const union sockaddr_union nscd_sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = "/run/nscd/socket", +}; + +static int nscd_flush_cache_one(const char *database, usec_t end) { + size_t req_size, has_written = 0, has_read = 0, l; + struct nscdInvalidateRequest *req; + _cleanup_close_ int fd = -1; + int32_t resp; + int events; + + assert(database); + + l = strlen(database); + req_size = offsetof(struct nscdInvalidateRequest, dbname) + l + 1; + + req = alloca(req_size); + *req = (struct nscdInvalidateRequest) { + .version = 2, + .type = 10, + .key_len = l + 1, + }; + + strcpy(req->dbname, database); + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return log_debug_errno(errno, "Failed to allocate nscd socket: %m"); + + /* Note: connect() returns EINPROGRESS if O_NONBLOCK is set and establishing a connection takes time. The + * kernel lets us know this way that the connection is now being established, and we should watch with poll() + * to learn when it is fully established. That said, AF_UNIX on Linux never triggers this IRL (connect() is + * always instant on AF_UNIX), hence handling this is mostly just an exercise in defensive, protocol-agnostic + * programming. + * + * connect() returns EAGAIN if the socket's backlog limit has been reached. When we see this we give up right + * away, after all this entire function here is written in a defensive style so that a non-responding nscd + * doesn't stall us for good. (Even if we wanted to handle this better: the Linux kernel doesn't really have a + * nice way to connect() to a server synchronously with a time limit that would also cover dealing with the + * backlog limit. After all SO_RCVTIMEO and SR_SNDTIMEO don't apply to connect(), and alarm() is frickin' ugly + * and not really reasonably usable from threads-aware code.) */ + if (connect(fd, &nscd_sa.sa, SOCKADDR_UN_LEN(nscd_sa.un)) < 0) { + if (errno == EAGAIN) + return log_debug_errno(errno, "nscd is overloaded (backlog limit reached) and refuses to take further connections: %m"); + if (errno != EINPROGRESS) + return log_debug_errno(errno, "Failed to connect to nscd socket: %m"); + + /* Continue in case of EINPROGRESS, but don't bother with send() or recv() until being notified that + * establishing the connection is complete. */ + events = 0; + } else + events = POLLIN|POLLOUT; /* Let's assume initially that we can write and read to the fd, to suppress + * one poll() invocation */ + for (;;) { + usec_t p; + + if (events & POLLOUT) { + ssize_t m; + + assert(has_written < req_size); + + m = send(fd, (uint8_t*) req + has_written, req_size - has_written, MSG_NOSIGNAL); + if (m < 0) { + if (errno != EAGAIN) /* Note that EAGAIN is returned by the kernel whenever it can't + * take the data right now, and that includes if the connect() is + * asynchronous and we saw EINPROGRESS on it, and it hasn't + * completed yet. */ + return log_debug_errno(errno, "Failed to write to nscd socket: %m"); + } else + has_written += m; + } + + if (events & (POLLIN|POLLERR|POLLHUP)) { + ssize_t m; + + if (has_read >= sizeof(resp)) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Response from nscd longer than expected: %m"); + + m = recv(fd, (uint8_t*) &resp + has_read, sizeof(resp) - has_read, 0); + if (m < 0) { + if (errno != EAGAIN) + return log_debug_errno(errno, "Failed to read from nscd socket: %m"); + } else if (m == 0) { /* EOF */ + if (has_read == 0 && has_written >= req_size) /* Older nscd immediately terminated the + * connection, accept that as OK */ + return 1; + + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "nscd prematurely ended connection."); + } else + has_read += m; + } + + if (has_written >= req_size && has_read >= sizeof(resp)) { /* done? */ + if (resp < 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "nscd sent us a negative error number: %i", resp); + if (resp > 0) + return log_debug_errno(resp, "nscd return failure code on invalidating '%s'.", database); + return 1; + } + + p = now(CLOCK_MONOTONIC); + if (p >= end) + return -ETIMEDOUT; + + events = fd_wait_for_event(fd, POLLIN | (has_written < req_size ? POLLOUT : 0), end - p); + if (events < 0) + return events; + } +} + +int nscd_flush_cache(char **databases) { + usec_t end; + int r = 0; + char **i; + + /* Tries to invalidate the specified database in nscd. We do this carefully, with a 5s timeout, so that we + * don't block indefinitely on another service. */ + + end = usec_add(now(CLOCK_MONOTONIC), NSCD_FLUSH_CACHE_TIMEOUT_USEC); + + STRV_FOREACH(i, databases) { + int k; + + k = nscd_flush_cache_one(*i, end); + if (k < 0 && r >= 0) + r = k; + } + + return r; +} diff --git a/src/shared/nscd-flush.h b/src/shared/nscd-flush.h new file mode 100644 index 0000000..5aafa9a --- /dev/null +++ b/src/shared/nscd-flush.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +int nscd_flush_cache(char **databases); diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c new file mode 100644 index 0000000..2845041 --- /dev/null +++ b/src/shared/nsflags.c @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "nsflags.h" +#include "string-util.h" + +const struct namespace_flag_map namespace_flag_map[] = { + { CLONE_NEWCGROUP, "cgroup" }, + { CLONE_NEWIPC, "ipc" }, + { CLONE_NEWNET, "net" }, + /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more + * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ + { CLONE_NEWNS, "mnt" }, + { CLONE_NEWPID, "pid" }, + { CLONE_NEWUSER, "user" }, + { CLONE_NEWUTS, "uts" }, + {} +}; + +int namespace_flags_from_string(const char *name, unsigned long *ret) { + unsigned long flags = 0; + int r; + + assert_se(ret); + + for (;;) { + _cleanup_free_ char *word = NULL; + unsigned long f = 0; + unsigned i; + + r = extract_first_word(&name, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + for (i = 0; namespace_flag_map[i].name; i++) + if (streq(word, namespace_flag_map[i].name)) { + f = namespace_flag_map[i].flag; + break; + } + + if (f == 0) + return -EINVAL; + + flags |= f; + } + + *ret = flags; + return 0; +} + +int namespace_flags_to_string(unsigned long flags, char **ret) { + _cleanup_free_ char *s = NULL; + unsigned i; + + for (i = 0; namespace_flag_map[i].name; i++) { + if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag) + continue; + + if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name, NULL)) + return -ENOMEM; + } + + *ret = TAKE_PTR(s); + + return 0; +} diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h new file mode 100644 index 0000000..3d774c7 --- /dev/null +++ b/src/shared/nsflags.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "missing_sched.h" + +/* The combination of all namespace flags defined by the kernel. The right type for this isn't clear. setns() and + * unshare() expect these flags to be passed as (signed) "int", while clone() wants them as "unsigned long". The latter + * is definitely more appropriate for a flags parameter, and also the larger type of the two, hence let's stick to that + * here. */ +#define NAMESPACE_FLAGS_ALL \ + ((unsigned long) (CLONE_NEWCGROUP| \ + CLONE_NEWIPC| \ + CLONE_NEWNET| \ + CLONE_NEWNS| \ + CLONE_NEWPID| \ + CLONE_NEWUSER| \ + CLONE_NEWUTS)) + +#define NAMESPACE_FLAGS_INITIAL ((unsigned long) -1) + +int namespace_flags_from_string(const char *name, unsigned long *ret); +int namespace_flags_to_string(unsigned long flags, char **ret); + +struct namespace_flag_map { + unsigned long flag; + const char *name; +}; + +extern const struct namespace_flag_map namespace_flag_map[]; diff --git a/src/shared/numa-util.c b/src/shared/numa-util.c new file mode 100644 index 0000000..7e41d68 --- /dev/null +++ b/src/shared/numa-util.c @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <sched.h> + +#include "alloc-util.h" +#include "cpu-set-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "macro.h" +#include "missing_syscall.h" +#include "numa-util.h" +#include "stdio-util.h" +#include "string-table.h" + +bool numa_policy_is_valid(const NUMAPolicy *policy) { + assert(policy); + + if (!mpol_is_valid(numa_policy_get_type(policy))) + return false; + + if (!policy->nodes.set && + !IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED)) + return false; + + if (policy->nodes.set && + numa_policy_get_type(policy) == MPOL_PREFERRED && + CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1) + return false; + + return true; +} + +static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) { + unsigned node, bits = 0, ulong_bits; + _cleanup_free_ unsigned long *out = NULL; + + assert(policy); + assert(ret_maxnode); + assert(ret_nodes); + + if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) || + (numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) { + *ret_nodes = NULL; + *ret_maxnode = 0; + return 0; + } + + bits = policy->nodes.allocated * 8; + ulong_bits = sizeof(unsigned long) * 8; + + out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long))); + if (!out) + return -ENOMEM; + + /* We don't make any assumptions about internal type libc is using to store NUMA node mask. + Hence we need to convert the node mask to the representation expected by set_mempolicy() */ + for (node = 0; node < bits; node++) + if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set)) + out[node / ulong_bits] |= 1ul << (node % ulong_bits); + + *ret_nodes = TAKE_PTR(out); + *ret_maxnode = bits + 1; + return 0; +} + +int apply_numa_policy(const NUMAPolicy *policy) { + int r; + _cleanup_free_ unsigned long *nodes = NULL; + unsigned long maxnode; + + assert(policy); + + if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS) + return -EOPNOTSUPP; + + if (!numa_policy_is_valid(policy)) + return -EINVAL; + + r = numa_policy_to_mempolicy(policy, &maxnode, &nodes); + if (r < 0) + return r; + + r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode); + if (r < 0) + return -errno; + + return 0; +} + +int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret) { + int r; + size_t i; + _cleanup_(cpu_set_reset) CPUSet s = {}; + + assert(policy); + assert(ret); + + for (i = 0; i < policy->nodes.allocated * 8; i++) { + _cleanup_free_ char *l = NULL; + char p[STRLEN("/sys/devices/system/node/node//cpulist") + DECIMAL_STR_MAX(size_t) + 1]; + _cleanup_(cpu_set_reset) CPUSet part = {}; + + if (!CPU_ISSET_S(i, policy->nodes.allocated, policy->nodes.set)) + continue; + + xsprintf(p, "/sys/devices/system/node/node%zu/cpulist", i); + + r = read_one_line_file(p, &l); + if (r < 0) + return r; + + r = parse_cpu_set(l, &part); + if (r < 0) + return r; + + r = cpu_set_add_all(&s, &part); + if (r < 0) + return r; + } + + *ret = s; + s = (CPUSet) {}; + + return 0; +} + +static int numa_max_node(void) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r, max_node = 0; + + d = opendir("/sys/devices/system/node"); + if (!d) + return -errno; + + FOREACH_DIRENT(de, d, break) { + int node; + const char *n; + + (void) dirent_ensure_type(d, de); + + if (de->d_type != DT_DIR) + continue; + + n = startswith(de->d_name, "node"); + if (!n) + continue; + + r = safe_atoi(n, &node); + if (r < 0) + continue; + + if (node > max_node) + max_node = node; + } + + return max_node; +} + +int numa_mask_add_all(CPUSet *mask) { + int m; + + assert(mask); + + m = numa_max_node(); + if (m < 0) { + log_debug_errno(m, "Failed to determine maximum NUMA node index, assuming 1023: %m"); + m = 1023; /* CONFIG_NODES_SHIFT is set to 10 on x86_64, i.e. 1024 NUMA nodes in total */ + } + + for (int i = 0; i <= m; i++) { + int r; + + r = cpu_set_add(mask, i); + if (r < 0) + return r; + } + + return 0; +} + +static const char* const mpol_table[] = { + [MPOL_DEFAULT] = "default", + [MPOL_PREFERRED] = "preferred", + [MPOL_BIND] = "bind", + [MPOL_INTERLEAVE] = "interleave", + [MPOL_LOCAL] = "local", +}; + +DEFINE_STRING_TABLE_LOOKUP(mpol, int); diff --git a/src/shared/numa-util.h b/src/shared/numa-util.h new file mode 100644 index 0000000..2f736c9 --- /dev/null +++ b/src/shared/numa-util.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "cpu-set-util.h" +#include "missing_syscall.h" + +static inline bool mpol_is_valid(int t) { + return t >= MPOL_DEFAULT && t <= MPOL_LOCAL; +} + +typedef struct NUMAPolicy { + /* Always use numa_policy_get_type() to read the value */ + int type; + CPUSet nodes; +} NUMAPolicy; + +bool numa_policy_is_valid(const NUMAPolicy *p); + +static inline int numa_policy_get_type(const NUMAPolicy *p) { + return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type; +} + +static inline void numa_policy_reset(NUMAPolicy *p) { + assert(p); + cpu_set_reset(&p->nodes); + p->type = -1; +} + +int apply_numa_policy(const NUMAPolicy *policy); +int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *set); + +int numa_mask_add_all(CPUSet *mask); + +const char* mpol_to_string(int i) _const_; +int mpol_from_string(const char *s) _pure_; diff --git a/src/shared/offline-passwd.c b/src/shared/offline-passwd.c new file mode 100644 index 0000000..b607aac --- /dev/null +++ b/src/shared/offline-passwd.c @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fd-util.h" +#include "fs-util.h" +#include "offline-passwd.h" +#include "path-util.h" +#include "user-util.h" + +DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(uid_gid_hash_ops, char, string_hash_func, string_compare_func, free); + +static int open_passwd_file(const char *root, const char *fname, FILE **ret_file) { + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + + fd = chase_symlinks_and_open(fname, root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC, &p); + if (fd < 0) + return fd; + + FILE *f = fdopen(fd, "r"); + if (!f) + return -errno; + + TAKE_FD(fd); + + log_debug("Reading %s entries from %s...", basename(fname), p); + + *ret_file = f; + return 0; +} + +static int populate_uid_cache(const char *root, Hashmap **ret) { + _cleanup_(hashmap_freep) Hashmap *cache = NULL; + int r; + + cache = hashmap_new(&uid_gid_hash_ops); + if (!cache) + return -ENOMEM; + + /* The directory list is hardcoded here: /etc is the standard, and rpm-ostree uses /usr/lib. This + * could be made configurable, but I don't see the point right now. */ + + const char *fname; + FOREACH_STRING(fname, "/etc/passwd", "/usr/lib/passwd") { + _cleanup_fclose_ FILE *f = NULL; + + r = open_passwd_file(root, fname, &f); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + + struct passwd *pw; + while ((r = fgetpwent_sane(f, &pw)) > 0) { + _cleanup_free_ char *n = NULL; + + n = strdup(pw->pw_name); + if (!n) + return -ENOMEM; + + r = hashmap_put(cache, n, UID_TO_PTR(pw->pw_uid)); + if (IN_SET(r, 0 -EEXIST)) + continue; + if (r < 0) + return r; + TAKE_PTR(n); + } + } + + *ret = TAKE_PTR(cache); + return 0; +} + +static int populate_gid_cache(const char *root, Hashmap **ret) { + _cleanup_(hashmap_freep) Hashmap *cache = NULL; + int r; + + cache = hashmap_new(&uid_gid_hash_ops); + if (!cache) + return -ENOMEM; + + const char *fname; + FOREACH_STRING(fname, "/etc/group", "/usr/lib/group") { + _cleanup_fclose_ FILE *f = NULL; + + r = open_passwd_file(root, fname, &f); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + + struct group *gr; + while ((r = fgetgrent_sane(f, &gr)) > 0) { + _cleanup_free_ char *n = NULL; + + n = strdup(gr->gr_name); + if (!n) + return -ENOMEM; + + r = hashmap_put(cache, n, GID_TO_PTR(gr->gr_gid)); + if (IN_SET(r, 0, -EEXIST)) + continue; + if (r < 0) + return r; + TAKE_PTR(n); + } + } + + *ret = TAKE_PTR(cache); + return 0; +} + +int name_to_uid_offline( + const char *root, + const char *user, + uid_t *ret_uid, + Hashmap **cache) { + + void *found; + int r; + + assert(user); + assert(ret_uid); + assert(cache); + + if (!*cache) { + r = populate_uid_cache(root, cache); + if (r < 0) + return r; + } + + found = hashmap_get(*cache, user); + if (!found) + return -ESRCH; + + *ret_uid = PTR_TO_UID(found); + return 0; +} + +int name_to_gid_offline( + const char *root, + const char *group, + gid_t *ret_gid, + Hashmap **cache) { + + void *found; + int r; + + assert(group); + assert(ret_gid); + assert(cache); + + if (!*cache) { + r = populate_gid_cache(root, cache); + if (r < 0) + return r; + } + + found = hashmap_get(*cache, group); + if (!found) + return -ESRCH; + + *ret_gid = PTR_TO_GID(found); + return 0; +} diff --git a/src/shared/offline-passwd.h b/src/shared/offline-passwd.h new file mode 100644 index 0000000..587af7b --- /dev/null +++ b/src/shared/offline-passwd.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/types.h> + +#include "hashmap.h" + +int name_to_uid_offline(const char *root, const char *user, uid_t *ret_uid, Hashmap **cache); +int name_to_gid_offline(const char *root, const char *group, gid_t *ret_gid, Hashmap **cache); diff --git a/src/shared/openssl-util.h b/src/shared/openssl-util.h new file mode 100644 index 0000000..1b49834 --- /dev/null +++ b/src/shared/openssl-util.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#if HAVE_OPENSSL +# include <openssl/pem.h> + +DEFINE_TRIVIAL_CLEANUP_FUNC(X509*, X509_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(X509_NAME*, X509_NAME_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_PKEY_CTX*, EVP_PKEY_CTX_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_CIPHER_CTX*, EVP_CIPHER_CTX_free); + +#endif diff --git a/src/shared/os-util.c b/src/shared/os-util.c new file mode 100644 index 0000000..3b7e495 --- /dev/null +++ b/src/shared/os-util.c @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "env-file.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "macro.h" +#include "os-util.h" +#include "string-util.h" +#include "strv.h" + +int path_is_os_tree(const char *path) { + int r; + + assert(path); + + /* Does the path exist at all? If not, generate an error immediately. This is useful so that a missing root dir + * always results in -ENOENT, and we can properly distinguish the case where the whole root doesn't exist from + * the case where just the os-release file is missing. */ + if (laccess(path, F_OK) < 0) + return -errno; + + /* We use {/etc|/usr/lib}/os-release as flag file if something is an OS */ + r = open_os_release(path, NULL, NULL); + if (r == -ENOENT) /* We got nothing */ + return 0; + if (r < 0) + return r; + + return 1; +} + +int open_os_release(const char *root, char **ret_path, int *ret_fd) { + _cleanup_free_ char *q = NULL; + const char *p; + int r, fd; + + FOREACH_STRING(p, "/etc/os-release", "/usr/lib/os-release") { + r = chase_symlinks(p, root, CHASE_PREFIX_ROOT, + ret_path ? &q : NULL, + ret_fd ? &fd : NULL); + if (r != -ENOENT) + break; + } + if (r < 0) + return r; + + if (ret_fd) { + int real_fd; + + /* Convert the O_PATH fd into a proper, readable one */ + real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NOCTTY); + safe_close(fd); + if (real_fd < 0) + return real_fd; + + *ret_fd = real_fd; + } + + if (ret_path) + *ret_path = TAKE_PTR(q); + + return 0; +} + +int fopen_os_release(const char *root, char **ret_path, FILE **ret_file) { + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + FILE *f; + int r; + + if (!ret_file) + return open_os_release(root, ret_path, NULL); + + r = open_os_release(root, ret_path ? &p : NULL, &fd); + if (r < 0) + return r; + + f = take_fdopen(&fd, "r"); + if (!f) + return -errno; + + *ret_file = f; + + if (ret_path) + *ret_path = TAKE_PTR(p); + + return 0; +} + +int parse_os_release(const char *root, ...) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + va_list ap; + int r; + + r = fopen_os_release(root, &p, &f); + if (r < 0) + return r; + + va_start(ap, root); + r = parse_env_filev(f, p, ap); + va_end(ap); + + return r; +} + +int load_os_release_pairs(const char *root, char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + r = fopen_os_release(root, &p, &f); + if (r < 0) + return r; + + return load_env_file_pairs(f, p, ret); +} + +int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret) { + _cleanup_strv_free_ char **os_release_pairs = NULL, **os_release_pairs_prefixed = NULL; + char **p, **q; + int r; + + r = load_os_release_pairs(root, &os_release_pairs); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(p, q, os_release_pairs) { + char *line; + + /* We strictly return only the four main ID fields and ignore the rest */ + if (!STR_IN_SET(*p, "ID", "VERSION_ID", "BUILD_ID", "VARIANT_ID")) + continue; + + ascii_strlower(*p); + line = strjoin(prefix, *p, "=", *q); + if (!line) + return -ENOMEM; + r = strv_consume(&os_release_pairs_prefixed, line); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(os_release_pairs_prefixed); + + return 0; +} diff --git a/src/shared/os-util.h b/src/shared/os-util.h new file mode 100644 index 0000000..1d9b0b1 --- /dev/null +++ b/src/shared/os-util.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +int path_is_os_tree(const char *path); + +int open_os_release(const char *root, char **ret_path, int *ret_fd); +int fopen_os_release(const char *root, char **ret_path, FILE **ret_file); + +int parse_os_release(const char *root, ...) _sentinel_; +int load_os_release_pairs(const char *root, char ***ret); +int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret); diff --git a/src/shared/output-mode.c b/src/shared/output-mode.c new file mode 100644 index 0000000..1645b75 --- /dev/null +++ b/src/shared/output-mode.c @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "output-mode.h" +#include "string-table.h" + +JsonFormatFlags output_mode_to_json_format_flags(OutputMode m) { + + switch (m) { + + case OUTPUT_JSON_SSE: + return JSON_FORMAT_SSE; + + case OUTPUT_JSON_SEQ: + return JSON_FORMAT_SEQ; + + case OUTPUT_JSON_PRETTY: + return JSON_FORMAT_PRETTY; + + default: + return JSON_FORMAT_NEWLINE; + } +} + +static const char *const output_mode_table[_OUTPUT_MODE_MAX] = { + [OUTPUT_SHORT] = "short", + [OUTPUT_SHORT_FULL] = "short-full", + [OUTPUT_SHORT_ISO] = "short-iso", + [OUTPUT_SHORT_ISO_PRECISE] = "short-iso-precise", + [OUTPUT_SHORT_PRECISE] = "short-precise", + [OUTPUT_SHORT_MONOTONIC] = "short-monotonic", + [OUTPUT_SHORT_UNIX] = "short-unix", + [OUTPUT_VERBOSE] = "verbose", + [OUTPUT_EXPORT] = "export", + [OUTPUT_JSON] = "json", + [OUTPUT_JSON_PRETTY] = "json-pretty", + [OUTPUT_JSON_SSE] = "json-sse", + [OUTPUT_JSON_SEQ] = "json-seq", + [OUTPUT_CAT] = "cat", + [OUTPUT_WITH_UNIT] = "with-unit", +}; + +DEFINE_STRING_TABLE_LOOKUP(output_mode, OutputMode); diff --git a/src/shared/output-mode.h b/src/shared/output-mode.h new file mode 100644 index 0000000..a879054 --- /dev/null +++ b/src/shared/output-mode.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "json.h" +#include "macro.h" + +typedef enum OutputMode { + OUTPUT_SHORT, + OUTPUT_SHORT_FULL, + OUTPUT_SHORT_ISO, + OUTPUT_SHORT_ISO_PRECISE, + OUTPUT_SHORT_PRECISE, + OUTPUT_SHORT_MONOTONIC, + OUTPUT_SHORT_UNIX, + OUTPUT_VERBOSE, + OUTPUT_EXPORT, + OUTPUT_JSON, + OUTPUT_JSON_PRETTY, + OUTPUT_JSON_SSE, + OUTPUT_JSON_SEQ, + OUTPUT_CAT, + OUTPUT_WITH_UNIT, + _OUTPUT_MODE_MAX, + _OUTPUT_MODE_INVALID = -1 +} OutputMode; + +static inline bool OUTPUT_MODE_IS_JSON(OutputMode m) { + return IN_SET(m, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ); +} + +/* The output flags definitions are shared by the logs and process tree output. Some apply to both, some only to the + * logs output, others only to the process tree output. */ + +typedef enum OutputFlags { + OUTPUT_SHOW_ALL = 1 << 0, + OUTPUT_WARN_CUTOFF = 1 << 1, + OUTPUT_FULL_WIDTH = 1 << 2, + OUTPUT_COLOR = 1 << 3, + OUTPUT_CATALOG = 1 << 4, + OUTPUT_BEGIN_NEWLINE = 1 << 5, + OUTPUT_UTC = 1 << 6, + OUTPUT_KERNEL_THREADS = 1 << 7, + OUTPUT_NO_HOSTNAME = 1 << 8, +} OutputFlags; + +JsonFormatFlags output_mode_to_json_format_flags(OutputMode m); + +const char* output_mode_to_string(OutputMode m) _const_; +OutputMode output_mode_from_string(const char *s) _pure_; diff --git a/src/shared/pager.c b/src/shared/pager.c new file mode 100644 index 0000000..f689d9f --- /dev/null +++ b/src/shared/pager.c @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "sd-login.h" + +#include "copy.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "io-util.h" +#include "locale-util.h" +#include "log.h" +#include "macro.h" +#include "pager.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "signal-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "util.h" + +static pid_t pager_pid = 0; + +static int stored_stdout = -1; +static int stored_stderr = -1; +static bool stdout_redirected = false; +static bool stderr_redirected = false; + +_noreturn_ static void pager_fallback(void) { + int r; + + r = copy_bytes(STDIN_FILENO, STDOUT_FILENO, (uint64_t) -1, 0); + if (r < 0) { + log_error_errno(r, "Internal pager failed: %m"); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); +} + +static int no_quit_on_interrupt(int exe_name_fd, const char *less_opts) { + _cleanup_fclose_ FILE *file = NULL; + _cleanup_free_ char *line = NULL; + int r; + + assert(exe_name_fd >= 0); + assert(less_opts); + + /* This takes ownership of exe_name_fd */ + file = fdopen(exe_name_fd, "r"); + if (!file) { + safe_close(exe_name_fd); + return log_error_errno(errno, "Failed to create FILE object: %m"); + } + + /* Find the last line */ + for (;;) { + _cleanup_free_ char *t = NULL; + + r = read_line(file, LONG_LINE_MAX, &t); + if (r < 0) + return log_error_errno(r, "Failed to read from socket: %m"); + if (r == 0) + break; + + free_and_replace(line, t); + } + + /* We only treat "less" specially. + * Return true whenever option K is *not* set. */ + r = streq_ptr(line, "less") && !strchr(less_opts, 'K'); + + log_debug("Pager executable is \"%s\", options \"%s\", quit_on_interrupt: %s", + strnull(line), less_opts, yes_no(!r)); + return r; +} + +int pager_open(PagerFlags flags) { + _cleanup_close_pair_ int fd[2] = { -1, -1 }, exe_name_pipe[2] = { -1, -1 }; + _cleanup_strv_free_ char **pager_args = NULL; + const char *pager, *less_opts; + int r; + + if (flags & PAGER_DISABLE) + return 0; + + if (pager_pid > 0) + return 1; + + if (terminal_is_dumb()) + return 0; + + if (!is_main_thread()) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Pager invoked from wrong thread."); + + pager = getenv("SYSTEMD_PAGER"); + if (!pager) + pager = getenv("PAGER"); + + if (pager) { + pager_args = strv_split(pager, WHITESPACE); + if (!pager_args) + return log_oom(); + + /* If the pager is explicitly turned off, honour it */ + if (strv_isempty(pager_args) || strv_equal(pager_args, STRV_MAKE("cat"))) + return 0; + } + + /* Determine and cache number of columns/lines before we spawn the pager so that we get the value from the + * actual tty */ + (void) columns(); + (void) lines(); + + if (pipe2(fd, O_CLOEXEC) < 0) + return log_error_errno(errno, "Failed to create pager pipe: %m"); + + /* This is a pipe to feed the name of the executed pager binary into the parent */ + if (pipe2(exe_name_pipe, O_CLOEXEC) < 0) + return log_error_errno(errno, "Failed to create exe_name pipe: %m"); + + /* Initialize a good set of less options */ + less_opts = getenv("SYSTEMD_LESS"); + if (!less_opts) + less_opts = "FRSXMK"; + if (flags & PAGER_JUMP_TO_END) + less_opts = strjoina(less_opts, " +G"); + + /* We set SIGINT as PR_DEATHSIG signal here, to match the "K" parameter we set in $LESS, which enables SIGINT behaviour. */ + r = safe_fork("(pager)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGINT|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pager_pid); + if (r < 0) + return r; + if (r == 0) { + const char *less_charset, *exe; + + /* In the child start the pager */ + + if (dup2(fd[0], STDIN_FILENO) < 0) { + log_error_errno(errno, "Failed to duplicate file descriptor to STDIN: %m"); + _exit(EXIT_FAILURE); + } + + safe_close_pair(fd); + + if (setenv("LESS", less_opts, 1) < 0) { + log_error_errno(errno, "Failed to set environment variable LESS: %m"); + _exit(EXIT_FAILURE); + } + + /* Initialize a good charset for less. This is particularly important if we output UTF-8 + * characters. */ + less_charset = getenv("SYSTEMD_LESSCHARSET"); + if (!less_charset && is_locale_utf8()) + less_charset = "utf-8"; + if (less_charset && + setenv("LESSCHARSET", less_charset, 1) < 0) { + log_error_errno(errno, "Failed to set environment variable LESSCHARSET: %m"); + _exit(EXIT_FAILURE); + } + + /* People might invoke us from sudo, don't needlessly allow less to be a way to shell out + * privileged stuff. If the user set $SYSTEMD_PAGERSECURE, trust their configuration of the + * pager. If they didn't, use secure mode when under euid is changed. If $SYSTEMD_PAGERSECURE + * wasn't explicitly set, and we autodetect the need for secure mode, only use the pager we + * know to be good. */ + int use_secure_mode = getenv_bool_secure("SYSTEMD_PAGERSECURE"); + bool trust_pager = use_secure_mode >= 0; + if (use_secure_mode == -ENXIO) { + uid_t uid; + + r = sd_pid_get_owner_uid(0, &uid); + if (r < 0) + log_debug_errno(r, "sd_pid_get_owner_uid() failed, enabling pager secure mode: %m"); + + use_secure_mode = r < 0 || uid != geteuid(); + + } else if (use_secure_mode < 0) { + log_warning_errno(use_secure_mode, "Unable to parse $SYSTEMD_PAGERSECURE, assuming true: %m"); + use_secure_mode = true; + } + + /* We generally always set variables used by less, even if we end up using a different pager. + * They shouldn't hurt in any case, and ideally other pagers would look at them too. */ + r = set_unset_env("LESSSECURE", use_secure_mode ? "1" : NULL, true); + if (r < 0) { + log_error_errno(r, "Failed to adjust environment variable LESSSECURE: %m"); + _exit(EXIT_FAILURE); + } + + if (trust_pager && pager_args) { /* The pager config might be set globally, and we cannot + * know if the user adjusted it to be appropriate for the + * secure mode. Thus, start the pager specified through + * envvars only when $SYSTEMD_PAGERSECURE was explicitly set + * as well. */ + r = loop_write(exe_name_pipe[1], pager_args[0], strlen(pager_args[0]) + 1, false); + if (r < 0) { + log_error_errno(r, "Failed to write pager name to socket: %m"); + _exit(EXIT_FAILURE); + } + + execvp(pager_args[0], pager_args); + log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, + "Failed to execute '%s', using fallback pagers: %m", pager_args[0]); + } + + /* Debian's alternatives command for pagers is called 'pager'. Note that we do not call + * sensible-pagers here, since that is just a shell script that implements a logic that is + * similar to this one anyway, but is Debian-specific. */ + FOREACH_STRING(exe, "pager", "less", "more") { + /* Only less implements secure mode right now. */ + if (use_secure_mode && !streq(exe, "less")) + continue; + + r = loop_write(exe_name_pipe[1], exe, strlen(exe) + 1, false); + if (r < 0) { + log_error_errno(r, "Failed to write pager name to socket: %m"); + _exit(EXIT_FAILURE); + } + execlp(exe, exe, NULL); + log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, + "Failed to execute '%s', using next fallback pager: %m", exe); + } + + /* Our builtin is also very secure. */ + r = loop_write(exe_name_pipe[1], "(built-in)", strlen("(built-in)") + 1, false); + if (r < 0) { + log_error_errno(r, "Failed to write pager name to socket: %m"); + _exit(EXIT_FAILURE); + } + /* Close pipe to signal the parent to start sending data */ + safe_close_pair(exe_name_pipe); + pager_fallback(); + /* not reached */ + } + + /* Return in the parent */ + stored_stdout = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 3); + if (dup2(fd[1], STDOUT_FILENO) < 0) { + stored_stdout = safe_close(stored_stdout); + return log_error_errno(errno, "Failed to duplicate pager pipe: %m"); + } + stdout_redirected = true; + + stored_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3); + if (dup2(fd[1], STDERR_FILENO) < 0) { + stored_stderr = safe_close(stored_stderr); + return log_error_errno(errno, "Failed to duplicate pager pipe: %m"); + } + stderr_redirected = true; + + exe_name_pipe[1] = safe_close(exe_name_pipe[1]); + + r = no_quit_on_interrupt(TAKE_FD(exe_name_pipe[0]), less_opts); + if (r < 0) + return r; + if (r > 0) + (void) ignore_signals(SIGINT, -1); + + return 1; +} + +void pager_close(void) { + + if (pager_pid <= 0) + return; + + /* Inform pager that we are done */ + (void) fflush(stdout); + if (stdout_redirected) + if (stored_stdout < 0 || dup2(stored_stdout, STDOUT_FILENO) < 0) + (void) close(STDOUT_FILENO); + stored_stdout = safe_close(stored_stdout); + (void) fflush(stderr); + if (stderr_redirected) + if (stored_stderr < 0 || dup2(stored_stderr, STDERR_FILENO) < 0) + (void) close(STDERR_FILENO); + stored_stderr = safe_close(stored_stderr); + stdout_redirected = stderr_redirected = false; + + (void) kill(pager_pid, SIGCONT); + (void) wait_for_terminate(pager_pid, NULL); + pager_pid = 0; +} + +bool pager_have(void) { + return pager_pid > 0; +} + +int show_man_page(const char *desc, bool null_stdio) { + const char *args[4] = { "man", NULL, NULL, NULL }; + char *e = NULL; + pid_t pid; + size_t k; + int r; + + k = strlen(desc); + + if (desc[k-1] == ')') + e = strrchr(desc, '('); + + if (e) { + char *page = NULL, *section = NULL; + + page = strndupa(desc, e - desc); + section = strndupa(e + 1, desc + k - e - 2); + + args[1] = section; + args[2] = page; + } else + args[1] = desc; + + r = safe_fork("(man)", FORK_RESET_SIGNALS|FORK_DEATHSIG|(null_stdio ? FORK_NULL_STDIO : 0)|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid); + if (r < 0) + return r; + if (r == 0) { + /* Child */ + execvp(args[0], (char**) args); + log_error_errno(errno, "Failed to execute man: %m"); + _exit(EXIT_FAILURE); + } + + return wait_for_terminate_and_check(NULL, pid, 0); +} diff --git a/src/shared/pager.h b/src/shared/pager.h new file mode 100644 index 0000000..b3b1b4f --- /dev/null +++ b/src/shared/pager.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +typedef enum PagerFlags { + PAGER_DISABLE = 1 << 0, + PAGER_JUMP_TO_END = 1 << 1, +} PagerFlags; + +int pager_open(PagerFlags flags); +void pager_close(void); +bool pager_have(void) _pure_; + +int show_man_page(const char *page, bool null_stdio); diff --git a/src/shared/pam-util.c b/src/shared/pam-util.c new file mode 100644 index 0000000..621e7fe --- /dev/null +++ b/src/shared/pam-util.c @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <security/pam_ext.h> +#include <syslog.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "errno-util.h" +#include "macro.h" +#include "pam-util.h" + +int pam_log_oom(pam_handle_t *handle) { + /* This is like log_oom(), but uses PAM logging */ + pam_syslog(handle, LOG_ERR, "Out of memory."); + return PAM_BUF_ERR; +} + +int pam_bus_log_create_error(pam_handle_t *handle, int r) { + /* This is like bus_log_create_error(), but uses PAM logging */ + pam_syslog(handle, LOG_ERR, "Failed to create bus message: %s", strerror_safe(r)); + return PAM_BUF_ERR; +} + +int pam_bus_log_parse_error(pam_handle_t *handle, int r) { + /* This is like bus_log_parse_error(), but uses PAM logging */ + pam_syslog(handle, LOG_ERR, "Failed to parse bus message: %s", strerror_safe(r)); + return PAM_BUF_ERR; +} + +static void cleanup_system_bus(pam_handle_t *handle, void *data, int error_status) { + sd_bus_flush_close_unref(data); +} + +int pam_acquire_bus_connection(pam_handle_t *handle, sd_bus **ret) { + _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL; + int r; + + assert(handle); + assert(ret); + + /* We cache the bus connection so that we can share it between the session and the authentication hooks */ + r = pam_get_data(handle, "systemd-system-bus", (const void**) &bus); + if (r == PAM_SUCCESS && bus) { + *ret = sd_bus_ref(TAKE_PTR(bus)); /* Increase the reference counter, so that the PAM data stays valid */ + return PAM_SUCCESS; + } + if (!IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA)) { + pam_syslog(handle, LOG_ERR, "Failed to get bus connection: %s", pam_strerror(handle, r)); + return r; + } + + r = sd_bus_open_system(&bus); + if (r < 0) { + pam_syslog(handle, LOG_ERR, "Failed to connect to system bus: %s", strerror_safe(r)); + return PAM_SERVICE_ERR; + } + + r = pam_set_data(handle, "systemd-system-bus", bus, cleanup_system_bus); + if (r != PAM_SUCCESS) { + pam_syslog(handle, LOG_ERR, "Failed to set PAM bus data: %s", pam_strerror(handle, r)); + return r; + } + + sd_bus_ref(bus); + *ret = TAKE_PTR(bus); + + return PAM_SUCCESS; +} + +int pam_release_bus_connection(pam_handle_t *handle) { + int r; + + r = pam_set_data(handle, "systemd-system-bus", NULL, NULL); + if (r != PAM_SUCCESS) + pam_syslog(handle, LOG_ERR, "Failed to release PAM user record data: %s", pam_strerror(handle, r)); + + return r; +} + +void pam_cleanup_free(pam_handle_t *handle, void *data, int error_status) { + /* A generic destructor for pam_set_data() that just frees the specified data */ + free(data); +} diff --git a/src/shared/pam-util.h b/src/shared/pam-util.h new file mode 100644 index 0000000..41f1835 --- /dev/null +++ b/src/shared/pam-util.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <security/pam_modules.h> + +#include "sd-bus.h" + +int pam_log_oom(pam_handle_t *handle); +int pam_bus_log_create_error(pam_handle_t *handle, int r); +int pam_bus_log_parse_error(pam_handle_t *handle, int r); + +int pam_acquire_bus_connection(pam_handle_t *handle, sd_bus **ret); +int pam_release_bus_connection(pam_handle_t *handle); + +void pam_cleanup_free(pam_handle_t *handle, void *data, int error_status); diff --git a/src/shared/pe-header.h b/src/shared/pe-header.h new file mode 100644 index 0000000..54433c7 --- /dev/null +++ b/src/shared/pe-header.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include <inttypes.h> + +#include "macro.h" +#include "sparse-endian.h" + +struct DosFileHeader { + uint8_t Magic[2]; + le16_t LastSize; + le16_t nBlocks; + le16_t nReloc; + le16_t HdrSize; + le16_t MinAlloc; + le16_t MaxAlloc; + le16_t ss; + le16_t sp; + le16_t Checksum; + le16_t ip; + le16_t cs; + le16_t RelocPos; + le16_t nOverlay; + le16_t reserved[4]; + le16_t OEMId; + le16_t OEMInfo; + le16_t reserved2[10]; + le32_t ExeHeader; +} _packed_; + +#define PE_HEADER_MACHINE_I386 0x014cU +#define PE_HEADER_MACHINE_X64 0x8664U + +struct PeFileHeader { + le16_t Machine; + le16_t NumberOfSections; + le32_t TimeDateStamp; + le32_t PointerToSymbolTable; + le32_t NumberOfSymbols; + le16_t SizeOfOptionalHeader; + le16_t Characteristics; +} _packed_; + +struct PeHeader { + uint8_t Magic[4]; + struct PeFileHeader FileHeader; +} _packed_; + +struct PeSectionHeader { + uint8_t Name[8]; + le32_t VirtualSize; + le32_t VirtualAddress; + le32_t SizeOfRawData; + le32_t PointerToRawData; + le32_t PointerToRelocations; + le32_t PointerToLinenumbers; + le16_t NumberOfRelocations; + le16_t NumberOfLinenumbers; + le32_t Characteristics; + } _packed_; diff --git a/src/shared/pkcs11-util.c b/src/shared/pkcs11-util.c new file mode 100644 index 0000000..e74f0be --- /dev/null +++ b/src/shared/pkcs11-util.c @@ -0,0 +1,932 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> + +#include "ask-password-api.h" +#include "escape.h" +#include "fd-util.h" +#include "io-util.h" +#include "memory-util.h" +#if HAVE_OPENSSL +#include "openssl-util.h" +#endif +#include "pkcs11-util.h" +#include "random-util.h" +#include "string-util.h" +#include "strv.h" + +bool pkcs11_uri_valid(const char *uri) { + const char *p; + + /* A very superficial checker for RFC7512 PKCS#11 URI syntax */ + + if (isempty(uri)) + return false; + + p = startswith(uri, "pkcs11:"); + if (!p) + return false; + + if (isempty(p)) + return false; + + if (!in_charset(p, ALPHANUMERICAL "-_?;&%=")) + return false; + + return true; +} + +#if HAVE_P11KIT + +int uri_from_string(const char *p, P11KitUri **ret) { + _cleanup_(p11_kit_uri_freep) P11KitUri *uri = NULL; + + assert(p); + assert(ret); + + uri = p11_kit_uri_new(); + if (!uri) + return -ENOMEM; + + if (p11_kit_uri_parse(p, P11_KIT_URI_FOR_ANY, uri) != P11_KIT_URI_OK) + return -EINVAL; + + *ret = TAKE_PTR(uri); + return 0; +} + +P11KitUri *uri_from_module_info(const CK_INFO *info) { + P11KitUri *uri; + + assert(info); + + uri = p11_kit_uri_new(); + if (!uri) + return NULL; + + *p11_kit_uri_get_module_info(uri) = *info; + return uri; +} + +P11KitUri *uri_from_slot_info(const CK_SLOT_INFO *slot_info) { + P11KitUri *uri; + + assert(slot_info); + + uri = p11_kit_uri_new(); + if (!uri) + return NULL; + + *p11_kit_uri_get_slot_info(uri) = *slot_info; + return uri; +} + +P11KitUri *uri_from_token_info(const CK_TOKEN_INFO *token_info) { + P11KitUri *uri; + + assert(token_info); + + uri = p11_kit_uri_new(); + if (!uri) + return NULL; + + *p11_kit_uri_get_token_info(uri) = *token_info; + return uri; +} + +CK_RV pkcs11_get_slot_list_malloc( + CK_FUNCTION_LIST *m, + CK_SLOT_ID **ret_slotids, + CK_ULONG *ret_n_slotids) { + + CK_RV rv; + + assert(m); + assert(ret_slotids); + assert(ret_n_slotids); + + for (unsigned tries = 0; tries < 16; tries++) { + _cleanup_free_ CK_SLOT_ID *slotids = NULL; + CK_ULONG n_slotids = 0; + + rv = m->C_GetSlotList(0, NULL, &n_slotids); + if (rv != CKR_OK) + return rv; + if (n_slotids == 0) { + *ret_slotids = NULL; + *ret_n_slotids = 0; + return CKR_OK; + } + + slotids = new(CK_SLOT_ID, n_slotids); + if (!slotids) + return CKR_HOST_MEMORY; + + rv = m->C_GetSlotList(0, slotids, &n_slotids); + if (rv == CKR_OK) { + *ret_slotids = TAKE_PTR(slotids); + *ret_n_slotids = n_slotids; + return CKR_OK; + } + + if (rv != CKR_BUFFER_TOO_SMALL) + return rv; + + /* Hu? Maybe somebody plugged something in and things changed? Let's try again */ + } + + return CKR_BUFFER_TOO_SMALL; +} + +char *pkcs11_token_label(const CK_TOKEN_INFO *token_info) { + char *t; + + /* The label is not NUL terminated and likely padded with spaces, let's make a copy here, so that we + * can strip that. */ + t = strndup((char*) token_info->label, sizeof(token_info->label)); + if (!t) + return NULL; + + strstrip(t); + return t; +} + +char *pkcs11_token_manufacturer_id(const CK_TOKEN_INFO *token_info) { + char *t; + + t = strndup((char*) token_info->manufacturerID, sizeof(token_info->manufacturerID)); + if (!t) + return NULL; + + strstrip(t); + return t; +} + +char *pkcs11_token_model(const CK_TOKEN_INFO *token_info) { + char *t; + + t = strndup((char*) token_info->model, sizeof(token_info->model)); + if (!t) + return NULL; + + strstrip(t); + return t; +} + +int pkcs11_token_login( + CK_FUNCTION_LIST *m, + CK_SESSION_HANDLE session, + CK_SLOT_ID slotid, + const CK_TOKEN_INFO *token_info, + const char *friendly_name, + const char *icon_name, + const char *keyname, + usec_t until, + char **ret_used_pin) { + + _cleanup_free_ char *token_uri_string = NULL, *token_uri_escaped = NULL, *id = NULL, *token_label = NULL; + _cleanup_(p11_kit_uri_freep) P11KitUri *token_uri = NULL; + CK_TOKEN_INFO updated_token_info; + int uri_result, r; + CK_RV rv; + + assert(m); + assert(token_info); + + token_label = pkcs11_token_label(token_info); + if (!token_label) + return log_oom(); + + token_uri = uri_from_token_info(token_info); + if (!token_uri) + return log_oom(); + + uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, &token_uri_string); + if (uri_result != P11_KIT_URI_OK) + return log_warning_errno(SYNTHETIC_ERRNO(EAGAIN), "Failed to format slot URI: %s", p11_kit_uri_message(uri_result)); + + if (FLAGS_SET(token_info->flags, CKF_PROTECTED_AUTHENTICATION_PATH)) { + rv = m->C_Login(session, CKU_USER, NULL, 0); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv)); + + log_info("Successfully logged into security token '%s' via protected authentication path.", token_label); + if (ret_used_pin) + *ret_used_pin = NULL; + return 0; + } + + if (!FLAGS_SET(token_info->flags, CKF_LOGIN_REQUIRED)) { + log_info("No login into security token '%s' required.", token_label); + if (ret_used_pin) + *ret_used_pin = NULL; + return 0; + } + + token_uri_escaped = cescape(token_uri_string); + if (!token_uri_escaped) + return log_oom(); + + id = strjoin("pkcs11:", token_uri_escaped); + if (!id) + return log_oom(); + + for (unsigned tries = 0; tries < 3; tries++) { + _cleanup_strv_free_erase_ char **passwords = NULL; + char **i, *e; + + e = getenv("PIN"); + if (e) { + passwords = strv_new(e); + if (!passwords) + return log_oom(); + + string_erase(e); + if (unsetenv("PIN") < 0) + return log_error_errno(errno, "Failed to unset $PIN: %m"); + } else { + _cleanup_free_ char *text = NULL; + + if (FLAGS_SET(token_info->flags, CKF_USER_PIN_FINAL_TRY)) + r = asprintf(&text, + "Please enter correct PIN for security token '%s' in order to unlock %s (final try):", + token_label, friendly_name); + else if (FLAGS_SET(token_info->flags, CKF_USER_PIN_COUNT_LOW)) + r = asprintf(&text, + "PIN has been entered incorrectly previously, please enter correct PIN for security token '%s' in order to unlock %s:", + token_label, friendly_name); + else if (tries == 0) + r = asprintf(&text, + "Please enter PIN for security token '%s' in order to unlock %s:", + token_label, friendly_name); + else + r = asprintf(&text, + "Please enter PIN for security token '%s' in order to unlock %s (try #%u):", + token_label, friendly_name, tries+1); + if (r < 0) + return log_oom(); + + /* We never cache PINs, simply because it's fatal if we use wrong PINs, since usually there are only 3 tries */ + r = ask_password_auto(text, icon_name, id, keyname, until, 0, &passwords); + if (r < 0) + return log_error_errno(r, "Failed to query PIN for security token '%s': %m", token_label); + } + + STRV_FOREACH(i, passwords) { + rv = m->C_Login(session, CKU_USER, (CK_UTF8CHAR*) *i, strlen(*i)); + if (rv == CKR_OK) { + + if (ret_used_pin) { + char *c; + + c = strdup(*i); + if (!c) + return log_oom(); + + *ret_used_pin = c; + } + + log_info("Successfully logged into security token '%s'.", token_label); + return 0; + } + if (rv == CKR_PIN_LOCKED) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), + "PIN has been locked, please reset PIN of security token '%s'.", token_label); + if (!IN_SET(rv, CKR_PIN_INCORRECT, CKR_PIN_LEN_RANGE)) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv)); + + /* Referesh the token info, so that we can prompt knowing the new flags if they changed. */ + rv = m->C_GetTokenInfo(slotid, &updated_token_info); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to acquire updated security token information for slot %lu: %s", + slotid, p11_kit_strerror(rv)); + + token_info = &updated_token_info; + log_notice("PIN for token '%s' is incorrect, please try again.", token_label); + } + } + + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Too many attempts to log into token '%s'.", token_label); +} + +int pkcs11_token_find_x509_certificate( + CK_FUNCTION_LIST *m, + CK_SESSION_HANDLE session, + P11KitUri *search_uri, + CK_OBJECT_HANDLE *ret_object) { + + bool found_class = false, found_certificate_type = false; + _cleanup_free_ CK_ATTRIBUTE *attributes_buffer = NULL; + CK_ULONG n_attributes, a, n_objects; + CK_ATTRIBUTE *attributes = NULL; + CK_OBJECT_HANDLE objects[2]; + CK_RV rv, rv2; + + assert(m); + assert(search_uri); + assert(ret_object); + + attributes = p11_kit_uri_get_attributes(search_uri, &n_attributes); + for (a = 0; a < n_attributes; a++) { + + /* We use the URI's included match attributes, but make them more strict. This allows users + * to specify a token URL instead of an object URL and the right thing should happen if + * there's only one suitable key on the token. */ + + switch (attributes[a].type) { + + case CKA_CLASS: { + CK_OBJECT_CLASS c; + + if (attributes[a].ulValueLen != sizeof(c)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CLASS attribute size."); + + memcpy(&c, attributes[a].pValue, sizeof(c)); + if (c != CKO_CERTIFICATE) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an X.509 certificate, refusing."); + + found_class = true; + break; + } + + case CKA_CERTIFICATE_TYPE: { + CK_CERTIFICATE_TYPE t; + + if (attributes[a].ulValueLen != sizeof(t)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CERTIFICATE_TYPE attribute size."); + + memcpy(&t, attributes[a].pValue, sizeof(t)); + if (t != CKC_X_509) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an X.509 certificate, refusing."); + + found_certificate_type = true; + break; + }} + } + + if (!found_class || !found_certificate_type) { + /* Hmm, let's slightly extend the attribute list we search for */ + + attributes_buffer = new(CK_ATTRIBUTE, n_attributes + !found_class + !found_certificate_type); + if (!attributes_buffer) + return log_oom(); + + memcpy(attributes_buffer, attributes, sizeof(CK_ATTRIBUTE) * n_attributes); + + if (!found_class) { + static const CK_OBJECT_CLASS class = CKO_CERTIFICATE; + + attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) { + .type = CKA_CLASS, + .pValue = (CK_OBJECT_CLASS*) &class, + .ulValueLen = sizeof(class), + }; + } + + if (!found_certificate_type) { + static const CK_CERTIFICATE_TYPE type = CKC_X_509; + + attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) { + .type = CKA_CERTIFICATE_TYPE, + .pValue = (CK_CERTIFICATE_TYPE*) &type, + .ulValueLen = sizeof(type), + }; + } + + attributes = attributes_buffer; + } + + rv = m->C_FindObjectsInit(session, attributes, n_attributes); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to initialize object find call: %s", p11_kit_strerror(rv)); + + rv = m->C_FindObjects(session, objects, ELEMENTSOF(objects), &n_objects); + rv2 = m->C_FindObjectsFinal(session); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to find objects: %s", p11_kit_strerror(rv)); + if (rv2 != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to finalize object find call: %s", p11_kit_strerror(rv)); + if (n_objects == 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), + "Failed to find selected X509 certificate on token."); + if (n_objects > 1) + return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), + "Configured URI matches multiple certificates, refusing."); + + *ret_object = objects[0]; + return 0; +} + +#if HAVE_OPENSSL +int pkcs11_token_read_x509_certificate( + CK_FUNCTION_LIST *m, + CK_SESSION_HANDLE session, + CK_OBJECT_HANDLE object, + X509 **ret_cert) { + + _cleanup_free_ void *buffer = NULL; + _cleanup_free_ char *t = NULL; + CK_ATTRIBUTE attribute = { + .type = CKA_VALUE + }; + CK_RV rv; + _cleanup_(X509_freep) X509 *x509 = NULL; + X509_NAME *name = NULL; + const unsigned char *p; + + rv = m->C_GetAttributeValue(session, object, &attribute, 1); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to read X.509 certificate size off token: %s", p11_kit_strerror(rv)); + + buffer = malloc(attribute.ulValueLen); + if (!buffer) + return log_oom(); + + attribute.pValue = buffer; + + rv = m->C_GetAttributeValue(session, object, &attribute, 1); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to read X.509 certificate data off token: %s", p11_kit_strerror(rv)); + + p = attribute.pValue; + x509 = d2i_X509(NULL, &p, attribute.ulValueLen); + if (!x509) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed parse X.509 certificate."); + + name = X509_get_subject_name(x509); + if (!name) + return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to acquire X.509 subject name."); + + t = X509_NAME_oneline(name, NULL, 0); + if (!t) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to format X.509 subject name as string."); + + log_debug("Using X.509 certificate issued for '%s'.", t); + + *ret_cert = TAKE_PTR(x509); + return 0; +} +#endif + +int pkcs11_token_find_private_key( + CK_FUNCTION_LIST *m, + CK_SESSION_HANDLE session, + P11KitUri *search_uri, + CK_OBJECT_HANDLE *ret_object) { + + bool found_decrypt = false, found_class = false, found_key_type = false; + _cleanup_free_ CK_ATTRIBUTE *attributes_buffer = NULL; + CK_ULONG n_attributes, a, n_objects; + CK_ATTRIBUTE *attributes = NULL; + CK_OBJECT_HANDLE objects[2]; + CK_RV rv, rv2; + + assert(m); + assert(search_uri); + assert(ret_object); + + attributes = p11_kit_uri_get_attributes(search_uri, &n_attributes); + for (a = 0; a < n_attributes; a++) { + + /* We use the URI's included match attributes, but make them more strict. This allows users + * to specify a token URL instead of an object URL and the right thing should happen if + * there's only one suitable key on the token. */ + + switch (attributes[a].type) { + + case CKA_CLASS: { + CK_OBJECT_CLASS c; + + if (attributes[a].ulValueLen != sizeof(c)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CLASS attribute size."); + + memcpy(&c, attributes[a].pValue, sizeof(c)); + if (c != CKO_PRIVATE_KEY) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Selected PKCS#11 object is not a private key, refusing."); + + found_class = true; + break; + } + + case CKA_DECRYPT: { + CK_BBOOL b; + + if (attributes[a].ulValueLen != sizeof(b)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_DECRYPT attribute size."); + + memcpy(&b, attributes[a].pValue, sizeof(b)); + if (!b) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Selected PKCS#11 object is not suitable for decryption, refusing."); + + found_decrypt = true; + break; + } + + case CKA_KEY_TYPE: { + CK_KEY_TYPE t; + + if (attributes[a].ulValueLen != sizeof(t)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_KEY_TYPE attribute size."); + + memcpy(&t, attributes[a].pValue, sizeof(t)); + if (t != CKK_RSA) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an RSA key, refusing."); + + found_key_type = true; + break; + }} + } + + if (!found_decrypt || !found_class || !found_key_type) { + /* Hmm, let's slightly extend the attribute list we search for */ + + attributes_buffer = new(CK_ATTRIBUTE, n_attributes + !found_decrypt + !found_class + !found_key_type); + if (!attributes_buffer) + return log_oom(); + + memcpy(attributes_buffer, attributes, sizeof(CK_ATTRIBUTE) * n_attributes); + + if (!found_decrypt) { + static const CK_BBOOL yes = true; + + attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) { + .type = CKA_DECRYPT, + .pValue = (CK_BBOOL*) &yes, + .ulValueLen = sizeof(yes), + }; + } + + if (!found_class) { + static const CK_OBJECT_CLASS class = CKO_PRIVATE_KEY; + + attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) { + .type = CKA_CLASS, + .pValue = (CK_OBJECT_CLASS*) &class, + .ulValueLen = sizeof(class), + }; + } + + if (!found_key_type) { + static const CK_KEY_TYPE type = CKK_RSA; + + attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) { + .type = CKA_KEY_TYPE, + .pValue = (CK_KEY_TYPE*) &type, + .ulValueLen = sizeof(type), + }; + } + + attributes = attributes_buffer; + } + + rv = m->C_FindObjectsInit(session, attributes, n_attributes); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to initialize object find call: %s", p11_kit_strerror(rv)); + + rv = m->C_FindObjects(session, objects, ELEMENTSOF(objects), &n_objects); + rv2 = m->C_FindObjectsFinal(session); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to find objects: %s", p11_kit_strerror(rv)); + if (rv2 != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to finalize object find call: %s", p11_kit_strerror(rv)); + if (n_objects == 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), + "Failed to find selected private key suitable for decryption on token."); + if (n_objects > 1) + return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), + "Configured private key URI matches multiple keys, refusing."); + + *ret_object = objects[0]; + return 0; +} + +int pkcs11_token_decrypt_data( + CK_FUNCTION_LIST *m, + CK_SESSION_HANDLE session, + CK_OBJECT_HANDLE object, + const void *encrypted_data, + size_t encrypted_data_size, + void **ret_decrypted_data, + size_t *ret_decrypted_data_size) { + + static const CK_MECHANISM mechanism = { + .mechanism = CKM_RSA_PKCS + }; + _cleanup_(erase_and_freep) CK_BYTE *dbuffer = NULL; + CK_ULONG dbuffer_size = 0; + CK_RV rv; + + assert(m); + assert(encrypted_data); + assert(encrypted_data_size > 0); + assert(ret_decrypted_data); + assert(ret_decrypted_data_size); + + rv = m->C_DecryptInit(session, (CK_MECHANISM*) &mechanism, object); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to initialize decryption on security token: %s", p11_kit_strerror(rv)); + + dbuffer_size = encrypted_data_size; /* Start with something reasonable */ + dbuffer = malloc(dbuffer_size); + if (!dbuffer) + return log_oom(); + + rv = m->C_Decrypt(session, (CK_BYTE*) encrypted_data, encrypted_data_size, dbuffer, &dbuffer_size); + if (rv == CKR_BUFFER_TOO_SMALL) { + erase_and_free(dbuffer); + + dbuffer = malloc(dbuffer_size); + if (!dbuffer) + return log_oom(); + + rv = m->C_Decrypt(session, (CK_BYTE*) encrypted_data, encrypted_data_size, dbuffer, &dbuffer_size); + } + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to decrypt key on security token: %s", p11_kit_strerror(rv)); + + log_info("Successfully decrypted key with security token."); + + *ret_decrypted_data = TAKE_PTR(dbuffer); + *ret_decrypted_data_size = dbuffer_size; + return 0; +} + +int pkcs11_token_acquire_rng( + CK_FUNCTION_LIST *m, + CK_SESSION_HANDLE session) { + + _cleanup_free_ void *buffer = NULL; + _cleanup_close_ int fd = -1; + size_t rps; + CK_RV rv; + int r; + + assert(m); + + /* While we are at it, let's read some RNG data from the PKCS#11 token and pass it to the kernel + * random pool. This should be cheap if we are talking to the device already. Note that we don't + * credit any entropy, since we don't know about the quality of the pkcs#11 token's RNG. Why bother + * at all? There are two sides to the argument whether to generate private keys on tokens or on the + * host. By crediting some data from the token RNG to the host's pool we at least can say that any + * key generated from it is at least as good as both sources individually. */ + + rps = random_pool_size(); + + buffer = malloc(rps); + if (!buffer) + return log_oom(); + + rv = m->C_GenerateRandom(session, buffer, rps); + if (rv != CKR_OK) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Failed to generate RNG data on security token: %s", p11_kit_strerror(rv)); + + fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return log_debug_errno(errno, "Failed to open /dev/urandom for writing: %m"); + + r = loop_write(fd, buffer, rps, false); + if (r < 0) + return log_debug_errno(r, "Failed to write PKCS#11 acquired random data to /dev/urandom: %m"); + + log_debug("Successfully written %zu bytes random data acquired via PKCS#11 to kernel random pool.", rps); + + return 0; +} + +static int token_process( + CK_FUNCTION_LIST *m, + CK_SLOT_ID slotid, + const CK_SLOT_INFO *slot_info, + const CK_TOKEN_INFO *token_info, + P11KitUri *search_uri, + pkcs11_find_token_callback_t callback, + void *userdata) { + + _cleanup_free_ char *token_label = NULL; + CK_SESSION_HANDLE session; + CK_RV rv; + int r; + + assert(m); + assert(slot_info); + assert(token_info); + + token_label = pkcs11_token_label(token_info); + if (!token_label) + return log_oom(); + + rv = m->C_OpenSession(slotid, CKF_SERIAL_SESSION, NULL, NULL, &session); + if (rv != CKR_OK) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Failed to create session for security token '%s': %s", token_label, p11_kit_strerror(rv)); + + if (callback) + r = callback(m, session, slotid, slot_info, token_info, search_uri, userdata); + else + r = 1; /* if not callback was specified, just say we found what we were looking for */ + + rv = m->C_CloseSession(session); + if (rv != CKR_OK) + log_warning("Failed to close session on PKCS#11 token, ignoring: %s", p11_kit_strerror(rv)); + + return r; +} + +static int slot_process( + CK_FUNCTION_LIST *m, + CK_SLOT_ID slotid, + P11KitUri *search_uri, + pkcs11_find_token_callback_t callback, + void *userdata) { + + _cleanup_(p11_kit_uri_freep) P11KitUri* slot_uri = NULL, *token_uri = NULL; + _cleanup_free_ char *token_uri_string = NULL; + CK_TOKEN_INFO token_info; + CK_SLOT_INFO slot_info; + int uri_result; + CK_RV rv; + + assert(m); + + /* We return -EAGAIN for all failures we can attribute to a specific slot in some way, so that the + * caller might try other slots before giving up. */ + + rv = m->C_GetSlotInfo(slotid, &slot_info); + if (rv != CKR_OK) { + log_warning("Failed to acquire slot info for slot %lu, ignoring slot: %s", slotid, p11_kit_strerror(rv)); + return -EAGAIN; + } + + slot_uri = uri_from_slot_info(&slot_info); + if (!slot_uri) + return log_oom(); + + if (DEBUG_LOGGING) { + _cleanup_free_ char *slot_uri_string = NULL; + + uri_result = p11_kit_uri_format(slot_uri, P11_KIT_URI_FOR_ANY, &slot_uri_string); + if (uri_result != P11_KIT_URI_OK) { + log_warning("Failed to format slot URI, ignoring slot: %s", p11_kit_uri_message(uri_result)); + return -EAGAIN; + } + + log_debug("Found slot with URI %s", slot_uri_string); + } + + rv = m->C_GetTokenInfo(slotid, &token_info); + if (rv == CKR_TOKEN_NOT_PRESENT) { + return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), + "Token not present in slot, ignoring."); + } else if (rv != CKR_OK) { + log_warning("Failed to acquire token info for slot %lu, ignoring slot: %s", slotid, p11_kit_strerror(rv)); + return -EAGAIN; + } + + token_uri = uri_from_token_info(&token_info); + if (!token_uri) + return log_oom(); + + uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, &token_uri_string); + if (uri_result != P11_KIT_URI_OK) { + log_warning("Failed to format slot URI: %s", p11_kit_uri_message(uri_result)); + return -EAGAIN; + } + + if (search_uri && !p11_kit_uri_match_token_info(search_uri, &token_info)) + return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), + "Found non-matching token with URI %s.", + token_uri_string); + + log_debug("Found matching token with URI %s.", token_uri_string); + + return token_process( + m, + slotid, + &slot_info, + &token_info, + search_uri, + callback, + userdata); +} + +static int module_process( + CK_FUNCTION_LIST *m, + P11KitUri *search_uri, + pkcs11_find_token_callback_t callback, + void *userdata) { + + _cleanup_free_ char *name = NULL, *module_uri_string = NULL; + _cleanup_(p11_kit_uri_freep) P11KitUri* module_uri = NULL; + _cleanup_free_ CK_SLOT_ID *slotids = NULL; + CK_ULONG n_slotids = 0; + int uri_result; + CK_INFO info; + size_t k; + CK_RV rv; + int r; + + assert(m); + + /* We ignore most errors from modules here, in order to skip over faulty modules: one faulty module + * should not have the effect that we don't try the others anymore. We indicate such per-module + * failures with -EAGAIN, which let's the caller try the next module. */ + + name = p11_kit_module_get_name(m); + if (!name) + return log_oom(); + + log_debug("Trying PKCS#11 module %s.", name); + + rv = m->C_GetInfo(&info); + if (rv != CKR_OK) { + log_warning("Failed to get info on PKCS#11 module, ignoring module: %s", p11_kit_strerror(rv)); + return -EAGAIN; + } + + module_uri = uri_from_module_info(&info); + if (!module_uri) + return log_oom(); + + uri_result = p11_kit_uri_format(module_uri, P11_KIT_URI_FOR_ANY, &module_uri_string); + if (uri_result != P11_KIT_URI_OK) { + log_warning("Failed to format module URI, ignoring module: %s", p11_kit_uri_message(uri_result)); + return -EAGAIN; + } + + log_debug("Found module with URI %s", module_uri_string); + + rv = pkcs11_get_slot_list_malloc(m, &slotids, &n_slotids); + if (rv != CKR_OK) { + log_warning("Failed to get slot list, ignoring module: %s", p11_kit_strerror(rv)); + return -EAGAIN; + } + if (n_slotids == 0) + return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN), + "This module has no slots? Ignoring module."); + + for (k = 0; k < n_slotids; k++) { + r = slot_process( + m, + slotids[k], + search_uri, + callback, + userdata); + if (r != -EAGAIN) + return r; + } + + return -EAGAIN; +} + +int pkcs11_find_token( + const char *pkcs11_uri, + pkcs11_find_token_callback_t callback, + void *userdata) { + + _cleanup_(p11_kit_modules_finalize_and_releasep) CK_FUNCTION_LIST **modules = NULL; + _cleanup_(p11_kit_uri_freep) P11KitUri *search_uri = NULL; + int r; + + /* Execute the specified callback for each matching token found. If nothing is found returns + * -EAGAIN. Logs about all errors, except for EAGAIN, which the caller has to log about. */ + + if (pkcs11_uri) { + r = uri_from_string(pkcs11_uri, &search_uri); + if (r < 0) + return log_error_errno(r, "Failed to parse PKCS#11 URI '%s': %m", pkcs11_uri); + } + + modules = p11_kit_modules_load_and_initialize(0); + if (!modules) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize pkcs11 modules"); + + for (CK_FUNCTION_LIST **i = modules; *i; i++) { + r = module_process( + *i, + search_uri, + callback, + userdata); + if (r != -EAGAIN) + return r; + } + + return -EAGAIN; +} + +#endif diff --git a/src/shared/pkcs11-util.h b/src/shared/pkcs11-util.h new file mode 100644 index 0000000..f14607d --- /dev/null +++ b/src/shared/pkcs11-util.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#if HAVE_P11KIT +# include <p11-kit/p11-kit.h> +# include <p11-kit/uri.h> +#endif + +#include "macro.h" +#include "openssl-util.h" +#include "time-util.h" + +bool pkcs11_uri_valid(const char *uri); + +#if HAVE_P11KIT +int uri_from_string(const char *p, P11KitUri **ret); + +P11KitUri *uri_from_module_info(const CK_INFO *info); +P11KitUri *uri_from_slot_info(const CK_SLOT_INFO *slot_info); +P11KitUri *uri_from_token_info(const CK_TOKEN_INFO *token_info); + +DEFINE_TRIVIAL_CLEANUP_FUNC(P11KitUri*, p11_kit_uri_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(CK_FUNCTION_LIST**, p11_kit_modules_finalize_and_release); + +CK_RV pkcs11_get_slot_list_malloc(CK_FUNCTION_LIST *m, CK_SLOT_ID **ret_slotids, CK_ULONG *ret_n_slotids); + +char *pkcs11_token_label(const CK_TOKEN_INFO *token_info); +char *pkcs11_token_manufacturer_id(const CK_TOKEN_INFO *token_info); +char *pkcs11_token_model(const CK_TOKEN_INFO *token_info); + +int pkcs11_token_login(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_SLOT_ID slotid, const CK_TOKEN_INFO *token_info, const char *friendly_name, const char *icon_name, const char *keyname, usec_t until, char **ret_used_pin); + +int pkcs11_token_find_x509_certificate(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, P11KitUri *search_uri, CK_OBJECT_HANDLE *ret_object); +#if HAVE_OPENSSL +int pkcs11_token_read_x509_certificate(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_OBJECT_HANDLE object, X509 **ret_cert); +#endif + +int pkcs11_token_find_private_key(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, P11KitUri *search_uri, CK_OBJECT_HANDLE *ret_object); +int pkcs11_token_decrypt_data(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_OBJECT_HANDLE object, const void *encrypted_data, size_t encrypted_data_size, void **ret_decrypted_data, size_t *ret_decrypted_data_size); + +int pkcs11_token_acquire_rng(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session); + +typedef int (*pkcs11_find_token_callback_t)(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_SLOT_ID slotid, const CK_SLOT_INFO *slot_info, const CK_TOKEN_INFO *token_info, P11KitUri *uri, void *userdata); +int pkcs11_find_token(const char *pkcs11_uri, pkcs11_find_token_callback_t callback, void *userdata); +#endif diff --git a/src/shared/pretty-print.c b/src/shared/pretty-print.c new file mode 100644 index 0000000..ca5b25a --- /dev/null +++ b/src/shared/pretty-print.c @@ -0,0 +1,325 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/utsname.h> +#include <errno.h> +#include <stdio.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "def.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "pager.h" +#include "path-util.h" +#include "pretty-print.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "util.h" + +bool urlify_enabled(void) { + static int cached_urlify_enabled = -1; + + /* Unfortunately 'less' doesn't support links like this yet 😭, hence let's disable this as long as there's a + * pager in effect. Let's drop this check as soon as less got fixed a and enough time passed so that it's safe + * to assume that a link-enabled 'less' version has hit most installations. */ + + if (cached_urlify_enabled < 0) { + int val; + + val = getenv_bool("SYSTEMD_URLIFY"); + if (val >= 0) + cached_urlify_enabled = val; + else + cached_urlify_enabled = colors_enabled() && !pager_have(); + } + + return cached_urlify_enabled; +} + +int terminal_urlify(const char *url, const char *text, char **ret) { + char *n; + + assert(url); + + /* Takes an URL and a pretty string and formats it as clickable link for the terminal. See + * https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda for details. */ + + if (isempty(text)) + text = url; + + if (urlify_enabled()) + n = strjoin("\x1B]8;;", url, "\a", text, "\x1B]8;;\a"); + else + n = strdup(text); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int file_url_from_path(const char *path, char **ret) { + _cleanup_free_ char *absolute = NULL; + struct utsname u; + char *url = NULL; + int r; + + if (uname(&u) < 0) + return -errno; + + if (!path_is_absolute(path)) { + r = path_make_absolute_cwd(path, &absolute); + if (r < 0) + return r; + + path = absolute; + } + + /* As suggested by https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda, let's include the local + * hostname here. Note that we don't use gethostname_malloc() or gethostname_strict() since we are interested + * in the raw string the kernel has set, whatever it may be, under the assumption that terminals are not overly + * careful with validating the strings either. */ + + url = strjoin("file://", u.nodename, path); + if (!url) + return -ENOMEM; + + *ret = url; + return 0; +} + +int terminal_urlify_path(const char *path, const char *text, char **ret) { + _cleanup_free_ char *url = NULL; + int r; + + assert(path); + + /* Much like terminal_urlify() above, but takes a file system path as input + * and turns it into a proper file:// URL first. */ + + if (isempty(path)) + return -EINVAL; + + if (isempty(text)) + text = path; + + if (!urlify_enabled()) { + char *n; + + n = strdup(text); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; + } + + r = file_url_from_path(path, &url); + if (r < 0) + return r; + + return terminal_urlify(url, text, ret); +} + +int terminal_urlify_man(const char *page, const char *section, char **ret) { + const char *url, *text; + + url = strjoina("man:", page, "(", section, ")"); + text = strjoina(page, "(", section, ") man page"); + + return terminal_urlify(url, text, ret); +} + +static int cat_file(const char *filename, bool newline) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *urlified = NULL; + int r; + + f = fopen(filename, "re"); + if (!f) + return -errno; + + r = terminal_urlify_path(filename, NULL, &urlified); + if (r < 0) + return r; + + printf("%s%s# %s%s\n", + newline ? "\n" : "", + ansi_highlight_blue(), + urlified, + ansi_normal()); + fflush(stdout); + + for (;;) { + _cleanup_free_ char *line = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read \"%s\": %m", filename); + if (r == 0) + break; + + puts(line); + } + + return 0; +} + +int cat_files(const char *file, char **dropins, CatFlags flags) { + char **path; + int r; + + if (file) { + r = cat_file(file, false); + if (r == -ENOENT && (flags & CAT_FLAGS_MAIN_FILE_OPTIONAL)) + printf("%s# Configuration file %s not found%s\n", + ansi_highlight_magenta(), + file, + ansi_normal()); + else if (r < 0) + return log_warning_errno(r, "Failed to cat %s: %m", file); + } + + STRV_FOREACH(path, dropins) { + r = cat_file(*path, file || path != dropins); + if (r < 0) + return log_warning_errno(r, "Failed to cat %s: %m", *path); + } + + return 0; +} + +void print_separator(void) { + + /* Outputs a separator line that resolves to whitespace when copied from the terminal. We do that by outputting + * one line filled with spaces with ANSI underline set, followed by a second (empty) line. */ + + if (underline_enabled()) { + size_t i, c; + + c = columns(); + + flockfile(stdout); + fputs_unlocked(ANSI_UNDERLINE, stdout); + + for (i = 0; i < c; i++) + fputc_unlocked(' ', stdout); + + fputs_unlocked(ANSI_NORMAL "\n\n", stdout); + funlockfile(stdout); + } else + fputs("\n\n", stdout); +} + +static int guess_type(const char **name, char ***prefixes, bool *is_collection, const char **extension) { + /* Try to figure out if name is like tmpfiles.d/ or systemd/system-presets/, + * i.e. a collection of directories without a main config file. */ + + _cleanup_free_ char *n = NULL; + bool usr = false, run = false, coll = false; + const char *ext = ".conf"; + /* This is static so that the array doesn't get deallocated when we exit the function */ + static const char* const std_prefixes[] = { CONF_PATHS(""), NULL }; + static const char* const usr_prefixes[] = { CONF_PATHS_USR(""), NULL }; + static const char* const run_prefixes[] = { "/run/", NULL }; + + if (path_equal(*name, "environment.d")) + /* Special case: we need to include /etc/environment in the search path, even + * though the whole concept is called environment.d. */ + *name = "environment"; + + n = strdup(*name); + if (!n) + return log_oom(); + + /* All systemd-style config files should support the /usr-/etc-/run split and + * dropins. Let's add a blanket rule that allows us to support them without keeping + * an explicit list. */ + if (path_startswith(n, "systemd") && endswith(n, ".conf")) + usr = true; + + delete_trailing_chars(n, "/"); + + if (endswith(n, ".d")) + coll = true; + + if (path_equal(n, "environment")) + usr = true; + + if (path_equal(n, "udev/hwdb.d")) + ext = ".hwdb"; + + if (path_equal(n, "udev/rules.d")) + ext = ".rules"; + + if (path_equal(n, "kernel/install.d")) + ext = ".install"; + + if (path_equal(n, "systemd/ntp-units.d")) { + coll = true; + ext = ".list"; + } + + if (path_equal(n, "systemd/relabel-extra.d")) { + coll = run = true; + ext = ".relabel"; + } + + if (PATH_IN_SET(n, "systemd/system-preset", "systemd/user-preset")) { + coll = true; + ext = ".preset"; + } + + if (path_equal(n, "systemd/user-preset")) + usr = true; + + *prefixes = (char**) (usr ? usr_prefixes : run ? run_prefixes : std_prefixes); + *is_collection = coll; + *extension = ext; + return 0; +} + +int conf_files_cat(const char *root, const char *name) { + _cleanup_strv_free_ char **dirs = NULL, **files = NULL; + _cleanup_free_ char *path = NULL; + char **prefixes, **prefix; + bool is_collection; + const char *extension; + char **t; + int r; + + r = guess_type(&name, &prefixes, &is_collection, &extension); + if (r < 0) + return r; + + STRV_FOREACH(prefix, prefixes) { + assert(endswith(*prefix, "/")); + r = strv_extendf(&dirs, "%s%s%s", *prefix, name, + is_collection ? "" : ".d"); + if (r < 0) + return log_error_errno(r, "Failed to build directory list: %m"); + } + + r = conf_files_list_strv(&files, extension, root, 0, (const char* const*) dirs); + if (r < 0) + return log_error_errno(r, "Failed to query file list: %m"); + + if (!is_collection) { + path = path_join(root, "/etc", name); + if (!path) + return log_oom(); + } + + if (DEBUG_LOGGING) { + log_debug("Looking for configuration in:"); + if (path) + log_debug(" %s", path); + STRV_FOREACH(t, dirs) + log_debug(" %s/*%s", *t, extension); + } + + /* show */ + return cat_files(path, files, CAT_FLAGS_MAIN_FILE_OPTIONAL); +} diff --git a/src/shared/pretty-print.h b/src/shared/pretty-print.h new file mode 100644 index 0000000..4619f4e --- /dev/null +++ b/src/shared/pretty-print.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +void print_separator(void); + +int file_url_from_path(const char *path, char **ret); + +bool urlify_enabled(void); + +int terminal_urlify(const char *url, const char *text, char **ret); +int terminal_urlify_path(const char *path, const char *text, char **ret); +int terminal_urlify_man(const char *page, const char *section, char **ret); + +typedef enum CatFlags { + CAT_FLAGS_MAIN_FILE_OPTIONAL = 1 << 0, +} CatFlags; + +int cat_files(const char *file, char **dropins, CatFlags flags); +int conf_files_cat(const char *root, const char *name); diff --git a/src/shared/psi-util.c b/src/shared/psi-util.c new file mode 100644 index 0000000..7a184d5 --- /dev/null +++ b/src/shared/psi-util.c @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "parse-util.h" +#include "psi-util.h" +#include "string-util.h" +#include "stat-util.h" +#include "strv.h" + +int read_resource_pressure(const char *path, PressureType type, ResourcePressure *ret) { + _cleanup_free_ char *line = NULL; + _cleanup_fclose_ FILE *f = NULL; + unsigned field_filled = 0; + ResourcePressure rp = {}; + const char *t, *cline; + char *word; + int r; + + assert(path); + assert(IN_SET(type, PRESSURE_TYPE_SOME, PRESSURE_TYPE_FULL)); + assert(ret); + + if (type == PRESSURE_TYPE_SOME) + t = "some"; + else if (type == PRESSURE_TYPE_FULL) + t = "full"; + else + return -EINVAL; + + r = fopen_unlocked(path, "re", &f); + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *l = NULL; + char *w; + + r = read_line(f, LONG_LINE_MAX, &l); + if (r < 0) + return r; + if (r == 0) + break; + + w = first_word(l, t); + if (w) { + line = TAKE_PTR(l); + cline = w; + break; + } + } + + if (!line) + return -ENODATA; + + /* extracts either avgX=Y.Z or total=X */ + while ((r = extract_first_word(&cline, &word, NULL, 0)) > 0) { + _cleanup_free_ char *w = word; + const char *v; + + if ((v = startswith(w, "avg10="))) { + if (field_filled & (1U << 0)) + return -EINVAL; + + field_filled |= 1U << 0; + r = parse_loadavg_fixed_point(v, &rp.avg10); + } else if ((v = startswith(w, "avg60="))) { + if (field_filled & (1U << 1)) + return -EINVAL; + + field_filled |= 1U << 1; + r = parse_loadavg_fixed_point(v, &rp.avg60); + } else if ((v = startswith(w, "avg300="))) { + if (field_filled & (1U << 2)) + return -EINVAL; + + field_filled |= 1U << 2; + r = parse_loadavg_fixed_point(v, &rp.avg300); + } else if ((v = startswith(w, "total="))) { + if (field_filled & (1U << 3)) + return -EINVAL; + + field_filled |= 1U << 3; + r = safe_atou64(v, &rp.total); + } else + continue; + + if (r < 0) + return r; + } + + if (r < 0) + return r; + + if (field_filled != 15U) + return -EINVAL; + + *ret = rp; + return 0; +} + +int is_pressure_supported(void) { + const char *p; + + FOREACH_STRING(p, "/proc/pressure/cpu", "/proc/pressure/io", "/proc/pressure/memory") + if (access(p, F_OK) < 0) { + if (errno == ENOENT) + return 0; + return -errno; + } + + return 1; +} diff --git a/src/shared/psi-util.h b/src/shared/psi-util.h new file mode 100644 index 0000000..415fbbd --- /dev/null +++ b/src/shared/psi-util.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "parse-util.h" +#include "time-util.h" + +typedef enum PressureType { + PRESSURE_TYPE_SOME, + PRESSURE_TYPE_FULL, +} PressureType; + +/* Averages are stored in fixed-point with 11 bit fractions */ +typedef struct ResourcePressure { + loadavg_t avg10; + loadavg_t avg60; + loadavg_t avg300; + usec_t total; +} ResourcePressure; + +/** Upstream 4.20+ format + * + * some avg10=0.22 avg60=0.17 avg300=1.11 total=58761459 + * full avg10=0.23 avg60=0.16 avg300=1.08 total=58464525 + */ +int read_resource_pressure(const char *path, PressureType type, ResourcePressure *ret); + +/* Was the kernel compiled with CONFIG_PSI=y? 1 if yes, 0 if not, negative on error. */ +int is_pressure_supported(void); diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c new file mode 100644 index 0000000..754b4f5 --- /dev/null +++ b/src/shared/ptyfwd.c @@ -0,0 +1,681 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <termios.h> +#include <unistd.h> + +#include "sd-event.h" + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "ptyfwd.h" +#include "terminal-util.h" +#include "time-util.h" + +struct PTYForward { + sd_event *event; + + int input_fd; + int output_fd; + int master; + + PTYForwardFlags flags; + + sd_event_source *stdin_event_source; + sd_event_source *stdout_event_source; + sd_event_source *master_event_source; + + sd_event_source *sigwinch_event_source; + + struct termios saved_stdin_attr; + struct termios saved_stdout_attr; + + bool close_input_fd:1; + bool close_output_fd:1; + + bool saved_stdin:1; + bool saved_stdout:1; + + bool stdin_readable:1; + bool stdin_hangup:1; + bool stdout_writable:1; + bool stdout_hangup:1; + bool master_readable:1; + bool master_writable:1; + bool master_hangup:1; + + bool read_from_master:1; + + bool done:1; + bool drain:1; + + bool last_char_set:1; + char last_char; + + char in_buffer[LINE_MAX], out_buffer[LINE_MAX]; + size_t in_buffer_full, out_buffer_full; + + usec_t escape_timestamp; + unsigned escape_counter; + + PTYForwardHandler handler; + void *userdata; +}; + +#define ESCAPE_USEC (1*USEC_PER_SEC) + +static void pty_forward_disconnect(PTYForward *f) { + + if (!f) + return; + + f->stdin_event_source = sd_event_source_unref(f->stdin_event_source); + f->stdout_event_source = sd_event_source_unref(f->stdout_event_source); + + f->master_event_source = sd_event_source_unref(f->master_event_source); + f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source); + f->event = sd_event_unref(f->event); + + if (f->output_fd >= 0) { + if (f->saved_stdout) + (void) tcsetattr(f->output_fd, TCSANOW, &f->saved_stdout_attr); + + /* STDIN/STDOUT should not be non-blocking normally, so let's reset it */ + (void) fd_nonblock(f->output_fd, false); + if (f->close_output_fd) + f->output_fd = safe_close(f->output_fd); + } + + if (f->input_fd >= 0) { + if (f->saved_stdin) + (void) tcsetattr(f->input_fd, TCSANOW, &f->saved_stdin_attr); + + (void) fd_nonblock(f->input_fd, false); + if (f->close_input_fd) + f->input_fd = safe_close(f->input_fd); + } + + f->saved_stdout = f->saved_stdin = false; +} + +static int pty_forward_done(PTYForward *f, int rcode) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + assert(f); + + if (f->done) + return 0; + + e = sd_event_ref(f->event); + + f->done = true; + pty_forward_disconnect(f); + + if (f->handler) + return f->handler(f, rcode, f->userdata); + else + return sd_event_exit(e, rcode < 0 ? EXIT_FAILURE : rcode); +} + +static bool look_for_escape(PTYForward *f, const char *buffer, size_t n) { + const char *p; + + assert(f); + assert(buffer); + assert(n > 0); + + for (p = buffer; p < buffer + n; p++) { + + /* Check for ^] */ + if (*p == 0x1D) { + usec_t nw = now(CLOCK_MONOTONIC); + + if (f->escape_counter == 0 || nw > f->escape_timestamp + ESCAPE_USEC) { + f->escape_timestamp = nw; + f->escape_counter = 1; + } else { + (f->escape_counter)++; + + if (f->escape_counter >= 3) + return true; + } + } else { + f->escape_timestamp = 0; + f->escape_counter = 0; + } + } + + return false; +} + +static bool ignore_vhangup(PTYForward *f) { + assert(f); + + if (f->flags & PTY_FORWARD_IGNORE_VHANGUP) + return true; + + if ((f->flags & PTY_FORWARD_IGNORE_INITIAL_VHANGUP) && !f->read_from_master) + return true; + + return false; +} + +static bool drained(PTYForward *f) { + int q = 0; + + assert(f); + + if (f->out_buffer_full > 0) + return false; + + if (f->master_readable) + return false; + + if (ioctl(f->master, TIOCINQ, &q) < 0) + log_debug_errno(errno, "TIOCINQ failed on master: %m"); + else if (q > 0) + return false; + + if (ioctl(f->master, TIOCOUTQ, &q) < 0) + log_debug_errno(errno, "TIOCOUTQ failed on master: %m"); + else if (q > 0) + return false; + + return true; +} + +static int shovel(PTYForward *f) { + ssize_t k; + + assert(f); + + while ((f->stdin_readable && f->in_buffer_full <= 0) || + (f->master_writable && f->in_buffer_full > 0) || + (f->master_readable && f->out_buffer_full <= 0) || + (f->stdout_writable && f->out_buffer_full > 0)) { + + if (f->stdin_readable && f->in_buffer_full < LINE_MAX) { + + k = read(f->input_fd, f->in_buffer + f->in_buffer_full, LINE_MAX - f->in_buffer_full); + if (k < 0) { + + if (errno == EAGAIN) + f->stdin_readable = false; + else if (errno == EIO || ERRNO_IS_DISCONNECT(errno)) { + f->stdin_readable = false; + f->stdin_hangup = true; + + f->stdin_event_source = sd_event_source_unref(f->stdin_event_source); + } else { + log_error_errno(errno, "read(): %m"); + return pty_forward_done(f, -errno); + } + } else if (k == 0) { + /* EOF on stdin */ + f->stdin_readable = false; + f->stdin_hangup = true; + + f->stdin_event_source = sd_event_source_unref(f->stdin_event_source); + } else { + /* Check if ^] has been pressed three times within one second. If we get this we quite + * immediately. */ + if (look_for_escape(f, f->in_buffer + f->in_buffer_full, k)) + return pty_forward_done(f, -ECANCELED); + + f->in_buffer_full += (size_t) k; + } + } + + if (f->master_writable && f->in_buffer_full > 0) { + + k = write(f->master, f->in_buffer, f->in_buffer_full); + if (k < 0) { + + if (IN_SET(errno, EAGAIN, EIO)) + f->master_writable = false; + else if (IN_SET(errno, EPIPE, ECONNRESET)) { + f->master_writable = f->master_readable = false; + f->master_hangup = true; + + f->master_event_source = sd_event_source_unref(f->master_event_source); + } else { + log_error_errno(errno, "write(): %m"); + return pty_forward_done(f, -errno); + } + } else { + assert(f->in_buffer_full >= (size_t) k); + memmove(f->in_buffer, f->in_buffer + k, f->in_buffer_full - k); + f->in_buffer_full -= k; + } + } + + if (f->master_readable && f->out_buffer_full < LINE_MAX) { + + k = read(f->master, f->out_buffer + f->out_buffer_full, LINE_MAX - f->out_buffer_full); + if (k < 0) { + + /* Note that EIO on the master device + * might be caused by vhangup() or + * temporary closing of everything on + * the other side, we treat it like + * EAGAIN here and try again, unless + * ignore_vhangup is off. */ + + if (errno == EAGAIN || (errno == EIO && ignore_vhangup(f))) + f->master_readable = false; + else if (IN_SET(errno, EPIPE, ECONNRESET, EIO)) { + f->master_readable = f->master_writable = false; + f->master_hangup = true; + + f->master_event_source = sd_event_source_unref(f->master_event_source); + } else { + log_error_errno(errno, "read(): %m"); + return pty_forward_done(f, -errno); + } + } else { + f->read_from_master = true; + f->out_buffer_full += (size_t) k; + } + } + + if (f->stdout_writable && f->out_buffer_full > 0) { + + k = write(f->output_fd, f->out_buffer, f->out_buffer_full); + if (k < 0) { + + if (errno == EAGAIN) + f->stdout_writable = false; + else if (errno == EIO || ERRNO_IS_DISCONNECT(errno)) { + f->stdout_writable = false; + f->stdout_hangup = true; + f->stdout_event_source = sd_event_source_unref(f->stdout_event_source); + } else { + log_error_errno(errno, "write(): %m"); + return pty_forward_done(f, -errno); + } + + } else { + + if (k > 0) { + f->last_char = f->out_buffer[k-1]; + f->last_char_set = true; + } + + assert(f->out_buffer_full >= (size_t) k); + memmove(f->out_buffer, f->out_buffer + k, f->out_buffer_full - k); + f->out_buffer_full -= k; + } + } + } + + if (f->stdin_hangup || f->stdout_hangup || f->master_hangup) { + /* Exit the loop if any side hung up and if there's + * nothing more to write or nothing we could write. */ + + if ((f->out_buffer_full <= 0 || f->stdout_hangup) && + (f->in_buffer_full <= 0 || f->master_hangup)) + return pty_forward_done(f, 0); + } + + /* If we were asked to drain, and there's nothing more to handle from the master, then call the callback + * too. */ + if (f->drain && drained(f)) + return pty_forward_done(f, 0); + + return 0; +} + +static int on_master_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) { + PTYForward *f = userdata; + + assert(f); + assert(e); + assert(e == f->master_event_source); + assert(fd >= 0); + assert(fd == f->master); + + if (revents & (EPOLLIN|EPOLLHUP)) + f->master_readable = true; + + if (revents & (EPOLLOUT|EPOLLHUP)) + f->master_writable = true; + + return shovel(f); +} + +static int on_stdin_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) { + PTYForward *f = userdata; + + assert(f); + assert(e); + assert(e == f->stdin_event_source); + assert(fd >= 0); + assert(fd == f->input_fd); + + if (revents & (EPOLLIN|EPOLLHUP)) + f->stdin_readable = true; + + return shovel(f); +} + +static int on_stdout_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) { + PTYForward *f = userdata; + + assert(f); + assert(e); + assert(e == f->stdout_event_source); + assert(fd >= 0); + assert(fd == f->output_fd); + + if (revents & (EPOLLOUT|EPOLLHUP)) + f->stdout_writable = true; + + return shovel(f); +} + +static int on_sigwinch_event(sd_event_source *e, const struct signalfd_siginfo *si, void *userdata) { + PTYForward *f = userdata; + struct winsize ws; + + assert(f); + assert(e); + assert(e == f->sigwinch_event_source); + + /* The window size changed, let's forward that. */ + if (ioctl(f->output_fd, TIOCGWINSZ, &ws) >= 0) + (void) ioctl(f->master, TIOCSWINSZ, &ws); + + return 0; +} + +int pty_forward_new( + sd_event *event, + int master, + PTYForwardFlags flags, + PTYForward **ret) { + + _cleanup_(pty_forward_freep) PTYForward *f = NULL; + struct winsize ws; + int r; + + f = new(PTYForward, 1); + if (!f) + return -ENOMEM; + + *f = (struct PTYForward) { + .flags = flags, + .master = -1, + .input_fd = -1, + .output_fd = -1, + }; + + if (event) + f->event = sd_event_ref(event); + else { + r = sd_event_default(&f->event); + if (r < 0) + return r; + } + + if (FLAGS_SET(flags, PTY_FORWARD_READ_ONLY)) + f->output_fd = STDOUT_FILENO; + else { + /* If we shall be invoked in interactive mode, let's switch on non-blocking mode, so that we + * never end up staving one direction while we block on the other. However, let's be careful + * here and not turn on O_NONBLOCK for stdin/stdout directly, but of re-opened copies of + * them. This has two advantages: when we are killed abruptly the stdin/stdout fds won't be + * left in O_NONBLOCK state for the next process using them. In addition, if some process + * running in the background wants to continue writing to our stdout it can do so without + * being confused by O_NONBLOCK. */ + + f->input_fd = fd_reopen(STDIN_FILENO, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); + if (f->input_fd < 0) { + /* Handle failures gracefully, after all certain fd types cannot be reopened + * (sockets, …) */ + log_debug_errno(f->input_fd, "Failed to reopen stdin, using original fd: %m"); + + r = fd_nonblock(STDIN_FILENO, true); + if (r < 0) + return r; + + f->input_fd = STDIN_FILENO; + } else + f->close_input_fd = true; + + f->output_fd = fd_reopen(STDOUT_FILENO, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); + if (f->output_fd < 0) { + log_debug_errno(f->output_fd, "Failed to reopen stdout, using original fd: %m"); + + r = fd_nonblock(STDOUT_FILENO, true); + if (r < 0) + return r; + + f->output_fd = STDOUT_FILENO; + } else + f->close_output_fd = true; + } + + r = fd_nonblock(master, true); + if (r < 0) + return r; + + f->master = master; + + if (ioctl(f->output_fd, TIOCGWINSZ, &ws) < 0) + /* If we can't get the resolution from the output fd, then use our internal, regular width/height, + * i.e. something derived from $COLUMNS and $LINES if set. */ + ws = (struct winsize) { + .ws_row = lines(), + .ws_col = columns(), + }; + + (void) ioctl(master, TIOCSWINSZ, &ws); + + if (!(flags & PTY_FORWARD_READ_ONLY)) { + assert(f->input_fd >= 0); + + if (tcgetattr(f->input_fd, &f->saved_stdin_attr) >= 0) { + struct termios raw_stdin_attr; + + f->saved_stdin = true; + + raw_stdin_attr = f->saved_stdin_attr; + cfmakeraw(&raw_stdin_attr); + raw_stdin_attr.c_oflag = f->saved_stdin_attr.c_oflag; + tcsetattr(f->input_fd, TCSANOW, &raw_stdin_attr); + } + + if (tcgetattr(f->output_fd, &f->saved_stdout_attr) >= 0) { + struct termios raw_stdout_attr; + + f->saved_stdout = true; + + raw_stdout_attr = f->saved_stdout_attr; + cfmakeraw(&raw_stdout_attr); + raw_stdout_attr.c_iflag = f->saved_stdout_attr.c_iflag; + raw_stdout_attr.c_lflag = f->saved_stdout_attr.c_lflag; + tcsetattr(f->output_fd, TCSANOW, &raw_stdout_attr); + } + + r = sd_event_add_io(f->event, &f->stdin_event_source, f->input_fd, EPOLLIN|EPOLLET, on_stdin_event, f); + if (r < 0 && r != -EPERM) + return r; + + if (r >= 0) + (void) sd_event_source_set_description(f->stdin_event_source, "ptyfwd-stdin"); + } + + r = sd_event_add_io(f->event, &f->stdout_event_source, f->output_fd, EPOLLOUT|EPOLLET, on_stdout_event, f); + if (r == -EPERM) + /* stdout without epoll support. Likely redirected to regular file. */ + f->stdout_writable = true; + else if (r < 0) + return r; + else + (void) sd_event_source_set_description(f->stdout_event_source, "ptyfwd-stdout"); + + r = sd_event_add_io(f->event, &f->master_event_source, master, EPOLLIN|EPOLLOUT|EPOLLET, on_master_event, f); + if (r < 0) + return r; + + (void) sd_event_source_set_description(f->master_event_source, "ptyfwd-master"); + + r = sd_event_add_signal(f->event, &f->sigwinch_event_source, SIGWINCH, on_sigwinch_event, f); + if (r < 0) + return r; + + (void) sd_event_source_set_description(f->sigwinch_event_source, "ptyfwd-sigwinch"); + + *ret = TAKE_PTR(f); + + return 0; +} + +PTYForward *pty_forward_free(PTYForward *f) { + pty_forward_disconnect(f); + return mfree(f); +} + +int pty_forward_get_last_char(PTYForward *f, char *ch) { + assert(f); + assert(ch); + + if (!f->last_char_set) + return -ENXIO; + + *ch = f->last_char; + return 0; +} + +int pty_forward_set_ignore_vhangup(PTYForward *f, bool b) { + int r; + + assert(f); + + if (!!(f->flags & PTY_FORWARD_IGNORE_VHANGUP) == b) + return 0; + + SET_FLAG(f->flags, PTY_FORWARD_IGNORE_VHANGUP, b); + + if (!ignore_vhangup(f)) { + + /* We shall now react to vhangup()s? Let's check + * immediately if we might be in one */ + + f->master_readable = true; + r = shovel(f); + if (r < 0) + return r; + } + + return 0; +} + +bool pty_forward_get_ignore_vhangup(PTYForward *f) { + assert(f); + + return !!(f->flags & PTY_FORWARD_IGNORE_VHANGUP); +} + +bool pty_forward_is_done(PTYForward *f) { + assert(f); + + return f->done; +} + +void pty_forward_set_handler(PTYForward *f, PTYForwardHandler cb, void *userdata) { + assert(f); + + f->handler = cb; + f->userdata = userdata; +} + +bool pty_forward_drain(PTYForward *f) { + assert(f); + + /* Starts draining the forwarder. Specifically: + * + * - Returns true if there are no unprocessed bytes from the pty, false otherwise + * + * - Makes sure the handler function is called the next time the number of unprocessed bytes hits zero + */ + + f->drain = true; + return drained(f); +} + +int pty_forward_set_priority(PTYForward *f, int64_t priority) { + int r; + assert(f); + + if (f->stdin_event_source) { + r = sd_event_source_set_priority(f->stdin_event_source, priority); + if (r < 0) + return r; + } + + r = sd_event_source_set_priority(f->stdout_event_source, priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(f->master_event_source, priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(f->sigwinch_event_source, priority); + if (r < 0) + return r; + + return 0; +} + +int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height) { + struct winsize ws; + + assert(f); + + if (width == (unsigned) -1 && height == (unsigned) -1) + return 0; /* noop */ + + if (width != (unsigned) -1 && + (width == 0 || width > USHRT_MAX)) + return -ERANGE; + + if (height != (unsigned) -1 && + (height == 0 || height > USHRT_MAX)) + return -ERANGE; + + if (width == (unsigned) -1 || height == (unsigned) -1) { + if (ioctl(f->master, TIOCGWINSZ, &ws) < 0) + return -errno; + + if (width != (unsigned) -1) + ws.ws_col = width; + if (height != (unsigned) -1) + ws.ws_row = height; + } else + ws = (struct winsize) { + .ws_row = height, + .ws_col = width, + }; + + if (ioctl(f->master, TIOCSWINSZ, &ws) < 0) + return -errno; + + /* Make sure we ignore SIGWINCH window size events from now on */ + f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source); + + return 0; +} diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h new file mode 100644 index 0000000..f0ae6e9 --- /dev/null +++ b/src/shared/ptyfwd.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-event.h" + +#include "macro.h" + +typedef struct PTYForward PTYForward; + +typedef enum PTYForwardFlags { + PTY_FORWARD_READ_ONLY = 1, + + /* Continue reading after hangup? */ + PTY_FORWARD_IGNORE_VHANGUP = 2, + + /* Continue reading after hangup but only if we never read anything else? */ + PTY_FORWARD_IGNORE_INITIAL_VHANGUP = 4, +} PTYForwardFlags; + +typedef int (*PTYForwardHandler)(PTYForward *f, int rcode, void *userdata); + +int pty_forward_new(sd_event *event, int master, PTYForwardFlags flags, PTYForward **f); +PTYForward *pty_forward_free(PTYForward *f); + +int pty_forward_get_last_char(PTYForward *f, char *ch); + +int pty_forward_set_ignore_vhangup(PTYForward *f, bool ignore_vhangup); +bool pty_forward_get_ignore_vhangup(PTYForward *f); + +bool pty_forward_is_done(PTYForward *f); + +void pty_forward_set_handler(PTYForward *f, PTYForwardHandler handler, void *userdata); + +bool pty_forward_drain(PTYForward *f); + +int pty_forward_set_priority(PTYForward *f, int64_t priority); + +int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height); + +DEFINE_TRIVIAL_CLEANUP_FUNC(PTYForward*, pty_forward_free); diff --git a/src/shared/pwquality-util.c b/src/shared/pwquality-util.c new file mode 100644 index 0000000..4000bef --- /dev/null +++ b/src/shared/pwquality-util.c @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <unistd.h> + +#include "dlfcn-util.h" +#include "errno-util.h" +#include "log.h" +#include "macro.h" +#include "memory-util.h" +#include "pwquality-util.h" +#include "strv.h" + +#if HAVE_PWQUALITY + +static void *pwquality_dl = NULL; + +int (*sym_pwquality_check)(pwquality_settings_t *pwq, const char *password, const char *oldpassword, const char *user, void **auxerror); +pwquality_settings_t *(*sym_pwquality_default_settings)(void); +void (*sym_pwquality_free_settings)(pwquality_settings_t *pwq); +int (*sym_pwquality_generate)(pwquality_settings_t *pwq, int entropy_bits, char **password); +int (*sym_pwquality_get_str_value)(pwquality_settings_t *pwq, int setting, const char **value); +int (*sym_pwquality_read_config)(pwquality_settings_t *pwq, const char *cfgfile, void **auxerror); +int (*sym_pwquality_set_int_value)(pwquality_settings_t *pwq, int setting, int value); +const char* (*sym_pwquality_strerror)(char *buf, size_t len, int errcode, void *auxerror); + +int dlopen_pwquality(void) { + _cleanup_(dlclosep) void *dl = NULL; + int r; + + if (pwquality_dl) + return 0; /* Already loaded */ + + dl = dlopen("libpwquality.so.1", RTLD_LAZY); + if (!dl) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "libpwquality support is not installed: %s", dlerror()); + + r = dlsym_many_and_warn( + dl, + LOG_DEBUG, + &sym_pwquality_check, "pwquality_check", + &sym_pwquality_default_settings, "pwquality_default_settings", + &sym_pwquality_free_settings, "pwquality_free_settings", + &sym_pwquality_generate, "pwquality_generate", + &sym_pwquality_get_str_value, "pwquality_get_str_value", + &sym_pwquality_read_config, "pwquality_read_config", + &sym_pwquality_set_int_value, "pwquality_set_int_value", + &sym_pwquality_strerror, "pwquality_strerror", + NULL); + if (r < 0) + return r; + + /* Note that we never release the reference here, because there's no real reason to, after all this + * was traditionally a regular shared library dependency which lives forever too. */ + pwquality_dl = TAKE_PTR(dl); + return 1; +} + +void pwq_maybe_disable_dictionary(pwquality_settings_t *pwq) { + char buf[PWQ_MAX_ERROR_MESSAGE_LEN]; + const char *path; + int r; + + assert(pwq); + + r = sym_pwquality_get_str_value(pwq, PWQ_SETTING_DICT_PATH, &path); + if (r < 0) { + log_debug("Failed to read libpwquality dictionary path, ignoring: %s", + sym_pwquality_strerror(buf, sizeof(buf), r, NULL)); + return; + } + + // REMOVE THIS AS SOON AS https://github.com/libpwquality/libpwquality/pull/21 IS MERGED AND RELEASED + if (isempty(path)) + path = "/usr/share/cracklib/pw_dict.pwd.gz"; + + if (isempty(path)) { + log_debug("Weird, no dictionary file configured, ignoring."); + return; + } + + if (access(path, F_OK) >= 0) + return; + + if (errno != ENOENT) { + log_debug_errno(errno, "Failed to check if dictionary file %s exists, ignoring: %m", path); + return; + } + + r = sym_pwquality_set_int_value(pwq, PWQ_SETTING_DICT_CHECK, 0); + if (r < 0) + log_debug("Failed to disable libpwquality dictionary check, ignoring: %s", + sym_pwquality_strerror(buf, sizeof(buf), r, NULL)); +} + +int pwq_allocate_context(pwquality_settings_t **ret) { + _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL; + char buf[PWQ_MAX_ERROR_MESSAGE_LEN]; + void *auxerror; + int r; + + assert(ret); + + r = dlopen_pwquality(); + if (r < 0) + return r; + + pwq = sym_pwquality_default_settings(); + if (!pwq) + return -ENOMEM; + + r = sym_pwquality_read_config(pwq, NULL, &auxerror); + if (r < 0) + log_debug("Failed to read libpwquality configuration, ignoring: %s", + sym_pwquality_strerror(buf, sizeof(buf), r, auxerror)); + + pwq_maybe_disable_dictionary(pwq); + + *ret = TAKE_PTR(pwq); + return 0; +} + +#define N_SUGGESTIONS 6 + +int suggest_passwords(void) { + _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL; + _cleanup_strv_free_erase_ char **suggestions = NULL; + _cleanup_(erase_and_freep) char *joined = NULL; + char buf[PWQ_MAX_ERROR_MESSAGE_LEN]; + size_t i; + int r; + + r = pwq_allocate_context(&pwq); + if (ERRNO_IS_NOT_SUPPORTED(r)) + return 0; + if (r < 0) + return log_error_errno(r, "Failed to allocate libpwquality context: %m"); + + suggestions = new0(char*, N_SUGGESTIONS+1); + if (!suggestions) + return log_oom(); + + for (i = 0; i < N_SUGGESTIONS; i++) { + r = sym_pwquality_generate(pwq, 64, suggestions + i); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate password, ignoring: %s", + sym_pwquality_strerror(buf, sizeof(buf), r, NULL)); + } + + joined = strv_join(suggestions, " "); + if (!joined) + return log_oom(); + + log_info("Password suggestions: %s", joined); + return 1; +} + +int quality_check_password(const char *password, const char *username, char **ret_error) { + _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL; + char buf[PWQ_MAX_ERROR_MESSAGE_LEN]; + void *auxerror; + int r; + + assert(password); + + r = pwq_allocate_context(&pwq); + if (ERRNO_IS_NOT_SUPPORTED(r)) + return 0; + if (r < 0) + return log_debug_errno(r, "Failed to allocate libpwquality context: %m"); + + r = sym_pwquality_check(pwq, password, NULL, username, &auxerror); + if (r < 0) { + + if (ret_error) { + _cleanup_free_ char *e = NULL; + + e = strdup(sym_pwquality_strerror(buf, sizeof(buf), r, auxerror)); + if (!e) + return -ENOMEM; + + *ret_error = TAKE_PTR(e); + } + + return 0; /* all bad */ + } + + return 1; /* all good */ +} + +#endif diff --git a/src/shared/pwquality-util.h b/src/shared/pwquality-util.h new file mode 100644 index 0000000..de288bb --- /dev/null +++ b/src/shared/pwquality-util.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "macro.h" + +#if HAVE_PWQUALITY +/* pwquality.h uses size_t but doesn't include sys/types.h on its own */ +#include <sys/types.h> +#include <pwquality.h> + +extern int (*sym_pwquality_check)(pwquality_settings_t *pwq, const char *password, const char *oldpassword, const char *user, void **auxerror); +extern pwquality_settings_t *(*sym_pwquality_default_settings)(void); +extern void (*sym_pwquality_free_settings)(pwquality_settings_t *pwq); +extern int (*sym_pwquality_generate)(pwquality_settings_t *pwq, int entropy_bits, char **password); +extern int (*sym_pwquality_get_str_value)(pwquality_settings_t *pwq, int setting, const char **value); +extern int (*sym_pwquality_read_config)(pwquality_settings_t *pwq, const char *cfgfile, void **auxerror); +extern int (*sym_pwquality_set_int_value)(pwquality_settings_t *pwq, int setting, int value); +extern const char* (*sym_pwquality_strerror)(char *buf, size_t len, int errcode, void *auxerror); + +int dlopen_pwquality(void); + +DEFINE_TRIVIAL_CLEANUP_FUNC(pwquality_settings_t*, sym_pwquality_free_settings); + +void pwq_maybe_disable_dictionary(pwquality_settings_t *pwq); +int pwq_allocate_context(pwquality_settings_t **ret); +int suggest_passwords(void); +int quality_check_password(const char *password, const char *username, char **ret_error); + +#else + +static inline int suggest_passwords(void) { + return 0; +} + +static inline int quality_check_password(const char *password, const char *username, char **ret_error) { + if (ret_error) + *ret_error = NULL; + return 1; /* all good */ +} + +#endif diff --git a/src/shared/qrcode-util.c b/src/shared/qrcode-util.c new file mode 100644 index 0000000..7050e18 --- /dev/null +++ b/src/shared/qrcode-util.c @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "qrcode-util.h" + +#if HAVE_QRENCODE +#include <qrencode.h> + +#include "dlfcn-util.h" +#include "locale-util.h" +#include "terminal-util.h" + +#define ANSI_WHITE_ON_BLACK "\033[40;37;1m" + +static void print_border(FILE *output, unsigned width) { + /* Four rows of border */ + for (unsigned y = 0; y < 4; y += 2) { + fputs(ANSI_WHITE_ON_BLACK, output); + + for (unsigned x = 0; x < 4 + width + 4; x++) + fputs("\342\226\210", output); + + fputs(ANSI_NORMAL "\n", output); + } +} + +static void write_qrcode(FILE *output, QRcode *qr) { + assert(qr); + + if (!output) + output = stdout; + + print_border(output, qr->width); + + for (unsigned y = 0; y < (unsigned) qr->width; y += 2) { + const uint8_t *row1 = qr->data + qr->width * y; + const uint8_t *row2 = row1 + qr->width; + + fputs(ANSI_WHITE_ON_BLACK, output); + for (unsigned x = 0; x < 4; x++) + fputs("\342\226\210", output); + + for (unsigned x = 0; x < (unsigned) qr->width; x++) { + bool a, b; + + a = row1[x] & 1; + b = (y+1) < (unsigned) qr->width ? (row2[x] & 1) : false; + + if (a && b) + fputc(' ', output); + else if (a) + fputs("\342\226\204", output); + else if (b) + fputs("\342\226\200", output); + else + fputs("\342\226\210", output); + } + + for (unsigned x = 0; x < 4; x++) + fputs("\342\226\210", output); + fputs(ANSI_NORMAL "\n", output); + } + + print_border(output, qr->width); + fflush(output); +} + +int print_qrcode(FILE *out, const char *header, const char *string) { + QRcode* (*sym_QRcode_encodeString)(const char *string, int version, QRecLevel level, QRencodeMode hint, int casesensitive); + void (*sym_QRcode_free)(QRcode *qrcode); + _cleanup_(dlclosep) void *dl = NULL; + QRcode* qr; + int r; + + /* If this is not an UTF-8 system or ANSI colors aren't supported/disabled don't print any QR + * codes */ + if (!is_locale_utf8() || !colors_enabled()) + return -EOPNOTSUPP; + + dl = dlopen("libqrencode.so.4", RTLD_LAZY); + if (!dl) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "QRCODE support is not installed: %s", dlerror()); + + r = dlsym_many_and_warn( + dl, + LOG_DEBUG, + &sym_QRcode_encodeString, "QRcode_encodeString", + &sym_QRcode_free, "QRcode_free", + NULL); + if (r < 0) + return r; + + qr = sym_QRcode_encodeString(string, 0, QR_ECLEVEL_L, QR_MODE_8, 0); + if (!qr) + return -ENOMEM; + + if (header) + fprintf(out, "\n%s:\n\n", header); + + write_qrcode(out, qr); + + fputc('\n', out); + + sym_QRcode_free(qr); + return 0; +} +#endif diff --git a/src/shared/qrcode-util.h b/src/shared/qrcode-util.h new file mode 100644 index 0000000..6fc45c9 --- /dev/null +++ b/src/shared/qrcode-util.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once +#include <stdio.h> +#include <errno.h> + +#if HAVE_QRENCODE +int print_qrcode(FILE *out, const char *header, const char *string); +#else +static inline int print_qrcode(FILE *out, const char *header, const char *string) { + return -EOPNOTSUPP; +} +#endif diff --git a/src/shared/reboot-util.c b/src/shared/reboot-util.c new file mode 100644 index 0000000..756f9d3 --- /dev/null +++ b/src/shared/reboot-util.c @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fileio.h" +#include "log.h" +#include "proc-cmdline.h" +#include "raw-reboot.h" +#include "reboot-util.h" +#include "string-util.h" +#include "umask-util.h" +#include "virt.h" + +int update_reboot_parameter_and_warn(const char *parameter, bool keep) { + int r; + + if (isempty(parameter)) { + if (keep) + return 0; + + if (unlink("/run/systemd/reboot-param") < 0) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m"); + } + + return 0; + } + + RUN_WITH_UMASK(0022) { + r = write_string_file("/run/systemd/reboot-param", parameter, + WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC); + if (r < 0) + return log_warning_errno(r, "Failed to write reboot parameter file: %m"); + } + + return 0; +} + +int read_reboot_parameter(char **parameter) { + int r; + + assert(parameter); + + r = read_one_line_file("/run/systemd/reboot-param", parameter); + if (r < 0 && r != -ENOENT) + return log_debug_errno(r, "Failed to read /run/systemd/reboot-param: %m"); + + return 0; +} + +int reboot_with_parameter(RebootFlags flags) { + int r; + + /* Reboots the system with a parameter that is read from /run/systemd/reboot-param. Returns 0 if + * REBOOT_DRY_RUN was set and the actual reboot operation was hence skipped. If REBOOT_FALLBACK is + * set and the reboot with parameter doesn't work out a fallback to classic reboot() is attempted. If + * REBOOT_FALLBACK is not set, 0 is returned instead, which should be considered indication for the + * caller to fall back to reboot() on its own, or somehow else deal with this. If REBOOT_LOG is + * specified will log about what it is going to do, as well as all errors. */ + + if (detect_container() == 0) { + _cleanup_free_ char *parameter = NULL; + + r = read_one_line_file("/run/systemd/reboot-param", ¶meter); + if (r < 0 && r != -ENOENT) + log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, r, + "Failed to read reboot parameter file, ignoring: %m"); + + if (!isempty(parameter)) { + log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG, + "Rebooting with argument '%s'.", parameter); + + if (flags & REBOOT_DRY_RUN) + return 0; + + (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, parameter); + + log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, errno, + "Failed to reboot with parameter, retrying without: %m"); + } + } + + if (!(flags & REBOOT_FALLBACK)) + return 0; + + log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG, "Rebooting."); + + if (flags & REBOOT_DRY_RUN) + return 0; + + (void) reboot(RB_AUTOBOOT); + + return log_full_errno(flags & REBOOT_LOG ? LOG_ERR : LOG_DEBUG, errno, "Failed to reboot: %m"); +} + +int shall_restore_state(void) { + bool ret; + int r; + + r = proc_cmdline_get_bool("systemd.restore_state", &ret); + if (r < 0) + return r; + + return r > 0 ? ret : true; +} diff --git a/src/shared/reboot-util.h b/src/shared/reboot-util.h new file mode 100644 index 0000000..bbca8b8 --- /dev/null +++ b/src/shared/reboot-util.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +int update_reboot_parameter_and_warn(const char *parameter, bool keep); + +typedef enum RebootFlags { + REBOOT_LOG = 1 << 0, /* log about what we are going to do and all errors */ + REBOOT_DRY_RUN = 1 << 1, /* return 0 right before actually doing the reboot */ + REBOOT_FALLBACK = 1 << 2, /* fall back to plain reboot() if argument-based reboot doesn't work, isn't configured or doesn't apply otherwise */ +} RebootFlags; + +int read_reboot_parameter(char **parameter); +int reboot_with_parameter(RebootFlags flags); + +int shall_restore_state(void); diff --git a/src/shared/resize-fs.c b/src/shared/resize-fs.c new file mode 100644 index 0000000..33cb78b --- /dev/null +++ b/src/shared/resize-fs.c @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/btrfs.h> +#include <linux/magic.h> +#include <sys/ioctl.h> +#include <sys/vfs.h> + +#include "blockdev-util.h" +#include "fs-util.h" +#include "missing_fs.h" +#include "missing_magic.h" +#include "missing_xfs.h" +#include "resize-fs.h" +#include "stat-util.h" + +int resize_fs(int fd, uint64_t sz, uint64_t *ret_size) { + struct statfs sfs; + + assert(fd >= 0); + + /* Rounds down to next block size */ + + if (sz <= 0 || sz == UINT64_MAX) + return -ERANGE; + + if (fstatfs(fd, &sfs) < 0) + return -errno; + + if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) { + uint64_t u; + + if (sz < EXT4_MINIMAL_SIZE) + return -ERANGE; + + u = sz / sfs.f_bsize; + + if (ioctl(fd, EXT4_IOC_RESIZE_FS, &u) < 0) + return -errno; + + if (ret_size) + *ret_size = u * sfs.f_bsize; + + } else if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) { + struct btrfs_ioctl_vol_args args = {}; + + /* 256M is the minimize size enforced by the btrfs kernel code when resizing (which is + * strange btw, as mkfs.btrfs is fine creating file systems > 109M). It will return EINVAL in + * that case, let's catch this error beforehand though, and report a more explanatory + * error. */ + + if (sz < BTRFS_MINIMAL_SIZE) + return -ERANGE; + + sz -= sz % sfs.f_bsize; + + xsprintf(args.name, "%" PRIu64, sz); + + if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0) + return -errno; + + if (ret_size) + *ret_size = sz; + + } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) { + xfs_fsop_geom_t geo; + xfs_growfs_data_t d; + + if (sz < XFS_MINIMAL_SIZE) + return -ERANGE; + + if (ioctl(fd, XFS_IOC_FSGEOMETRY, &geo) < 0) + return -errno; + + d = (xfs_growfs_data_t) { + .imaxpct = geo.imaxpct, + .newblocks = sz / geo.blocksize, + }; + + if (ioctl(fd, XFS_IOC_FSGROWFSDATA, &d) < 0) + return -errno; + + if (ret_size) + *ret_size = d.newblocks * geo.blocksize; + + } else + return -EOPNOTSUPP; + + return 0; +} + +uint64_t minimal_size_by_fs_magic(statfs_f_type_t magic) { + + switch (magic) { + + case (statfs_f_type_t) EXT4_SUPER_MAGIC: + return EXT4_MINIMAL_SIZE; + + case (statfs_f_type_t) XFS_SB_MAGIC: + return XFS_MINIMAL_SIZE; + + case (statfs_f_type_t) BTRFS_SUPER_MAGIC: + return BTRFS_MINIMAL_SIZE; + + default: + return UINT64_MAX; + } +} + +uint64_t minimal_size_by_fs_name(const char *name) { + + if (streq_ptr(name, "ext4")) + return EXT4_MINIMAL_SIZE; + + if (streq_ptr(name, "xfs")) + return XFS_MINIMAL_SIZE; + + if (streq_ptr(name, "btrfs")) + return BTRFS_MINIMAL_SIZE; + + return UINT64_MAX; +} diff --git a/src/shared/resize-fs.h b/src/shared/resize-fs.h new file mode 100644 index 0000000..8831fd8 --- /dev/null +++ b/src/shared/resize-fs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <inttypes.h> + +#include "stat-util.h" + +int resize_fs(int fd, uint64_t sz, uint64_t *ret_size); + +#define BTRFS_MINIMAL_SIZE (256U*1024U*1024U) +#define XFS_MINIMAL_SIZE (14U*1024U*1024U) +#define EXT4_MINIMAL_SIZE (1024U*1024U) + +uint64_t minimal_size_by_fs_magic(statfs_f_type_t magic); +uint64_t minimal_size_by_fs_name(const char *str); diff --git a/src/shared/resolve-util.c b/src/shared/resolve-util.c new file mode 100644 index 0000000..1023b62 --- /dev/null +++ b/src/shared/resolve-util.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "conf-parser.h" +#include "resolve-util.h" +#include "string-table.h" + +DEFINE_CONFIG_PARSE_ENUM(config_parse_resolve_support, resolve_support, ResolveSupport, "Failed to parse resolve support setting"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_dnssec_mode, dnssec_mode, DnssecMode, "Failed to parse DNSSEC mode setting"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_over_tls_mode, dns_over_tls_mode, DnsOverTlsMode, "Failed to parse DNS-over-TLS mode setting"); + +static const char* const resolve_support_table[_RESOLVE_SUPPORT_MAX] = { + [RESOLVE_SUPPORT_NO] = "no", + [RESOLVE_SUPPORT_YES] = "yes", + [RESOLVE_SUPPORT_RESOLVE] = "resolve", +}; +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolve_support, ResolveSupport, RESOLVE_SUPPORT_YES); + +static const char* const dnssec_mode_table[_DNSSEC_MODE_MAX] = { + [DNSSEC_NO] = "no", + [DNSSEC_ALLOW_DOWNGRADE] = "allow-downgrade", + [DNSSEC_YES] = "yes", +}; +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dnssec_mode, DnssecMode, DNSSEC_YES); + +static const char* const dns_over_tls_mode_table[_DNS_OVER_TLS_MODE_MAX] = { + [DNS_OVER_TLS_NO] = "no", + [DNS_OVER_TLS_OPPORTUNISTIC] = "opportunistic", + [DNS_OVER_TLS_YES] = "yes", +}; +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_over_tls_mode, DnsOverTlsMode, DNS_OVER_TLS_YES); + +bool dns_server_address_valid(int family, const union in_addr_union *sa) { + + /* Refuses the 0 IP addresses as well as 127.0.0.53 (which is our own DNS stub) */ + + if (in_addr_is_null(family, sa)) + return false; + + if (family == AF_INET && sa->in.s_addr == htobe32(INADDR_DNS_STUB)) + return false; + + return true; +} + +DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_cache_mode, dns_cache_mode, DnsCacheMode, "Failed to parse DNS cache mode setting") + +static const char* const dns_cache_mode_table[_DNS_CACHE_MODE_MAX] = { + [DNS_CACHE_MODE_YES] = "yes", + [DNS_CACHE_MODE_NO] = "no", + [DNS_CACHE_MODE_NO_NEGATIVE] = "no-negative", +}; +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_cache_mode, DnsCacheMode, DNS_CACHE_MODE_YES); diff --git a/src/shared/resolve-util.h b/src/shared/resolve-util.h new file mode 100644 index 0000000..4ea24a6 --- /dev/null +++ b/src/shared/resolve-util.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "conf-parser.h" +#include "in-addr-util.h" +#include "macro.h" + +/* 127.0.0.53 in native endian */ +#define INADDR_DNS_STUB ((in_addr_t) 0x7f000035U) + +typedef enum DnsCacheMode DnsCacheMode; + +enum DnsCacheMode { + DNS_CACHE_MODE_NO, + DNS_CACHE_MODE_YES, + DNS_CACHE_MODE_NO_NEGATIVE, + _DNS_CACHE_MODE_MAX, + _DNS_CACHE_MODE_INVALID = 1 +}; + +typedef enum ResolveSupport ResolveSupport; +typedef enum DnssecMode DnssecMode; +typedef enum DnsOverTlsMode DnsOverTlsMode; + +enum ResolveSupport { + RESOLVE_SUPPORT_NO, + RESOLVE_SUPPORT_YES, + RESOLVE_SUPPORT_RESOLVE, + _RESOLVE_SUPPORT_MAX, + _RESOLVE_SUPPORT_INVALID = -1 +}; + +enum DnssecMode { + /* No DNSSEC validation is done */ + DNSSEC_NO, + + /* Validate locally, if the server knows DO, but if not, + * don't. Don't trust the AD bit. If the server doesn't do + * DNSSEC properly, downgrade to non-DNSSEC operation. Of + * course, we then are vulnerable to a downgrade attack, but + * that's life and what is configured. */ + DNSSEC_ALLOW_DOWNGRADE, + + /* Insist on DNSSEC server support, and rather fail than downgrading. */ + DNSSEC_YES, + + _DNSSEC_MODE_MAX, + _DNSSEC_MODE_INVALID = -1 +}; + +enum DnsOverTlsMode { + /* No connection is made for DNS-over-TLS */ + DNS_OVER_TLS_NO, + + /* Try to connect using DNS-over-TLS, but if connection fails, + * fall back to using an unencrypted connection */ + DNS_OVER_TLS_OPPORTUNISTIC, + + /* Enforce DNS-over-TLS and require valid server certificates */ + DNS_OVER_TLS_YES, + + _DNS_OVER_TLS_MODE_MAX, + _DNS_OVER_TLS_MODE_INVALID = -1 +}; + +CONFIG_PARSER_PROTOTYPE(config_parse_resolve_support); +CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_dns_over_tls_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_dns_cache_mode); + +const char* resolve_support_to_string(ResolveSupport p) _const_; +ResolveSupport resolve_support_from_string(const char *s) _pure_; + +const char* dnssec_mode_to_string(DnssecMode p) _const_; +DnssecMode dnssec_mode_from_string(const char *s) _pure_; + +const char* dns_over_tls_mode_to_string(DnsOverTlsMode p) _const_; +DnsOverTlsMode dns_over_tls_mode_from_string(const char *s) _pure_; + +bool dns_server_address_valid(int family, const union in_addr_union *sa); + +const char* dns_cache_mode_to_string(DnsCacheMode p) _const_; +DnsCacheMode dns_cache_mode_from_string(const char *s) _pure_; + +/* A resolv.conf file containing the DNS server and domain data we learnt from uplink, i.e. the full uplink data */ +#define PRIVATE_UPLINK_RESOLV_CONF "/run/systemd/resolve/resolv.conf" + +/* A resolv.conf file containing the domain data we learnt from uplink, but our own DNS server address. */ +#define PRIVATE_STUB_RESOLV_CONF "/run/systemd/resolve/stub-resolv.conf" + +/* A static resolv.conf file containing no domains, but only our own DNS server address */ +#define PRIVATE_STATIC_RESOLV_CONF ROOTLIBEXECDIR "/resolv.conf" diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c new file mode 100644 index 0000000..ccae9d4 --- /dev/null +++ b/src/shared/seccomp-util.c @@ -0,0 +1,2140 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/seccomp.h> +#include <seccomp.h> +#include <stddef.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/shm.h> +#include <sys/stat.h> + +#include "af-list.h" +#include "alloc-util.h" +#include "env-util.h" +#include "errno-list.h" +#include "macro.h" +#include "nsflags.h" +#include "nulstr-util.h" +#include "process-util.h" +#include "seccomp-util.h" +#include "set.h" +#include "string-util.h" +#include "strv.h" + +const uint32_t seccomp_local_archs[] = { + + /* Note: always list the native arch we are compiled as last, so that users can deny-list seccomp(), but our own calls to it still succeed */ + +#if defined(__x86_64__) && defined(__ILP32__) + SCMP_ARCH_X86, + SCMP_ARCH_X86_64, + SCMP_ARCH_X32, /* native */ +#elif defined(__x86_64__) && !defined(__ILP32__) + SCMP_ARCH_X86, + SCMP_ARCH_X32, + SCMP_ARCH_X86_64, /* native */ +#elif defined(__i386__) + SCMP_ARCH_X86, +#elif defined(__aarch64__) + SCMP_ARCH_ARM, + SCMP_ARCH_AARCH64, /* native */ +#elif defined(__arm__) + SCMP_ARCH_ARM, +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64N32, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64N32, /* native */ +#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN + SCMP_ARCH_PPC, + SCMP_ARCH_PPC64LE, + SCMP_ARCH_PPC64, /* native */ +#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN + SCMP_ARCH_PPC, + SCMP_ARCH_PPC64, + SCMP_ARCH_PPC64LE, /* native */ +#elif defined(__powerpc__) + SCMP_ARCH_PPC, +#elif defined(__riscv) && __riscv_xlen == 64 && defined(SCMP_ARCH_RISCV64) + SCMP_ARCH_RISCV64, +#elif defined(__s390x__) + SCMP_ARCH_S390, + SCMP_ARCH_S390X, /* native */ +#elif defined(__s390__) + SCMP_ARCH_S390, +#endif + (uint32_t) -1 + }; + +const char* seccomp_arch_to_string(uint32_t c) { + /* Maintain order used in <seccomp.h>. + * + * Names used here should be the same as those used for ConditionArchitecture=, + * except for "subarchitectures" like x32. */ + + switch(c) { + case SCMP_ARCH_NATIVE: + return "native"; + case SCMP_ARCH_X86: + return "x86"; + case SCMP_ARCH_X86_64: + return "x86-64"; + case SCMP_ARCH_X32: + return "x32"; + case SCMP_ARCH_ARM: + return "arm"; + case SCMP_ARCH_AARCH64: + return "arm64"; + case SCMP_ARCH_MIPS: + return "mips"; + case SCMP_ARCH_MIPS64: + return "mips64"; + case SCMP_ARCH_MIPS64N32: + return "mips64-n32"; + case SCMP_ARCH_MIPSEL: + return "mips-le"; + case SCMP_ARCH_MIPSEL64: + return "mips64-le"; + case SCMP_ARCH_MIPSEL64N32: + return "mips64-le-n32"; + case SCMP_ARCH_PPC: + return "ppc"; + case SCMP_ARCH_PPC64: + return "ppc64"; + case SCMP_ARCH_PPC64LE: + return "ppc64-le"; +#ifdef SCMP_ARCH_RISCV64 + case SCMP_ARCH_RISCV64: + return "riscv64"; +#endif + case SCMP_ARCH_S390: + return "s390"; + case SCMP_ARCH_S390X: + return "s390x"; + default: + return NULL; + } +} + +int seccomp_arch_from_string(const char *n, uint32_t *ret) { + if (!n) + return -EINVAL; + + assert(ret); + + if (streq(n, "native")) + *ret = SCMP_ARCH_NATIVE; + else if (streq(n, "x86")) + *ret = SCMP_ARCH_X86; + else if (streq(n, "x86-64")) + *ret = SCMP_ARCH_X86_64; + else if (streq(n, "x32")) + *ret = SCMP_ARCH_X32; + else if (streq(n, "arm")) + *ret = SCMP_ARCH_ARM; + else if (streq(n, "arm64")) + *ret = SCMP_ARCH_AARCH64; + else if (streq(n, "mips")) + *ret = SCMP_ARCH_MIPS; + else if (streq(n, "mips64")) + *ret = SCMP_ARCH_MIPS64; + else if (streq(n, "mips64-n32")) + *ret = SCMP_ARCH_MIPS64N32; + else if (streq(n, "mips-le")) + *ret = SCMP_ARCH_MIPSEL; + else if (streq(n, "mips64-le")) + *ret = SCMP_ARCH_MIPSEL64; + else if (streq(n, "mips64-le-n32")) + *ret = SCMP_ARCH_MIPSEL64N32; + else if (streq(n, "ppc")) + *ret = SCMP_ARCH_PPC; + else if (streq(n, "ppc64")) + *ret = SCMP_ARCH_PPC64; + else if (streq(n, "ppc64-le")) + *ret = SCMP_ARCH_PPC64LE; +#ifdef SCMP_ARCH_RISCV64 + else if (streq(n, "riscv64")) + *ret = SCMP_ARCH_RISCV64; +#endif + else if (streq(n, "s390")) + *ret = SCMP_ARCH_S390; + else if (streq(n, "s390x")) + *ret = SCMP_ARCH_S390X; + else + return -EINVAL; + + return 0; +} + +int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int r; + + /* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting + * any others. Also, turns off the NNP fiddling. */ + + seccomp = seccomp_init(default_action); + if (!seccomp) + return -ENOMEM; + + if (arch != SCMP_ARCH_NATIVE && + arch != seccomp_arch_native()) { + + r = seccomp_arch_remove(seccomp, seccomp_arch_native()); + if (r < 0) + return r; + + r = seccomp_arch_add(seccomp, arch); + if (r < 0) + return r; + + assert(seccomp_arch_exist(seccomp, arch) >= 0); + assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) == -EEXIST); + assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) == -EEXIST); + } else { + assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) >= 0); + assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) >= 0); + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + return r; + +#if SCMP_VER_MAJOR >= 3 || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 4) + if (getenv_bool("SYSTEMD_LOG_SECCOMP") > 0) { + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_LOG, 1); + if (r < 0) + log_debug_errno(r, "Failed to enable seccomp event logging: %m"); + } +#endif + + *ret = TAKE_PTR(seccomp); + return 0; +} + +static bool is_basic_seccomp_available(void) { + return prctl(PR_GET_SECCOMP, 0, 0, 0, 0) >= 0; +} + +static bool is_seccomp_filter_available(void) { + return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) < 0 && + errno == EFAULT; +} + +bool is_seccomp_available(void) { + static int cached_enabled = -1; + + if (cached_enabled < 0) { + int b; + + b = getenv_bool_secure("SYSTEMD_SECCOMP"); + if (b != 0) { + if (b < 0 && b != -ENXIO) /* ENXIO: env var unset */ + log_debug_errno(b, "Failed to parse $SYSTEMD_SECCOMP value, ignoring."); + + cached_enabled = + is_basic_seccomp_available() && + is_seccomp_filter_available(); + } else + cached_enabled = false; + } + + return cached_enabled; +} + +const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { + [SYSCALL_FILTER_SET_DEFAULT] = { + .name = "@default", + .help = "System calls that are always permitted", + .value = + "brk\0" + "cacheflush\0" + "clock_getres\0" + "clock_getres_time64\0" + "clock_gettime\0" + "clock_gettime64\0" + "clock_nanosleep\0" + "clock_nanosleep_time64\0" + "execve\0" + "exit\0" + "exit_group\0" + "futex\0" + "futex_time64\0" + "get_robust_list\0" + "get_thread_area\0" + "getegid\0" + "getegid32\0" + "geteuid\0" + "geteuid32\0" + "getgid\0" + "getgid32\0" + "getgroups\0" + "getgroups32\0" + "getpgid\0" + "getpgrp\0" + "getpid\0" + "getppid\0" + "getresgid\0" + "getresgid32\0" + "getresuid\0" + "getresuid32\0" + "getrlimit\0" /* make sure processes can query stack size and such */ + "getsid\0" + "gettid\0" + "gettimeofday\0" + "getuid\0" + "getuid32\0" + "membarrier\0" + "mmap\0" + "mmap2\0" + "munmap\0" + "nanosleep\0" + "pause\0" + "prlimit64\0" + "restart_syscall\0" + "rseq\0" + "rt_sigreturn\0" + "sched_yield\0" + "set_robust_list\0" + "set_thread_area\0" + "set_tid_address\0" + "set_tls\0" + "sigreturn\0" + "time\0" + "ugetrlimit\0" + }, + [SYSCALL_FILTER_SET_AIO] = { + .name = "@aio", + .help = "Asynchronous IO", + .value = + "io_cancel\0" + "io_destroy\0" + "io_getevents\0" + "io_pgetevents\0" + "io_pgetevents_time64\0" + "io_setup\0" + "io_submit\0" + "io_uring_enter\0" + "io_uring_register\0" + "io_uring_setup\0" + }, + [SYSCALL_FILTER_SET_BASIC_IO] = { + .name = "@basic-io", + .help = "Basic IO", + .value = + "_llseek\0" + "close\0" + "close_range\0" + "dup\0" + "dup2\0" + "dup3\0" + "lseek\0" + "pread64\0" + "preadv\0" + "preadv2\0" + "pwrite64\0" + "pwritev\0" + "pwritev2\0" + "read\0" + "readv\0" + "write\0" + "writev\0" + }, + [SYSCALL_FILTER_SET_CHOWN] = { + .name = "@chown", + .help = "Change ownership of files and directories", + .value = + "chown\0" + "chown32\0" + "fchown\0" + "fchown32\0" + "fchownat\0" + "lchown\0" + "lchown32\0" + }, + [SYSCALL_FILTER_SET_CLOCK] = { + .name = "@clock", + .help = "Change the system time", + .value = + "adjtimex\0" + "clock_adjtime\0" + "clock_adjtime64\0" + "clock_settime\0" + "clock_settime64\0" + "settimeofday\0" + }, + [SYSCALL_FILTER_SET_CPU_EMULATION] = { + .name = "@cpu-emulation", + .help = "System calls for CPU emulation functionality", + .value = + "modify_ldt\0" + "subpage_prot\0" + "switch_endian\0" + "vm86\0" + "vm86old\0" + }, + [SYSCALL_FILTER_SET_DEBUG] = { + .name = "@debug", + .help = "Debugging, performance monitoring and tracing functionality", + .value = + "lookup_dcookie\0" + "perf_event_open\0" + "pidfd_getfd\0" + "ptrace\0" + "rtas\0" +#if defined __s390__ || defined __s390x__ + "s390_runtime_instr\0" +#endif + "sys_debug_setcontext\0" + }, + [SYSCALL_FILTER_SET_FILE_SYSTEM] = { + .name = "@file-system", + .help = "File system operations", + .value = + "access\0" + "chdir\0" + "chmod\0" + "close\0" + "creat\0" + "faccessat\0" + "faccessat2\0" + "fallocate\0" + "fchdir\0" + "fchmod\0" + "fchmodat\0" + "fcntl\0" + "fcntl64\0" + "fgetxattr\0" + "flistxattr\0" + "fremovexattr\0" + "fsetxattr\0" + "fstat\0" + "fstat64\0" + "fstatat64\0" + "fstatfs\0" + "fstatfs64\0" + "ftruncate\0" + "ftruncate64\0" + "futimesat\0" + "getcwd\0" + "getdents\0" + "getdents64\0" + "getxattr\0" + "inotify_add_watch\0" + "inotify_init\0" + "inotify_init1\0" + "inotify_rm_watch\0" + "lgetxattr\0" + "link\0" + "linkat\0" + "listxattr\0" + "llistxattr\0" + "lremovexattr\0" + "lsetxattr\0" + "lstat\0" + "lstat64\0" + "mkdir\0" + "mkdirat\0" + "mknod\0" + "mknodat\0" + "newfstatat\0" + "oldfstat\0" + "oldlstat\0" + "oldstat\0" + "open\0" + "openat\0" + "openat2\0" + "readlink\0" + "readlinkat\0" + "removexattr\0" + "rename\0" + "renameat\0" + "renameat2\0" + "rmdir\0" + "setxattr\0" + "stat\0" + "stat64\0" + "statfs\0" + "statfs64\0" + "statx\0" + "symlink\0" + "symlinkat\0" + "truncate\0" + "truncate64\0" + "unlink\0" + "unlinkat\0" + "utime\0" + "utimensat\0" + "utimensat_time64\0" + "utimes\0" + }, + [SYSCALL_FILTER_SET_IO_EVENT] = { + .name = "@io-event", + .help = "Event loop system calls", + .value = + "_newselect\0" + "epoll_create\0" + "epoll_create1\0" + "epoll_ctl\0" + "epoll_ctl_old\0" + "epoll_pwait\0" + "epoll_wait\0" + "epoll_wait_old\0" + "eventfd\0" + "eventfd2\0" + "poll\0" + "ppoll\0" + "ppoll_time64\0" + "pselect6\0" + "pselect6_time64\0" + "select\0" + }, + [SYSCALL_FILTER_SET_IPC] = { + .name = "@ipc", + .help = "SysV IPC, POSIX Message Queues or other IPC", + .value = + "ipc\0" + "memfd_create\0" + "mq_getsetattr\0" + "mq_notify\0" + "mq_open\0" + "mq_timedreceive\0" + "mq_timedreceive_time64\0" + "mq_timedsend\0" + "mq_timedsend_time64\0" + "mq_unlink\0" + "msgctl\0" + "msgget\0" + "msgrcv\0" + "msgsnd\0" + "pipe\0" + "pipe2\0" + "process_vm_readv\0" + "process_vm_writev\0" + "semctl\0" + "semget\0" + "semop\0" + "semtimedop\0" + "semtimedop_time64\0" + "shmat\0" + "shmctl\0" + "shmdt\0" + "shmget\0" + }, + [SYSCALL_FILTER_SET_KEYRING] = { + .name = "@keyring", + .help = "Kernel keyring access", + .value = + "add_key\0" + "keyctl\0" + "request_key\0" + }, + [SYSCALL_FILTER_SET_MEMLOCK] = { + .name = "@memlock", + .help = "Memory locking control", + .value = + "mlock\0" + "mlock2\0" + "mlockall\0" + "munlock\0" + "munlockall\0" + }, + [SYSCALL_FILTER_SET_MODULE] = { + .name = "@module", + .help = "Loading and unloading of kernel modules", + .value = + "delete_module\0" + "finit_module\0" + "init_module\0" + }, + [SYSCALL_FILTER_SET_MOUNT] = { + .name = "@mount", + .help = "Mounting and unmounting of file systems", + .value = + "chroot\0" + "fsconfig\0" + "fsmount\0" + "fsopen\0" + "fspick\0" + "mount\0" + "move_mount\0" + "open_tree\0" + "pivot_root\0" + "umount\0" + "umount2\0" + }, + [SYSCALL_FILTER_SET_NETWORK_IO] = { + .name = "@network-io", + .help = "Network or Unix socket IO, should not be needed if not network facing", + .value = + "accept\0" + "accept4\0" + "bind\0" + "connect\0" + "getpeername\0" + "getsockname\0" + "getsockopt\0" + "listen\0" + "recv\0" + "recvfrom\0" + "recvmmsg\0" + "recvmmsg_time64\0" + "recvmsg\0" + "send\0" + "sendmmsg\0" + "sendmsg\0" + "sendto\0" + "setsockopt\0" + "shutdown\0" + "socket\0" + "socketcall\0" + "socketpair\0" + }, + [SYSCALL_FILTER_SET_OBSOLETE] = { + /* some unknown even to libseccomp */ + .name = "@obsolete", + .help = "Unusual, obsolete or unimplemented system calls", + .value = + "_sysctl\0" + "afs_syscall\0" + "bdflush\0" + "break\0" + "create_module\0" + "ftime\0" + "get_kernel_syms\0" + "getpmsg\0" + "gtty\0" + "idle\0" + "lock\0" + "mpx\0" + "prof\0" + "profil\0" + "putpmsg\0" + "query_module\0" + "security\0" + "sgetmask\0" + "ssetmask\0" + "stime\0" + "stty\0" + "sysfs\0" + "tuxcall\0" + "ulimit\0" + "uselib\0" + "ustat\0" + "vserver\0" + }, + [SYSCALL_FILTER_SET_PKEY] = { + .name = "@pkey", + .help = "System calls used for memory protection keys", + .value = + "pkey_alloc\0" + "pkey_free\0" + "pkey_mprotect\0" + }, + [SYSCALL_FILTER_SET_PRIVILEGED] = { + .name = "@privileged", + .help = "All system calls which need super-user capabilities", + .value = + "@chown\0" + "@clock\0" + "@module\0" + "@raw-io\0" + "@reboot\0" + "@swap\0" + "_sysctl\0" + "acct\0" + "bpf\0" + "capset\0" + "chroot\0" + "fanotify_init\0" + "fanotify_mark\0" + "nfsservctl\0" + "open_by_handle_at\0" + "pivot_root\0" + "quotactl\0" + "setdomainname\0" + "setfsuid\0" + "setfsuid32\0" + "setgroups\0" + "setgroups32\0" + "sethostname\0" + "setresuid\0" + "setresuid32\0" + "setreuid\0" + "setreuid32\0" + "setuid\0" /* We list the explicit system calls here, as @setuid also includes setgid() which is not necessarily privileged */ + "setuid32\0" + "vhangup\0" + }, + [SYSCALL_FILTER_SET_PROCESS] = { + .name = "@process", + .help = "Process control, execution, namespacing operations", + .value = + "arch_prctl\0" + "capget\0" /* Able to query arbitrary processes */ + "clone\0" + "clone3\0" + "execveat\0" + "fork\0" + "getrusage\0" + "kill\0" + "pidfd_open\0" + "pidfd_send_signal\0" + "prctl\0" + "rt_sigqueueinfo\0" + "rt_tgsigqueueinfo\0" + "setns\0" + "swapcontext\0" /* Some archs e.g. powerpc32 are using it to do userspace context switches */ + "tgkill\0" + "times\0" + "tkill\0" + "unshare\0" + "vfork\0" + "wait4\0" + "waitid\0" + "waitpid\0" + }, + [SYSCALL_FILTER_SET_RAW_IO] = { + .name = "@raw-io", + .help = "Raw I/O port access", + .value = + "ioperm\0" + "iopl\0" + "pciconfig_iobase\0" + "pciconfig_read\0" + "pciconfig_write\0" +#if defined __s390__ || defined __s390x__ + "s390_pci_mmio_read\0" + "s390_pci_mmio_write\0" +#endif + }, + [SYSCALL_FILTER_SET_REBOOT] = { + .name = "@reboot", + .help = "Reboot and reboot preparation/kexec", + .value = + "kexec_file_load\0" + "kexec_load\0" + "reboot\0" + }, + [SYSCALL_FILTER_SET_RESOURCES] = { + .name = "@resources", + .help = "Alter resource settings", + .value = + "ioprio_set\0" + "mbind\0" + "migrate_pages\0" + "move_pages\0" + "nice\0" + "sched_setaffinity\0" + "sched_setattr\0" + "sched_setparam\0" + "sched_setscheduler\0" + "set_mempolicy\0" + "setpriority\0" + "setrlimit\0" + }, + [SYSCALL_FILTER_SET_SETUID] = { + .name = "@setuid", + .help = "Operations for changing user/group credentials", + .value = + "setgid\0" + "setgid32\0" + "setgroups\0" + "setgroups32\0" + "setregid\0" + "setregid32\0" + "setresgid\0" + "setresgid32\0" + "setresuid\0" + "setresuid32\0" + "setreuid\0" + "setreuid32\0" + "setuid\0" + "setuid32\0" + }, + [SYSCALL_FILTER_SET_SIGNAL] = { + .name = "@signal", + .help = "Process signal handling", + .value = + "rt_sigaction\0" + "rt_sigpending\0" + "rt_sigprocmask\0" + "rt_sigsuspend\0" + "rt_sigtimedwait\0" + "rt_sigtimedwait_time64\0" + "sigaction\0" + "sigaltstack\0" + "signal\0" + "signalfd\0" + "signalfd4\0" + "sigpending\0" + "sigprocmask\0" + "sigsuspend\0" + }, + [SYSCALL_FILTER_SET_SWAP] = { + .name = "@swap", + .help = "Enable/disable swap devices", + .value = + "swapoff\0" + "swapon\0" + }, + [SYSCALL_FILTER_SET_SYNC] = { + .name = "@sync", + .help = "Synchronize files and memory to storage", + .value = + "fdatasync\0" + "fsync\0" + "msync\0" + "sync\0" + "sync_file_range\0" + "sync_file_range2\0" + "syncfs\0" + }, + [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = { + .name = "@system-service", + .help = "General system service operations", + .value = + "@aio\0" + "@basic-io\0" + "@chown\0" + "@default\0" + "@file-system\0" + "@io-event\0" + "@ipc\0" + "@keyring\0" + "@memlock\0" + "@network-io\0" + "@process\0" + "@resources\0" + "@setuid\0" + "@signal\0" + "@sync\0" + "@timer\0" + "capget\0" + "capset\0" + "copy_file_range\0" + "fadvise64\0" + "fadvise64_64\0" + "flock\0" + "get_mempolicy\0" + "getcpu\0" + "getpriority\0" + "getrandom\0" + "ioctl\0" + "ioprio_get\0" + "kcmp\0" + "madvise\0" + "mprotect\0" + "mremap\0" + "name_to_handle_at\0" + "oldolduname\0" + "olduname\0" + "personality\0" + "readahead\0" + "readdir\0" + "remap_file_pages\0" + "sched_get_priority_max\0" + "sched_get_priority_min\0" + "sched_getaffinity\0" + "sched_getattr\0" + "sched_getparam\0" + "sched_getscheduler\0" + "sched_rr_get_interval\0" + "sched_rr_get_interval_time64\0" + "sched_yield\0" + "sendfile\0" + "sendfile64\0" + "setfsgid\0" + "setfsgid32\0" + "setfsuid\0" + "setfsuid32\0" + "setpgid\0" + "setsid\0" + "splice\0" + "sysinfo\0" + "tee\0" + "umask\0" + "uname\0" + "userfaultfd\0" + "vmsplice\0" + }, + [SYSCALL_FILTER_SET_TIMER] = { + .name = "@timer", + .help = "Schedule operations by time", + .value = + "alarm\0" + "getitimer\0" + "setitimer\0" + "timer_create\0" + "timer_delete\0" + "timer_getoverrun\0" + "timer_gettime\0" + "timer_gettime64\0" + "timer_settime\0" + "timer_settime64\0" + "timerfd_create\0" + "timerfd_gettime\0" + "timerfd_gettime64\0" + "timerfd_settime\0" + "timerfd_settime64\0" + "times\0" + }, + [SYSCALL_FILTER_SET_KNOWN] = { + .name = "@known", + .help = "All known syscalls declared in the kernel", + .value = +#include "syscall-list.h" + }, +}; + +const SyscallFilterSet *syscall_filter_set_find(const char *name) { + if (isempty(name) || name[0] != '@') + return NULL; + + for (unsigned i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) + if (streq(syscall_filter_sets[i].name, name)) + return syscall_filter_sets + i; + + return NULL; +} + +static int add_syscall_filter_set( + scmp_filter_ctx seccomp, + const SyscallFilterSet *set, + uint32_t action, + char **exclude, + bool log_missing, + char ***added); + +int seccomp_add_syscall_filter_item( + scmp_filter_ctx *seccomp, + const char *name, + uint32_t action, + char **exclude, + bool log_missing, + char ***added) { + + assert(seccomp); + assert(name); + + if (strv_contains(exclude, name)) + return 0; + + /* Any syscalls that are handled are added to the *added strv. The pointer + * must be either NULL or point to a valid pre-initialized possibly-empty strv. */ + + if (name[0] == '@') { + const SyscallFilterSet *other; + + other = syscall_filter_set_find(name); + if (!other) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Filter set %s is not known!", + name); + + return add_syscall_filter_set(seccomp, other, action, exclude, log_missing, added); + + } else { + int id, r; + + id = seccomp_syscall_resolve_name(name); + if (id == __NR_SCMP_ERROR) { + if (log_missing) + log_debug("System call %s is not known, ignoring.", name); + return 0; + } + + r = seccomp_rule_add_exact(seccomp, action, id, 0); + if (r < 0) { + /* If the system call is not known on this architecture, then that's fine, let's ignore it */ + bool ignore = r == -EDOM; + + if (!ignore || log_missing) + log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m", + name, id, ignore ? ", ignoring" : ""); + if (!ignore) + return r; + } + + if (added) { + r = strv_extend(added, name); + if (r < 0) + return r; + } + + return 0; + } +} + +static int add_syscall_filter_set( + scmp_filter_ctx seccomp, + const SyscallFilterSet *set, + uint32_t action, + char **exclude, + bool log_missing, + char ***added) { + + const char *sys; + int r; + + /* Any syscalls that are handled are added to the *added strv. It needs to be initialized. */ + + assert(seccomp); + assert(set); + + NULSTR_FOREACH(sys, set->value) { + r = seccomp_add_syscall_filter_item(seccomp, sys, action, exclude, log_missing, added); + if (r < 0) + return r; + } + + return 0; +} + +int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing) { + uint32_t arch; + int r; + + assert(set); + + /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for + * each local arch. */ + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, default_action); + if (r < 0) + return r; + + r = add_syscall_filter_set(seccomp, set, action, NULL, log_missing, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to add filter set: %m"); + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing) { + uint32_t arch; + int r; + + /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Set* of syscalls, instead of a + * SyscallFilterSet* table. */ + + if (hashmap_isempty(set) && default_action == SCMP_ACT_ALLOW) + return 0; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + void *syscall_id, *val; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, default_action); + if (r < 0) + return r; + + HASHMAP_FOREACH_KEY(val, syscall_id, set) { + uint32_t a = action; + int id = PTR_TO_INT(syscall_id) - 1; + int error = PTR_TO_INT(val); + + if (error == SECCOMP_ERROR_NUMBER_KILL) + a = scmp_act_kill_process(); +#ifdef SCMP_ACT_LOG + else if (action == SCMP_ACT_LOG) + a = SCMP_ACT_LOG; +#endif + else if (action != SCMP_ACT_ALLOW && error >= 0) + a = SCMP_ACT_ERRNO(error); + + r = seccomp_rule_add_exact(seccomp, a, id, 0); + if (r < 0) { + /* If the system call is not known on this architecture, then that's fine, let's ignore it */ + _cleanup_free_ char *n = NULL; + bool ignore; + + n = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, id); + ignore = r == -EDOM; + if (!ignore || log_missing) + log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m", + strna(n), id, ignore ? ", ignoring" : ""); + if (!ignore) + return r; + } + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_parse_syscall_filter( + const char *name, + int errno_num, + Hashmap *filter, + SeccompParseFlags flags, + const char *unit, + const char *filename, + unsigned line) { + + int r; + + assert(name); + assert(filter); + + if (name[0] == '@') { + const SyscallFilterSet *set; + const char *i; + + set = syscall_filter_set_find(name); + if (!set) { + if (!(flags & SECCOMP_PARSE_PERMISSIVE)) + return -EINVAL; + + log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0, + "Unknown system call group, ignoring: %s", name); + return 0; + } + + NULSTR_FOREACH(i, set->value) { + /* Call ourselves again, for the group to parse. Note that we downgrade logging here (i.e. take + * away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem, + * not a problem in user configuration data and we shouldn't pretend otherwise by complaining + * about them. */ + r = seccomp_parse_syscall_filter(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line); + if (r < 0) + return r; + } + } else { + int id; + + id = seccomp_syscall_resolve_name(name); + if (id == __NR_SCMP_ERROR) { + if (!(flags & SECCOMP_PARSE_PERMISSIVE)) + return -EINVAL; + + log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0, + "Failed to parse system call, ignoring: %s", name); + return 0; + } + + /* If we previously wanted to forbid a syscall and now + * we want to allow it, then remove it from the list. */ + if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_ALLOW_LIST)) { + r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)); + if (r < 0) + switch (r) { + case -ENOMEM: + return flags & SECCOMP_PARSE_LOG ? log_oom() : -ENOMEM; + case -EEXIST: + assert_se(hashmap_update(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)) == 0); + break; + default: + return r; + } + } else + (void) hashmap_remove(filter, INT_TO_PTR(id + 1)); + } + + return 0; +} + +int seccomp_restrict_namespaces(unsigned long retain) { + uint32_t arch; + int r; + + if (DEBUG_LOGGING) { + _cleanup_free_ char *s = NULL; + + (void) namespace_flags_to_string(retain, &s); + log_debug("Restricting namespace to: %s.", strna(s)); + } + + /* NOOP? */ + if (FLAGS_SET(retain, NAMESPACE_FLAGS_ALL)) + return 0; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + if ((retain & NAMESPACE_FLAGS_ALL) == 0) + /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall + * altogether. */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 0); + else + /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the + * special invocation with a zero flags argument, right here. */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 1, + SCMP_A1(SCMP_CMP_EQ, 0)); + if (r < 0) { + log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + for (unsigned i = 0; namespace_flag_map[i].name; i++) { + unsigned long f; + + f = namespace_flag_map[i].flag; + if (FLAGS_SET(retain, f)) { + log_debug("Permitting %s.", namespace_flag_map[i].name); + continue; + } + + log_debug("Blocking %s.", namespace_flag_map[i].name); + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(unshare), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) { + log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + break; + } + + /* On s390/s390x the first two parameters to clone are switched */ + if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X)) + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + else + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) { + log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + break; + } + + if ((retain & NAMESPACE_FLAGS_ALL) != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) { + log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + break; + } + } + } + if (r < 0) + continue; + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_protect_sysctl(void) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + if (IN_SET(arch, + SCMP_ARCH_AARCH64, +#ifdef SCMP_ARCH_RISCV64 + SCMP_ARCH_RISCV64, +#endif + SCMP_ARCH_X32 + )) + /* No _sysctl syscall */ + continue; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(_sysctl), + 0); + if (r < 0) { + log_debug_errno(r, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_protect_syslog(void) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(syslog), + 0); + + if (r < 0) { + log_debug_errno(r, "Failed to add syslog() rule for architecture %s, skipping %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install syslog protection rules for architecture %s, skipping %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_restrict_address_families(Set *address_families, bool allow_list) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + bool supported; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + switch (arch) { + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X32: + case SCMP_ARCH_ARM: + case SCMP_ARCH_AARCH64: + case SCMP_ARCH_MIPSEL64N32: + case SCMP_ARCH_MIPS64N32: + case SCMP_ARCH_MIPSEL64: + case SCMP_ARCH_MIPS64: +#ifdef SCMP_ARCH_RISCV64 + case SCMP_ARCH_RISCV64: +#endif + /* These we know we support (i.e. are the ones that do not use socketcall()) */ + supported = true; + break; + + case SCMP_ARCH_S390: + case SCMP_ARCH_S390X: + case SCMP_ARCH_X86: + case SCMP_ARCH_MIPSEL: + case SCMP_ARCH_MIPS: + case SCMP_ARCH_PPC: + case SCMP_ARCH_PPC64: + case SCMP_ARCH_PPC64LE: + default: + /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we + * don't know */ + supported = false; + break; + } + + if (!supported) + continue; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + if (allow_list) { + int first = 0, last = 0; + void *afp; + + /* If this is an allow list, we first block the address families that are out of + * range and then everything that is not in the set. First, we find the lowest and + * highest address family in the set. */ + + SET_FOREACH(afp, address_families) { + int af = PTR_TO_INT(afp); + + if (af <= 0 || af >= af_max()) + continue; + + if (first == 0 || af < first) + first = af; + + if (last == 0 || af > last) + last = af; + } + + assert((first == 0) == (last == 0)); + + if (first == 0) { + + /* No entries in the valid range, block everything */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 0); + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + } else { + + /* Block everything below the first entry */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_LT, first)); + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + /* Block everything above the last entry */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_GT, last)); + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + /* Block everything between the first and last entry */ + for (int af = 1; af < af_max(); af++) { + + if (set_contains(address_families, INT_TO_PTR(af))) + continue; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_EQ, af)); + if (r < 0) + break; + } + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + } + + } else { + void *af; + + /* If this is a deny list, then generate one rule for each address family that are + * then combined in OR checks. */ + + SET_FOREACH(af, address_families) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af))); + if (r < 0) + break; + } + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_restrict_realtime(void) { + static const int permitted_policies[] = { + SCHED_OTHER, + SCHED_BATCH, + SCHED_IDLE, + }; + + int r, max_policy = 0; + uint32_t arch; + unsigned i; + + /* Determine the highest policy constant we want to allow */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] > max_policy) + max_policy = permitted_policies[i]; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int p; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + /* Go through all policies with lower values than that, and block them -- unless they appear in the + * allow list. */ + for (p = 0; p < max_policy; p++) { + bool good = false; + + /* Check if this is in the allow list. */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] == p) { + good = true; + break; + } + + if (good) + continue; + + /* Deny this policy */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_EQ, p)); + if (r < 0) { + log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + } + + /* Deny-list all other policies, i.e. the ones with higher values. Note that all comparisons + * are unsigned here, hence no need no check for < 0 values. */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_GT, max_policy)); + if (r < 0) { + log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp, + uint32_t arch, + int nr, + unsigned arg_cnt, + const struct scmp_arg_cmp arg) { + int r; + + r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg); + if (r < 0) { + _cleanup_free_ char *n = NULL; + + n = seccomp_syscall_resolve_num_arch(arch, nr); + log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m", + strna(n), + seccomp_arch_to_string(arch)); + } + + return r; +} + +/* For known architectures, check that syscalls are indeed defined or not. */ +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64) +assert_cc(SCMP_SYS(shmget) > 0); +assert_cc(SCMP_SYS(shmat) > 0); +assert_cc(SCMP_SYS(shmdt) > 0); +#endif + +int seccomp_memory_deny_write_execute(void) { + uint32_t arch; + unsigned loaded = 0; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0, r; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + switch (arch) { + + /* Note that on some architectures shmat() isn't available, and the call is multiplexed through ipc(). + * We ignore that here, which means there's still a way to get writable/executable + * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */ + + case SCMP_ARCH_X86: + case SCMP_ARCH_S390: + filter_syscall = SCMP_SYS(mmap2); + block_syscall = SCMP_SYS(mmap); + /* shmat multiplexed, see above */ + break; + + case SCMP_ARCH_PPC: + case SCMP_ARCH_PPC64: + case SCMP_ARCH_PPC64LE: + case SCMP_ARCH_S390X: + filter_syscall = SCMP_SYS(mmap); + /* shmat multiplexed, see above */ + break; + + case SCMP_ARCH_ARM: + filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */ + shmat_syscall = SCMP_SYS(shmat); + break; + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X32: + case SCMP_ARCH_AARCH64: +#ifdef SCMP_ARCH_RISCV64 + case SCMP_ARCH_RISCV64: +#endif + filter_syscall = SCMP_SYS(mmap); /* amd64, x32, arm64 and riscv64 have only mmap */ + shmat_syscall = SCMP_SYS(shmat); + break; + + /* Please add more definitions here, if you port systemd to other architectures! */ + +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__) && !defined(__s390__) && !defined(__s390x__) && !(defined(__riscv) && __riscv_xlen == 64) +#warning "Consider adding the right mmap() syscall definitions here!" +#endif + } + + /* Can't filter mmap() on this arch, then skip it */ + if (filter_syscall == 0) + continue; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall, + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); + if (r < 0) + continue; + + if (block_syscall != 0) { + r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} ); + if (r < 0) + continue; + } + + r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC)); + if (r < 0) + continue; + +#ifdef __NR_pkey_mprotect + r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(pkey_mprotect), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC)); + if (r < 0) + continue; +#endif + + if (shmat_syscall > 0) { + r = add_seccomp_syscall_filter(seccomp, arch, shmat_syscall, + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); + if (r < 0) + continue; + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m", + seccomp_arch_to_string(arch)); + loaded++; + } + + if (loaded == 0) + log_debug("Failed to install any seccomp rules for MemoryDenyWriteExecute=."); + + return loaded; +} + +int seccomp_restrict_archs(Set *archs) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + void *id; + int r; + + /* This installs a filter with no rules, but that restricts the system call architectures to the specified + * list. + * + * There are some qualifications. However the most important use is to stop processes from bypassing + * system call restrictions, in case they used a broader (multiplexing) syscall which is only available + * in a non-native architecture. There are no holes in this use case, at least so far. */ + + /* Note libseccomp includes our "native" (current) architecture in the filter by default. + * We do not remove it. For example, our callers expect to be able to call execve() afterwards + * to run a program with the restrictions applied. */ + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + SET_FOREACH(id, archs) { + r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1); + if (r < 0 && r != -EEXIST) + return r; + } + + /* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32 + * x32 syscalls should basically match x86-64 for everything except the pointer type. + * The important thing is that you can block the old 32-bit x86 syscalls. + * https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */ + + if (seccomp_arch_native() == SCMP_ARCH_X32 || + set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1))) { + + r = seccomp_arch_add(seccomp, SCMP_ARCH_X86_64); + if (r < 0 && r != -EEXIST) + return r; + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + return r; + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to restrict system call architectures, skipping: %m"); + + return 0; +} + +int parse_syscall_archs(char **l, Set **ret_archs) { + _cleanup_set_free_ Set *archs = NULL; + char **s; + int r; + + assert(l); + assert(ret_archs); + + STRV_FOREACH(s, l) { + uint32_t a; + + r = seccomp_arch_from_string(*s, &a); + if (r < 0) + return -EINVAL; + + r = set_ensure_put(&archs, NULL, UINT32_TO_PTR(a + 1)); + if (r < 0) + return -ENOMEM; + } + + *ret_archs = TAKE_PTR(archs); + return 0; +} + +int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) { + const char *i; + int r; + + assert(set); + + NULSTR_FOREACH(i, set->value) { + + if (i[0] == '@') { + const SyscallFilterSet *more; + + more = syscall_filter_set_find(i); + if (!more) + return -ENXIO; + + r = seccomp_filter_set_add(filter, add, more); + if (r < 0) + return r; + } else { + int id; + + id = seccomp_syscall_resolve_name(i); + if (id == __NR_SCMP_ERROR) { + log_debug("Couldn't resolve system call, ignoring: %s", i); + continue; + } + + if (add) { + r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(-1)); + if (r < 0) + return r; + } else + (void) hashmap_remove(filter, INT_TO_PTR(id + 1)); + } + } + + return 0; +} + +int seccomp_lock_personality(unsigned long personality) { + uint32_t arch; + int r; + + if (personality >= PERSONALITY_INVALID) + return -EINVAL; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(personality), + 1, + SCMP_A0(SCMP_CMP_NE, personality)); + if (r < 0) { + log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_protect_hostname(void) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sethostname), + 0); + if (r < 0) { + log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setdomainname), + 0); + if (r < 0) { + log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to apply hostname restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +static int seccomp_restrict_sxid(scmp_filter_ctx seccomp, mode_t m) { + /* Checks the mode_t parameter of the following system calls: + * + * → chmod() + fchmod() + fchmodat() + * → open() + creat() + openat() + * → mkdir() + mkdirat() + * → mknod() + mknodat() + * + * Returns error if *everything* failed, and 0 otherwise. + */ + int r; + bool any = false; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(chmod), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for chmod: %m"); + else + any = true; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(fchmod), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for fchmod: %m"); + else + any = true; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(fchmodat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for fchmodat: %m"); + else + any = true; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mkdir), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for mkdir: %m"); + else + any = true; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mkdirat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for mkdirat: %m"); + else + any = true; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mknod), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for mknod: %m"); + else + any = true; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mknodat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for mknodat: %m"); + else + any = true; + +#if SCMP_SYS(open) > 0 + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(open), + 2, + SCMP_A1(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT), + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for open: %m"); + else + any = true; +#endif + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(openat), + 2, + SCMP_A2(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT), + SCMP_A3(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for openat: %m"); + else + any = true; + +#if defined(__SNR_openat2) + /* The new openat2() system call can't be filtered sensibly, since it moves the flags parameter into + * an indirect structure. Let's block it entirely for now. That should be a reasonably OK thing to do + * for now, since openat2() is very new and code generally needs fallback logic anyway to be + * compatible with kernels that are not absolutely recent. */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(openat2), + 0); + if (r < 0) + log_debug_errno(r, "Failed to add filter for openat2: %m"); + else + any = true; +#endif + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(creat), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + log_debug_errno(r, "Failed to add filter for creat: %m"); + else + any = true; + + return any ? 0 : r; +} + +int seccomp_restrict_suid_sgid(void) { + uint32_t arch; + int r, k; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_restrict_sxid(seccomp, S_ISUID); + if (r < 0) + log_debug_errno(r, "Failed to add suid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch)); + + k = seccomp_restrict_sxid(seccomp, S_ISGID); + if (k < 0) + log_debug_errno(r, "Failed to add sgid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch)); + + if (r < 0 && k < 0) + continue; + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to apply suid/sgid restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +uint32_t scmp_act_kill_process(void) { + + /* Returns SCMP_ACT_KILL_PROCESS if it's supported, and SCMP_ACT_KILL_THREAD otherwise. We never + * actually want to use SCMP_ACT_KILL_THREAD as its semantics are nuts (killing arbitrary threads of + * a program is just a bad idea), but on old kernels/old libseccomp it is all we have, and at least + * for single-threaded apps does the right thing. */ + +#ifdef SCMP_ACT_KILL_PROCESS + if (seccomp_api_get() >= 3) + return SCMP_ACT_KILL_PROCESS; +#endif + + return SCMP_ACT_KILL; /* same as SCMP_ACT_KILL_THREAD */ +} diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h new file mode 100644 index 0000000..6105971 --- /dev/null +++ b/src/shared/seccomp-util.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <seccomp.h> +#include <stdbool.h> +#include <stdint.h> + +#include "errno-list.h" +#include "parse-util.h" +#include "set.h" +#include "string-util.h" + +const char* seccomp_arch_to_string(uint32_t c); +int seccomp_arch_from_string(const char *n, uint32_t *ret); + +int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action); + +bool is_seccomp_available(void); + +typedef struct SyscallFilterSet { + const char *name; + const char *help; + const char *value; +} SyscallFilterSet; + +enum { + /* Please leave DEFAULT first and KNOWN last, but sort the rest alphabetically */ + SYSCALL_FILTER_SET_DEFAULT, + SYSCALL_FILTER_SET_AIO, + SYSCALL_FILTER_SET_BASIC_IO, + SYSCALL_FILTER_SET_CHOWN, + SYSCALL_FILTER_SET_CLOCK, + SYSCALL_FILTER_SET_CPU_EMULATION, + SYSCALL_FILTER_SET_DEBUG, + SYSCALL_FILTER_SET_FILE_SYSTEM, + SYSCALL_FILTER_SET_IO_EVENT, + SYSCALL_FILTER_SET_IPC, + SYSCALL_FILTER_SET_KEYRING, + SYSCALL_FILTER_SET_MEMLOCK, + SYSCALL_FILTER_SET_MODULE, + SYSCALL_FILTER_SET_MOUNT, + SYSCALL_FILTER_SET_NETWORK_IO, + SYSCALL_FILTER_SET_OBSOLETE, + SYSCALL_FILTER_SET_PKEY, + SYSCALL_FILTER_SET_PRIVILEGED, + SYSCALL_FILTER_SET_PROCESS, + SYSCALL_FILTER_SET_RAW_IO, + SYSCALL_FILTER_SET_REBOOT, + SYSCALL_FILTER_SET_RESOURCES, + SYSCALL_FILTER_SET_SETUID, + SYSCALL_FILTER_SET_SIGNAL, + SYSCALL_FILTER_SET_SWAP, + SYSCALL_FILTER_SET_SYNC, + SYSCALL_FILTER_SET_SYSTEM_SERVICE, + SYSCALL_FILTER_SET_TIMER, + SYSCALL_FILTER_SET_KNOWN, + _SYSCALL_FILTER_SET_MAX +}; + +extern const SyscallFilterSet syscall_filter_sets[]; + +const SyscallFilterSet *syscall_filter_set_find(const char *name); + +int seccomp_filter_set_add(Hashmap *s, bool b, const SyscallFilterSet *set); + +int seccomp_add_syscall_filter_item( + scmp_filter_ctx *ctx, + const char *name, + uint32_t action, + char **exclude, + bool log_missing, + char ***added); + +int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing); +int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing); + +typedef enum SeccompParseFlags { + SECCOMP_PARSE_INVERT = 1 << 0, + SECCOMP_PARSE_ALLOW_LIST = 1 << 1, + SECCOMP_PARSE_LOG = 1 << 2, + SECCOMP_PARSE_PERMISSIVE = 1 << 3, +} SeccompParseFlags; + +int seccomp_parse_syscall_filter( + const char *name, + int errno_num, + Hashmap *filter, + SeccompParseFlags flags, + const char *unit, + const char *filename, unsigned line); + +int seccomp_restrict_archs(Set *archs); +int seccomp_restrict_namespaces(unsigned long retain); +int seccomp_protect_sysctl(void); +int seccomp_protect_syslog(void); +int seccomp_restrict_address_families(Set *address_families, bool allow_list); +int seccomp_restrict_realtime(void); +int seccomp_memory_deny_write_execute(void); +int seccomp_lock_personality(unsigned long personality); +int seccomp_protect_hostname(void); +int seccomp_restrict_suid_sgid(void); + +extern const uint32_t seccomp_local_archs[]; + +#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \ + for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \ + seccomp_local_archs[_i] != (uint32_t) -1; \ + (arch) = seccomp_local_archs[++_i]) + +/* EACCES: does not have the CAP_SYS_ADMIN or no_new_privs == 1 + * ENOMEM: out of memory, failed to allocate space for a libseccomp structure, or would exceed a defined constant + * EFAULT: addresses passed as args (by libseccomp) are invalid */ +#define ERRNO_IS_SECCOMP_FATAL(r) \ + IN_SET(abs(r), EPERM, EACCES, ENOMEM, EFAULT) + +DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release); + +int parse_syscall_archs(char **l, Set **ret_archs); + +uint32_t scmp_act_kill_process(void); + +/* This is a special value to be used where syscall filters otherwise expect errno numbers, will be + replaced with real seccomp action. */ +enum { + SECCOMP_ERROR_NUMBER_KILL = INT_MAX - 1, +}; + +static inline bool seccomp_errno_or_action_is_valid(int n) { + return n == SECCOMP_ERROR_NUMBER_KILL || errno_is_valid(n); +} + +static inline int seccomp_parse_errno_or_action(const char *p) { + if (streq_ptr(p, "kill")) + return SECCOMP_ERROR_NUMBER_KILL; + return parse_errno(p); +} + +static inline const char *seccomp_errno_or_action_to_string(int num) { + if (num == SECCOMP_ERROR_NUMBER_KILL) + return "kill"; + return errno_to_name(num); +} diff --git a/src/shared/securebits-util.c b/src/shared/securebits-util.c new file mode 100644 index 0000000..c867807 --- /dev/null +++ b/src/shared/securebits-util.c @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdio.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "securebits-util.h" +#include "string-util.h" + +int secure_bits_to_string_alloc(int i, char **s) { + _cleanup_free_ char *str = NULL; + size_t len; + int r; + + assert(s); + + r = asprintf(&str, "%s%s%s%s%s%s", + (i & (1 << SECURE_KEEP_CAPS)) ? "keep-caps " : "", + (i & (1 << SECURE_KEEP_CAPS_LOCKED)) ? "keep-caps-locked " : "", + (i & (1 << SECURE_NO_SETUID_FIXUP)) ? "no-setuid-fixup " : "", + (i & (1 << SECURE_NO_SETUID_FIXUP_LOCKED)) ? "no-setuid-fixup-locked " : "", + (i & (1 << SECURE_NOROOT)) ? "noroot " : "", + (i & (1 << SECURE_NOROOT_LOCKED)) ? "noroot-locked " : ""); + if (r < 0) + return -ENOMEM; + + len = strlen(str); + if (len != 0) + str[len - 1] = '\0'; + + *s = TAKE_PTR(str); + + return 0; +} + +int secure_bits_from_string(const char *s) { + int secure_bits = 0; + const char *p; + int r; + + for (p = s;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE); + if (r == -ENOMEM) + return r; + if (r <= 0) + break; + + if (streq(word, "keep-caps")) + secure_bits |= 1 << SECURE_KEEP_CAPS; + else if (streq(word, "keep-caps-locked")) + secure_bits |= 1 << SECURE_KEEP_CAPS_LOCKED; + else if (streq(word, "no-setuid-fixup")) + secure_bits |= 1 << SECURE_NO_SETUID_FIXUP; + else if (streq(word, "no-setuid-fixup-locked")) + secure_bits |= 1 << SECURE_NO_SETUID_FIXUP_LOCKED; + else if (streq(word, "noroot")) + secure_bits |= 1 << SECURE_NOROOT; + else if (streq(word, "noroot-locked")) + secure_bits |= 1 << SECURE_NOROOT_LOCKED; + } + + return secure_bits; +} diff --git a/src/shared/securebits-util.h b/src/shared/securebits-util.h new file mode 100644 index 0000000..f2e65cf --- /dev/null +++ b/src/shared/securebits-util.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "missing_securebits.h" + +int secure_bits_to_string_alloc(int i, char **s); +int secure_bits_from_string(const char *s); + +static inline bool secure_bits_is_valid(int i) { + return ((SECURE_ALL_BITS | SECURE_ALL_LOCKS) & i) == i; +} + +static inline int secure_bits_to_string_alloc_with_check(int n, char **s) { + if (!secure_bits_is_valid(n)) + return -EINVAL; + + return secure_bits_to_string_alloc(n, s); +} diff --git a/src/shared/serialize.c b/src/shared/serialize.c new file mode 100644 index 0000000..45f57d6 --- /dev/null +++ b/src/shared/serialize.c @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> + +#include "alloc-util.h" +#include "env-util.h" +#include "escape.h" +#include "fileio.h" +#include "missing_mman.h" +#include "missing_syscall.h" +#include "parse-util.h" +#include "process-util.h" +#include "serialize.h" +#include "strv.h" +#include "tmpfile-util.h" + +int serialize_item(FILE *f, const char *key, const char *value) { + assert(f); + assert(key); + + if (!value) + return 0; + + /* Make sure that anything we serialize we can also read back again with read_line() with a maximum line size + * of LONG_LINE_MAX. This is a safety net only. All code calling us should filter this out earlier anyway. */ + if (strlen(key) + 1 + strlen(value) + 1 > LONG_LINE_MAX) { + log_warning("Attempted to serialize overly long item '%s', refusing.", key); + return -EINVAL; + } + + fputs(key, f); + fputc('=', f); + fputs(value, f); + fputc('\n', f); + + return 1; +} + +int serialize_item_escaped(FILE *f, const char *key, const char *value) { + _cleanup_free_ char *c = NULL; + + assert(f); + assert(key); + + if (!value) + return 0; + + c = cescape(value); + if (!c) + return log_oom(); + + return serialize_item(f, key, c); +} + +int serialize_item_format(FILE *f, const char *key, const char *format, ...) { + char buf[LONG_LINE_MAX]; + va_list ap; + int k; + + assert(f); + assert(key); + assert(format); + + va_start(ap, format); + k = vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + + if (k < 0 || (size_t) k >= sizeof(buf) || strlen(key) + 1 + k + 1 > LONG_LINE_MAX) { + log_warning("Attempted to serialize overly long item '%s', refusing.", key); + return -EINVAL; + } + + fputs(key, f); + fputc('=', f); + fputs(buf, f); + fputc('\n', f); + + return 1; +} + +int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd) { + int copy; + + assert(f); + assert(key); + + if (fd < 0) + return 0; + + copy = fdset_put_dup(fds, fd); + if (copy < 0) + return log_error_errno(copy, "Failed to add file descriptor to serialization set: %m"); + + return serialize_item_format(f, key, "%i", copy); +} + +int serialize_usec(FILE *f, const char *key, usec_t usec) { + assert(f); + assert(key); + + if (usec == USEC_INFINITY) + return 0; + + return serialize_item_format(f, key, USEC_FMT, usec); +} + +int serialize_dual_timestamp(FILE *f, const char *name, const dual_timestamp *t) { + assert(f); + assert(name); + assert(t); + + if (!dual_timestamp_is_set(t)) + return 0; + + return serialize_item_format(f, name, USEC_FMT " " USEC_FMT, t->realtime, t->monotonic); +} + +int serialize_strv(FILE *f, const char *key, char **l) { + int ret = 0, r; + char **i; + + /* Returns the first error, or positive if anything was serialized, 0 otherwise. */ + + STRV_FOREACH(i, l) { + r = serialize_item_escaped(f, key, *i); + if ((ret >= 0 && r < 0) || + (ret == 0 && r > 0)) + ret = r; + } + + return ret; +} + +int deserialize_usec(const char *value, usec_t *ret) { + int r; + + assert(value); + + r = safe_atou64(value, ret); + if (r < 0) + return log_debug_errno(r, "Failed to parse usec value \"%s\": %m", value); + + return 0; +} + +int deserialize_dual_timestamp(const char *value, dual_timestamp *t) { + uint64_t a, b; + int r, pos; + + assert(value); + assert(t); + + pos = strspn(value, WHITESPACE); + if (value[pos] == '-') + return -EINVAL; + pos += strspn(value + pos, DIGITS); + pos += strspn(value + pos, WHITESPACE); + if (value[pos] == '-') + return -EINVAL; + + r = sscanf(value, "%" PRIu64 "%" PRIu64 "%n", &a, &b, &pos); + if (r != 2) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse dual timestamp value \"%s\".", + value); + + if (value[pos] != '\0') + /* trailing garbage */ + return -EINVAL; + + t->realtime = a; + t->monotonic = b; + + return 0; +} + +int deserialize_environment(const char *value, char ***list) { + _cleanup_free_ char *unescaped = NULL; + int r; + + assert(value); + assert(list); + + /* Changes the *environment strv inline. */ + + r = cunescape(value, 0, &unescaped); + if (r < 0) + return log_error_errno(r, "Failed to unescape: %m"); + + r = strv_env_replace(list, unescaped); + if (r < 0) + return log_error_errno(r, "Failed to append environment variable: %m"); + + unescaped = NULL; /* now part of 'list' */ + return 0; +} + +int open_serialization_fd(const char *ident) { + int fd; + + fd = memfd_create(ident, MFD_CLOEXEC); + if (fd < 0) { + const char *path; + + path = getpid_cached() == 1 ? "/run/systemd" : "/tmp"; + fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC); + if (fd < 0) + return fd; + + log_debug("Serializing %s to %s.", ident, path); + } else + log_debug("Serializing %s to memfd.", ident); + + return fd; +} diff --git a/src/shared/serialize.h b/src/shared/serialize.h new file mode 100644 index 0000000..6d4f1ef --- /dev/null +++ b/src/shared/serialize.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +#include "fdset.h" +#include "macro.h" +#include "string-util.h" +#include "time-util.h" + +int serialize_item(FILE *f, const char *key, const char *value); +int serialize_item_escaped(FILE *f, const char *key, const char *value); +int serialize_item_format(FILE *f, const char *key, const char *value, ...) _printf_(3,4); +int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd); +int serialize_usec(FILE *f, const char *key, usec_t usec); +int serialize_dual_timestamp(FILE *f, const char *key, const dual_timestamp *t); +int serialize_strv(FILE *f, const char *key, char **l); + +static inline int serialize_bool(FILE *f, const char *key, bool b) { + return serialize_item(f, key, yes_no(b)); +} + +int deserialize_usec(const char *value, usec_t *timestamp); +int deserialize_dual_timestamp(const char *value, dual_timestamp *t); +int deserialize_environment(const char *value, char ***environment); + +int open_serialization_fd(const char *ident); diff --git a/src/shared/service-util.c b/src/shared/service-util.c new file mode 100644 index 0000000..092be6e --- /dev/null +++ b/src/shared/service-util.c @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <getopt.h> +#include <stdio.h> + +#include "alloc-util.h" +#include "pretty-print.h" +#include "service-util.h" +#include "terminal-util.h" +#include "util.h" + +static int help(const char *program_path, const char *service, const char *description, bool bus_introspect) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man(service, "8", &link); + if (r < 0) + return log_oom(); + + printf("%s [OPTIONS...]\n\n" + "%s%s%s\n\n" + "This program takes no positional arguments.\n\n" + "%sOptions%s:\n" + " -h --help Show this help\n" + " --version Show package version\n" + " --bus-introspect=PATH Write D-Bus XML introspection data\n" + "\nSee the %s for details.\n" + , program_path + , ansi_highlight(), description, ansi_normal() + , ansi_underline(), ansi_normal() + , link + ); + + return 0; /* No further action */ +} + +int service_parse_argv( + const char *service, + const char *description, + const BusObjectImplementation* const* bus_objects, + int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_BUS_INTROSPECT, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT }, + {} + }; + + int c; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) + switch(c) { + + case 'h': + return help(argv[0], service, description, bus_objects); + + case ARG_VERSION: + return version(); + + case ARG_BUS_INTROSPECT: + return bus_introspect_implementations( + stdout, + optarg, + bus_objects); + + case '?': + return -EINVAL; + + default: + assert_not_reached("Unknown option code."); + } + + if (optind < argc) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "This program takes no arguments."); + + return 1; /* Further action */ +} diff --git a/src/shared/service-util.h b/src/shared/service-util.h new file mode 100644 index 0000000..360341f --- /dev/null +++ b/src/shared/service-util.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "bus-object.h" + +int service_parse_argv( + const char *service, + const char *description, + const BusObjectImplementation* const* bus_objects, + int argc, char *argv[]); diff --git a/src/shared/sleep-config.c b/src/shared/sleep-config.c new file mode 100644 index 0000000..cea5148 --- /dev/null +++ b/src/shared/sleep-config.c @@ -0,0 +1,703 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/*** + Copyright © 2018 Dell Inc. +***/ + +#include <errno.h> +#include <fcntl.h> +#include <linux/fs.h> +#include <stdbool.h> +#include <stddef.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <syslog.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "blockdev-util.h" +#include "btrfs-util.h" +#include "conf-parser.h" +#include "def.h" +#include "env-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "sleep-config.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" + +int parse_sleep_config(SleepConfig **ret_sleep_config) { + _cleanup_(free_sleep_configp) SleepConfig *sc; + int allow_suspend = -1, allow_hibernate = -1, + allow_s2h = -1, allow_hybrid_sleep = -1; + + sc = new0(SleepConfig, 1); + if (!sc) + return log_oom(); + + const ConfigTableItem items[] = { + { "Sleep", "AllowSuspend", config_parse_tristate, 0, &allow_suspend }, + { "Sleep", "AllowHibernation", config_parse_tristate, 0, &allow_hibernate }, + { "Sleep", "AllowSuspendThenHibernate", config_parse_tristate, 0, &allow_s2h }, + { "Sleep", "AllowHybridSleep", config_parse_tristate, 0, &allow_hybrid_sleep }, + + { "Sleep", "SuspendMode", config_parse_strv, 0, &sc->suspend_modes }, + { "Sleep", "SuspendState", config_parse_strv, 0, &sc->suspend_states }, + { "Sleep", "HibernateMode", config_parse_strv, 0, &sc->hibernate_modes }, + { "Sleep", "HibernateState", config_parse_strv, 0, &sc->hibernate_states }, + { "Sleep", "HybridSleepMode", config_parse_strv, 0, &sc->hybrid_modes }, + { "Sleep", "HybridSleepState", config_parse_strv, 0, &sc->hybrid_states }, + + { "Sleep", "HibernateDelaySec", config_parse_sec, 0, &sc->hibernate_delay_sec}, + {} + }; + + (void) config_parse_many_nulstr( + PKGSYSCONFDIR "/sleep.conf", + CONF_PATHS_NULSTR("systemd/sleep.conf.d"), + "Sleep\0", + config_item_table_lookup, items, + CONFIG_PARSE_WARN, + NULL, + NULL); + + /* use default values unless set */ + sc->allow_suspend = allow_suspend != 0; + sc->allow_hibernate = allow_hibernate != 0; + sc->allow_hybrid_sleep = allow_hybrid_sleep >= 0 ? allow_hybrid_sleep + : (allow_suspend != 0 && allow_hibernate != 0); + sc->allow_s2h = allow_s2h >= 0 ? allow_s2h + : (allow_suspend != 0 && allow_hibernate != 0); + + if (!sc->suspend_states) + sc->suspend_states = strv_new("mem", "standby", "freeze"); + if (!sc->hibernate_modes) + sc->hibernate_modes = strv_new("platform", "shutdown"); + if (!sc->hibernate_states) + sc->hibernate_states = strv_new("disk"); + if (!sc->hybrid_modes) + sc->hybrid_modes = strv_new("suspend", "platform", "shutdown"); + if (!sc->hybrid_states) + sc->hybrid_states = strv_new("disk"); + if (sc->hibernate_delay_sec == 0) + sc->hibernate_delay_sec = 2 * USEC_PER_HOUR; + + /* ensure values set for all required fields */ + if (!sc->suspend_states || !sc->hibernate_modes + || !sc->hibernate_states || !sc->hybrid_modes || !sc->hybrid_states) + return log_oom(); + + *ret_sleep_config = TAKE_PTR(sc); + + return 0; +} + +int can_sleep_state(char **types) { + _cleanup_free_ char *text = NULL; + int r; + + if (strv_isempty(types)) + return true; + + /* If /sys is read-only we cannot sleep */ + if (access("/sys/power/state", W_OK) < 0) { + log_debug_errno(errno, "/sys/power/state is not writable, cannot sleep: %m"); + return false; + } + + r = read_one_line_file("/sys/power/state", &text); + if (r < 0) { + log_debug_errno(r, "Failed to read /sys/power/state, cannot sleep: %m"); + return false; + } + + const char *found; + r = string_contains_word_strv(text, NULL, types, &found); + if (r < 0) + return log_debug_errno(r, "Failed to parse /sys/power/state: %m"); + if (r > 0) + log_debug("Sleep mode \"%s\" is supported by the kernel.", found); + else if (DEBUG_LOGGING) { + _cleanup_free_ char *t = strv_join(types, "/"); + log_debug("Sleep mode %s not supported by the kernel, sorry.", strnull(t)); + } + return r; +} + +int can_sleep_disk(char **types) { + _cleanup_free_ char *text = NULL; + int r; + + if (strv_isempty(types)) + return true; + + /* If /sys is read-only we cannot sleep */ + if (access("/sys/power/disk", W_OK) < 0) { + log_debug_errno(errno, "/sys/power/disk is not writable: %m"); + return false; + } + + r = read_one_line_file("/sys/power/disk", &text); + if (r < 0) { + log_debug_errno(r, "Couldn't read /sys/power/disk: %m"); + return false; + } + + for (const char *p = text;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, 0); + if (r < 0) + return log_debug_errno(r, "Failed to parse /sys/power/disk: %m"); + if (r == 0) + break; + + char *s = word; + size_t l = strlen(s); + if (s[0] == '[' && s[l-1] == ']') { + s[l-1] = '\0'; + s++; + } + + if (strv_contains(types, s)) { + log_debug("Disk sleep mode \"%s\" is supported by the kernel.", s); + return true; + } + } + + if (DEBUG_LOGGING) { + _cleanup_free_ char *t = strv_join(types, "/"); + log_debug("Disk sleep mode %s not supported by the kernel, sorry.", strnull(t)); + } + return false; +} + +#define HIBERNATION_SWAP_THRESHOLD 0.98 + +SwapEntry* swap_entry_free(SwapEntry *se) { + if (!se) + return NULL; + + free(se->device); + free(se->type); + + return mfree(se); +} + +HibernateLocation* hibernate_location_free(HibernateLocation *hl) { + if (!hl) + return NULL; + + swap_entry_free(hl->swap); + + return mfree(hl); +} + +static int swap_device_to_device_id(const SwapEntry *swap, dev_t *ret_dev) { + struct stat sb; + int r; + + assert(swap); + assert(swap->device); + assert(swap->type); + + r = stat(swap->device, &sb); + if (r < 0) + return -errno; + + if (streq(swap->type, "partition")) { + if (!S_ISBLK(sb.st_mode)) + return -ENOTBLK; + + *ret_dev = sb.st_rdev; + return 0; + } + + return get_block_device(swap->device, ret_dev); +} + +/* + * Attempt to calculate the swap file offset on supported filesystems. On unsupported + * filesystems, a debug message is logged and ret_offset is set to UINT64_MAX. + */ +static int calculate_swap_file_offset(const SwapEntry *swap, uint64_t *ret_offset) { + _cleanup_close_ int fd = -1; + _cleanup_free_ struct fiemap *fiemap = NULL; + struct stat sb; + int r, btrfs; + + assert(swap); + assert(swap->device); + assert(streq(swap->type, "file")); + + fd = open(swap->device, O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return log_debug_errno(errno, "Failed to open swap file %s to determine on-disk offset: %m", swap->device); + + if (fstat(fd, &sb) < 0) + return log_debug_errno(errno, "Failed to stat %s: %m", swap->device); + + btrfs = btrfs_is_filesystem(fd); + if (btrfs < 0) + return log_debug_errno(btrfs, "Error checking %s for Btrfs filesystem: %m", swap->device); + if (btrfs > 0) { + log_debug("%s: detection of swap file offset on Btrfs is not supported", swap->device); + *ret_offset = UINT64_MAX; + return 0; + } + + r = read_fiemap(fd, &fiemap); + if (r < 0) + return log_debug_errno(r, "Unable to read extent map for '%s': %m", swap->device); + + *ret_offset = fiemap->fm_extents[0].fe_physical / page_size(); + return 0; +} + +static int read_resume_files(dev_t *ret_resume, uint64_t *ret_resume_offset) { + _cleanup_free_ char *resume_str = NULL, *resume_offset_str = NULL; + uint64_t resume_offset = 0; + dev_t resume; + int r; + + r = read_one_line_file("/sys/power/resume", &resume_str); + if (r < 0) + return log_debug_errno(r, "Error reading /sys/power/resume: %m"); + + r = parse_dev(resume_str, &resume); + if (r < 0) + return log_debug_errno(r, "Error parsing /sys/power/resume device: %s: %m", resume_str); + + r = read_one_line_file("/sys/power/resume_offset", &resume_offset_str); + if (r == -ENOENT) + log_debug_errno(r, "Kernel does not support resume_offset; swap file offset detection will be skipped."); + else if (r < 0) + return log_debug_errno(r, "Error reading /sys/power/resume_offset: %m"); + else { + r = safe_atou64(resume_offset_str, &resume_offset); + if (r < 0) + return log_debug_errno(r, "Failed to parse value in /sys/power/resume_offset \"%s\": %m", resume_offset_str); + } + + if (resume_offset > 0 && resume == 0) + log_debug("Warning: found /sys/power/resume_offset==%" PRIu64 ", but /sys/power/resume unset. Misconfiguration?", + resume_offset); + + *ret_resume = resume; + *ret_resume_offset = resume_offset; + + return 0; +} + +/* + * Determine if the HibernateLocation matches the resume= (device) and resume_offset= (file). + */ +static bool location_is_resume_device(const HibernateLocation *location, dev_t sys_resume, uint64_t sys_offset) { + if (!location) + return false; + + return sys_resume > 0 && + sys_resume == location->devno && + (sys_offset == location->offset || (sys_offset > 0 && location->offset == UINT64_MAX)); +} + +/* + * Attempt to find the hibernation location by parsing /proc/swaps, /sys/power/resume, and + * /sys/power/resume_offset. + * + * Returns: + * 1 - Values are set in /sys/power/resume and /sys/power/resume_offset. + * ret_hibernate_location will represent matching /proc/swap entry if identified or NULL if not. + * + * 0 - No values are set in /sys/power/resume and /sys/power/resume_offset. + ret_hibernate_location will represent the highest priority swap with most remaining space discovered in /proc/swaps. + * + * Negative value in the case of error. + */ +int find_hibernate_location(HibernateLocation **ret_hibernate_location) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_(hibernate_location_freep) HibernateLocation *hibernate_location = NULL; + dev_t sys_resume; + uint64_t sys_offset = 0; + bool resume_match = false; + int r; + + /* read the /sys/power/resume & /sys/power/resume_offset values */ + r = read_resume_files(&sys_resume, &sys_offset); + if (r < 0) + return r; + + f = fopen("/proc/swaps", "re"); + if (!f) { + log_debug_errno(errno, "Failed to open /proc/swaps: %m"); + return errno == ENOENT ? -EOPNOTSUPP : -errno; /* Convert swap not supported to a recognizable error */ + } + + (void) fscanf(f, "%*s %*s %*s %*s %*s\n"); + for (unsigned i = 1;; i++) { + _cleanup_(swap_entry_freep) SwapEntry *swap = NULL; + uint64_t swap_offset = 0; + int k; + + swap = new0(SwapEntry, 1); + if (!swap) + return -ENOMEM; + + k = fscanf(f, + "%ms " /* device/file */ + "%ms " /* type of swap */ + "%" PRIu64 /* swap size */ + "%" PRIu64 /* used */ + "%i\n", /* priority */ + &swap->device, &swap->type, &swap->size, &swap->used, &swap->priority); + if (k == EOF) + break; + if (k != 5) { + log_debug("Failed to parse /proc/swaps:%u, ignoring", i); + continue; + } + + if (streq(swap->type, "file")) { + if (endswith(swap->device, "\\040(deleted)")) { + log_debug("Ignoring deleted swap file '%s'.", swap->device); + continue; + } + + r = calculate_swap_file_offset(swap, &swap_offset); + if (r < 0) + return r; + + } else if (streq(swap->type, "partition")) { + const char *fn; + + fn = path_startswith(swap->device, "/dev/"); + if (fn && startswith(fn, "zram")) { + log_debug("%s: ignoring zram swap", swap->device); + continue; + } + + } else { + log_debug("%s: swap type %s is unsupported for hibernation, ignoring", swap->device, swap->type); + continue; + } + + /* prefer resume device or highest priority swap with most remaining space */ + if (hibernate_location && swap->priority < hibernate_location->swap->priority) { + log_debug("%s: ignoring device with lower priority", swap->device); + continue; + } + if (hibernate_location && + (swap->priority == hibernate_location->swap->priority + && swap->size - swap->used < hibernate_location->swap->size - hibernate_location->swap->used)) { + log_debug("%s: ignoring device with lower usable space", swap->device); + continue; + } + + dev_t swap_device; + r = swap_device_to_device_id(swap, &swap_device); + if (r < 0) + return log_debug_errno(r, "%s: failed to query device number: %m", swap->device); + if (swap_device == 0) + return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), "%s: not backed by block device.", swap->device); + + hibernate_location = hibernate_location_free(hibernate_location); + hibernate_location = new(HibernateLocation, 1); + if (!hibernate_location) + return -ENOMEM; + + *hibernate_location = (HibernateLocation) { + .devno = swap_device, + .offset = swap_offset, + .swap = TAKE_PTR(swap), + }; + + /* if the swap is the resume device, stop the loop */ + if (location_is_resume_device(hibernate_location, sys_resume, sys_offset)) { + log_debug("%s: device matches configured resume settings.", hibernate_location->swap->device); + resume_match = true; + break; + } + + log_debug("%s: is a candidate device.", hibernate_location->swap->device); + } + + /* We found nothing at all */ + if (!hibernate_location) + return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS), + "No possible swap partitions or files suitable for hibernation were found in /proc/swaps."); + + /* resume= is set but a matching /proc/swaps entry was not found */ + if (sys_resume != 0 && !resume_match) + return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS), + "No swap partitions or files matching resume config were found in /proc/swaps."); + + if (hibernate_location->offset == UINT64_MAX) { + if (sys_offset == 0) + return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS), "Offset detection failed and /sys/power/resume_offset is not set."); + + hibernate_location->offset = sys_offset; + } + + if (resume_match) + log_debug("Hibernation will attempt to use swap entry with path: %s, device: %u:%u, offset: %" PRIu64 ", priority: %i", + hibernate_location->swap->device, major(hibernate_location->devno), minor(hibernate_location->devno), + hibernate_location->offset, hibernate_location->swap->priority); + else + log_debug("/sys/power/resume is not configured; attempting to hibernate with path: %s, device: %u:%u, offset: %" PRIu64 ", priority: %i", + hibernate_location->swap->device, major(hibernate_location->devno), minor(hibernate_location->devno), + hibernate_location->offset, hibernate_location->swap->priority); + + *ret_hibernate_location = TAKE_PTR(hibernate_location); + + if (resume_match) + return 1; + + return 0; +} + +static bool enough_swap_for_hibernation(void) { + _cleanup_free_ char *active = NULL; + _cleanup_(hibernate_location_freep) HibernateLocation *hibernate_location = NULL; + unsigned long long act = 0; + int r; + + if (getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0) + return true; + + r = find_hibernate_location(&hibernate_location); + if (r < 0) + return false; + + /* If /sys/power/{resume,resume_offset} is configured but a matching entry + * could not be identified in /proc/swaps, user is likely using Btrfs with a swapfile; + * return true and let the system attempt hibernation. + */ + if (r > 0 && !hibernate_location) { + log_debug("Unable to determine remaining swap space; hibernation may fail"); + return true; + } + + if (!hibernate_location) + return false; + + r = get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE, &active); + if (r < 0) { + log_debug_errno(r, "Failed to retrieve Active(anon) from /proc/meminfo: %m"); + return false; + } + + r = safe_atollu(active, &act); + if (r < 0) { + log_debug_errno(r, "Failed to parse Active(anon) from /proc/meminfo: %s: %m", active); + return false; + } + + r = act <= (hibernate_location->swap->size - hibernate_location->swap->used) * HIBERNATION_SWAP_THRESHOLD; + log_debug("%s swap for hibernation, Active(anon)=%llu kB, size=%" PRIu64 " kB, used=%" PRIu64 " kB, threshold=%.2g%%", + r ? "Enough" : "Not enough", act, hibernate_location->swap->size, hibernate_location->swap->used, 100*HIBERNATION_SWAP_THRESHOLD); + + return r; +} + +int read_fiemap(int fd, struct fiemap **ret) { + _cleanup_free_ struct fiemap *fiemap = NULL, *result_fiemap = NULL; + struct stat statinfo; + uint32_t result_extents = 0; + uint64_t fiemap_start = 0, fiemap_length; + const size_t n_extra = DIV_ROUND_UP(sizeof(struct fiemap), sizeof(struct fiemap_extent)); + size_t fiemap_allocated = n_extra, result_fiemap_allocated = n_extra; + + if (fstat(fd, &statinfo) < 0) + return log_debug_errno(errno, "Cannot determine file size: %m"); + if (!S_ISREG(statinfo.st_mode)) + return -ENOTTY; + fiemap_length = statinfo.st_size; + + /* Zero this out in case we run on a file with no extents */ + fiemap = calloc(n_extra, sizeof(struct fiemap_extent)); + if (!fiemap) + return -ENOMEM; + + result_fiemap = malloc_multiply(n_extra, sizeof(struct fiemap_extent)); + if (!result_fiemap) + return -ENOMEM; + + /* XFS filesystem has incorrect implementation of fiemap ioctl and + * returns extents for only one block-group at a time, so we need + * to handle it manually, starting the next fiemap call from the end + * of the last extent + */ + while (fiemap_start < fiemap_length) { + *fiemap = (struct fiemap) { + .fm_start = fiemap_start, + .fm_length = fiemap_length, + .fm_flags = FIEMAP_FLAG_SYNC, + }; + + /* Find out how many extents there are */ + if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) + return log_debug_errno(errno, "Failed to read extents: %m"); + + /* Nothing to process */ + if (fiemap->fm_mapped_extents == 0) + break; + + /* Resize fiemap to allow us to read in the extents, result fiemap has to hold all + * the extents for the whole file. Add space for the initial struct fiemap. */ + if (!greedy_realloc0((void**) &fiemap, &fiemap_allocated, + n_extra + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent))) + return -ENOMEM; + + fiemap->fm_extent_count = fiemap->fm_mapped_extents; + fiemap->fm_mapped_extents = 0; + + if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) + return log_debug_errno(errno, "Failed to read extents: %m"); + + /* Resize result_fiemap to allow us to copy in the extents */ + if (!greedy_realloc((void**) &result_fiemap, &result_fiemap_allocated, + n_extra + result_extents + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent))) + return -ENOMEM; + + memcpy(result_fiemap->fm_extents + result_extents, + fiemap->fm_extents, + sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents); + + result_extents += fiemap->fm_mapped_extents; + + /* Highly unlikely that it is zero */ + if (_likely_(fiemap->fm_mapped_extents > 0)) { + uint32_t i = fiemap->fm_mapped_extents - 1; + + fiemap_start = fiemap->fm_extents[i].fe_logical + + fiemap->fm_extents[i].fe_length; + + if (fiemap->fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST) + break; + } + } + + memcpy(result_fiemap, fiemap, sizeof(struct fiemap)); + result_fiemap->fm_mapped_extents = result_extents; + *ret = TAKE_PTR(result_fiemap); + return 0; +} + +static int can_sleep_internal(const char *verb, bool check_allowed, const SleepConfig *sleep_config); + +static bool can_s2h(const SleepConfig *sleep_config) { + const char *p; + int r; + + if (!clock_supported(CLOCK_BOOTTIME_ALARM)) { + log_debug("CLOCK_BOOTTIME_ALARM is not supported."); + return false; + } + + FOREACH_STRING(p, "suspend", "hibernate") { + r = can_sleep_internal(p, false, sleep_config); + if (IN_SET(r, 0, -ENOSPC, -EADV)) { + log_debug("Unable to %s system.", p); + return false; + } + if (r < 0) + return log_debug_errno(r, "Failed to check if %s is possible: %m", p); + } + + return true; +} + +static int can_sleep_internal(const char *verb, bool check_allowed, const SleepConfig *sleep_config) { + bool allow; + char **modes = NULL, **states = NULL; + int r; + + assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate")); + + r = sleep_settings(verb, sleep_config, &allow, &modes, &states); + if (r < 0) + return false; + + if (check_allowed && !allow) { + log_debug("Sleep mode \"%s\" is disabled by configuration.", verb); + return false; + } + + if (streq(verb, "suspend-then-hibernate")) + return can_s2h(sleep_config); + + if (!can_sleep_state(states) || !can_sleep_disk(modes)) + return false; + + if (streq(verb, "suspend")) + return true; + + if (!enough_swap_for_hibernation()) + return -ENOSPC; + + return true; +} + +int can_sleep(const char *verb) { + _cleanup_(free_sleep_configp) SleepConfig *sleep_config = NULL; + int r; + + r = parse_sleep_config(&sleep_config); + if (r < 0) + return r; + + return can_sleep_internal(verb, true, sleep_config); +} + +int sleep_settings(const char *verb, const SleepConfig *sleep_config, bool *ret_allow, char ***ret_modes, char ***ret_states) { + + assert(verb); + assert(sleep_config); + assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate")); + + if (streq(verb, "suspend")) { + *ret_allow = sleep_config->allow_suspend; + *ret_modes = sleep_config->suspend_modes; + *ret_states = sleep_config->suspend_states; + } else if (streq(verb, "hibernate")) { + *ret_allow = sleep_config->allow_hibernate; + *ret_modes = sleep_config->hibernate_modes; + *ret_states = sleep_config->hibernate_states; + } else if (streq(verb, "hybrid-sleep")) { + *ret_allow = sleep_config->allow_hybrid_sleep; + *ret_modes = sleep_config->hybrid_modes; + *ret_states = sleep_config->hybrid_states; + } else if (streq(verb, "suspend-then-hibernate")) { + *ret_allow = sleep_config->allow_s2h; + *ret_modes = *ret_states = NULL; + } + + /* suspend modes empty by default */ + if ((!ret_modes && !streq(verb, "suspend")) || !ret_states) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No modes or states set for %s; Check sleep.conf", verb); + + return 0; +} + +SleepConfig* free_sleep_config(SleepConfig *sc) { + if (!sc) + return NULL; + + strv_free(sc->suspend_modes); + strv_free(sc->suspend_states); + + strv_free(sc->hibernate_modes); + strv_free(sc->hibernate_states); + + strv_free(sc->hybrid_modes); + strv_free(sc->hybrid_states); + + return mfree(sc); +} diff --git a/src/shared/sleep-config.h b/src/shared/sleep-config.h new file mode 100644 index 0000000..4b30e6d --- /dev/null +++ b/src/shared/sleep-config.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <linux/fiemap.h> +#include "time-util.h" + +typedef struct SleepConfig { + bool allow_suspend; /* AllowSuspend */ + bool allow_hibernate; /* AllowHibernation */ + bool allow_s2h; /* AllowSuspendThenHibernate */ + bool allow_hybrid_sleep; /* AllowHybridSleep */ + + char **suspend_modes; /* SuspendMode */ + char **suspend_states; /* SuspendState */ + char **hibernate_modes; /* HibernateMode */ + char **hibernate_states; /* HibernateState */ + char **hybrid_modes; /* HybridSleepMode */ + char **hybrid_states; /* HybridSleepState */ + + usec_t hibernate_delay_sec; /* HibernateDelaySec */ +} SleepConfig; + +SleepConfig* free_sleep_config(SleepConfig *sc); +DEFINE_TRIVIAL_CLEANUP_FUNC(SleepConfig*, free_sleep_config); + +/* entry in /proc/swaps */ +typedef struct SwapEntry { + char *device; + char *type; + uint64_t size; + uint64_t used; + int priority; +} SwapEntry; + +SwapEntry* swap_entry_free(SwapEntry *se); +DEFINE_TRIVIAL_CLEANUP_FUNC(SwapEntry*, swap_entry_free); + +/* + * represents values for /sys/power/resume & /sys/power/resume_offset + * and the matching /proc/swap entry. + */ +typedef struct HibernateLocation { + dev_t devno; + uint64_t offset; + SwapEntry *swap; +} HibernateLocation; + +HibernateLocation* hibernate_location_free(HibernateLocation *hl); +DEFINE_TRIVIAL_CLEANUP_FUNC(HibernateLocation*, hibernate_location_free); + +int sleep_settings(const char *verb, const SleepConfig *sleep_config, bool *ret_allow, char ***ret_modes, char ***ret_states); + +int read_fiemap(int fd, struct fiemap **ret); +int parse_sleep_config(SleepConfig **sleep_config); +int find_hibernate_location(HibernateLocation **ret_hibernate_location); + +int can_sleep(const char *verb); +int can_sleep_disk(char **types); +int can_sleep_state(char **types); diff --git a/src/shared/socket-netlink.c b/src/shared/socket-netlink.c new file mode 100644 index 0000000..4a7007d --- /dev/null +++ b/src/shared/socket-netlink.c @@ -0,0 +1,493 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <arpa/inet.h> +#include <errno.h> +#include <net/if.h> +#include <string.h> + +#include "alloc-util.h" +#include "errno-util.h" +#include "extract-word.h" +#include "log.h" +#include "memory-util.h" +#include "netlink-util.h" +#include "parse-util.h" +#include "socket-netlink.h" +#include "socket-util.h" +#include "string-util.h" + +int resolve_ifname(sd_netlink **rtnl, const char *name) { + int r; + + /* Like if_nametoindex, but resolves "alternative names" too. */ + + assert(name); + + r = if_nametoindex(name); + if (r > 0) + return r; + + return rtnl_resolve_link_alternative_name(rtnl, name); +} + +int resolve_interface(sd_netlink **rtnl, const char *name) { + int r; + + /* Like resolve_ifname, but resolves interface numbers too. */ + + assert(name); + + r = parse_ifindex(name); + if (r > 0) + return r; + assert(r < 0); + + return resolve_ifname(rtnl, name); +} + +int resolve_interface_or_warn(sd_netlink **rtnl, const char *name) { + int r; + + r = resolve_interface(rtnl, name); + if (r < 0) + return log_error_errno(r, "Failed to resolve interface \"%s\": %m", name); + return r; +} + +int socket_address_parse(SocketAddress *a, const char *s) { + _cleanup_free_ char *n = NULL; + char *e; + int r; + + assert(a); + assert(s); + + if (IN_SET(*s, '/', '@')) { + /* AF_UNIX socket */ + struct sockaddr_un un; + + r = sockaddr_un_set_path(&un, s); + if (r < 0) + return r; + + *a = (SocketAddress) { + .sockaddr.un = un, + .size = r, + }; + + } else if (startswith(s, "vsock:")) { + /* AF_VSOCK socket in vsock:cid:port notation */ + const char *cid_start = s + STRLEN("vsock:"); + unsigned port, cid; + + e = strchr(cid_start, ':'); + if (!e) + return -EINVAL; + + r = safe_atou(e+1, &port); + if (r < 0) + return r; + + n = strndup(cid_start, e - cid_start); + if (!n) + return -ENOMEM; + + if (isempty(n)) + cid = VMADDR_CID_ANY; + else { + r = safe_atou(n, &cid); + if (r < 0) + return r; + } + + *a = (SocketAddress) { + .sockaddr.vm = { + .svm_cid = cid, + .svm_family = AF_VSOCK, + .svm_port = port, + }, + .size = sizeof(struct sockaddr_vm), + }; + + } else { + uint16_t port; + + r = parse_ip_port(s, &port); + if (r == -ERANGE) + return r; /* Valid port syntax, but the numerical value is wrong for a port. */ + if (r >= 0) { + /* Just a port */ + if (socket_ipv6_is_supported()) + *a = (SocketAddress) { + .sockaddr.in6 = { + .sin6_family = AF_INET6, + .sin6_port = htobe16(port), + .sin6_addr = in6addr_any, + }, + .size = sizeof(struct sockaddr_in6), + }; + else + *a = (SocketAddress) { + .sockaddr.in = { + .sin_family = AF_INET, + .sin_port = htobe16(port), + .sin_addr.s_addr = INADDR_ANY, + }, + .size = sizeof(struct sockaddr_in), + }; + + } else { + union in_addr_union address; + int family, ifindex; + + r = in_addr_port_ifindex_name_from_string_auto(s, &family, &address, &port, &ifindex, NULL); + if (r < 0) + return r; + + if (port == 0) /* No port, no go. */ + return -EINVAL; + + if (family == AF_INET) + *a = (SocketAddress) { + .sockaddr.in = { + .sin_family = AF_INET, + .sin_addr = address.in, + .sin_port = htobe16(port), + }, + .size = sizeof(struct sockaddr_in), + }; + else if (family == AF_INET6) + *a = (SocketAddress) { + .sockaddr.in6 = { + .sin6_family = AF_INET6, + .sin6_addr = address.in6, + .sin6_port = htobe16(port), + .sin6_scope_id = ifindex, + }, + .size = sizeof(struct sockaddr_in6), + }; + else + assert_not_reached("Family quarrel"); + } + } + + return 0; +} + +int socket_address_parse_and_warn(SocketAddress *a, const char *s) { + SocketAddress b; + int r; + + /* Similar to socket_address_parse() but warns for IPv6 sockets when we don't support them. */ + + r = socket_address_parse(&b, s); + if (r < 0) + return r; + + if (!socket_ipv6_is_supported() && b.sockaddr.sa.sa_family == AF_INET6) { + log_warning("Binding to IPv6 address not available since kernel does not support IPv6."); + return -EAFNOSUPPORT; + } + + *a = b; + return 0; +} + +int socket_address_parse_netlink(SocketAddress *a, const char *s) { + _cleanup_free_ char *word = NULL; + unsigned group = 0; + int family, r; + + assert(a); + assert(s); + + r = extract_first_word(&s, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + family = netlink_family_from_string(word); + if (family < 0) + return -EINVAL; + + if (!isempty(s)) { + r = safe_atou(s, &group); + if (r < 0) + return r; + } + + *a = (SocketAddress) { + .type = SOCK_RAW, + .sockaddr.nl.nl_family = AF_NETLINK, + .sockaddr.nl.nl_groups = group, + .protocol = family, + .size = sizeof(struct sockaddr_nl), + }; + + return 0; +} + +bool socket_address_is(const SocketAddress *a, const char *s, int type) { + struct SocketAddress b; + + assert(a); + assert(s); + + if (socket_address_parse(&b, s) < 0) + return false; + + b.type = type; + + return socket_address_equal(a, &b); +} + +bool socket_address_is_netlink(const SocketAddress *a, const char *s) { + struct SocketAddress b; + + assert(a); + assert(s); + + if (socket_address_parse_netlink(&b, s) < 0) + return false; + + return socket_address_equal(a, &b); +} + +int make_socket_fd(int log_level, const char* address, int type, int flags) { + SocketAddress a; + int fd, r; + + r = socket_address_parse(&a, address); + if (r < 0) + return log_error_errno(r, "Failed to parse socket address \"%s\": %m", address); + + a.type = type; + + fd = socket_address_listen(&a, type | flags, SOMAXCONN, SOCKET_ADDRESS_DEFAULT, + NULL, false, false, false, 0755, 0644, NULL); + if (fd < 0 || log_get_max_level() >= log_level) { + _cleanup_free_ char *p = NULL; + + r = socket_address_print(&a, &p); + if (r < 0) + return log_error_errno(r, "socket_address_print(): %m"); + + if (fd < 0) + log_error_errno(fd, "Failed to listen on %s: %m", p); + else + log_full(log_level, "Listening on %s", p); + } + + return fd; +} + +int in_addr_port_ifindex_name_from_string_auto( + const char *s, + int *ret_family, + union in_addr_union *ret_address, + uint16_t *ret_port, + int *ret_ifindex, + char **ret_server_name) { + + _cleanup_free_ char *buf1 = NULL, *buf2 = NULL, *name = NULL; + int family, ifindex = 0, r; + union in_addr_union a; + uint16_t port = 0; + const char *m; + + assert(s); + + /* This accepts the following: + * 192.168.0.1:53#example.com + * [2001:4860:4860::8888]:53%eth0#example.com + * + * If ret_port is NULL, then the port cannot be specified. + * If ret_ifindex is NULL, then the interface index cannot be specified. + * If ret_server_name is NULL, then server_name cannot be specified. + * + * ret_family is always AF_INET or AF_INET6. + */ + + m = strchr(s, '#'); + if (m) { + if (!ret_server_name) + return -EINVAL; + + if (isempty(m + 1)) + return -EINVAL; + + name = strdup(m + 1); + if (!name) + return -ENOMEM; + + s = buf1 = strndup(s, m - s); + if (!buf1) + return -ENOMEM; + } + + m = strchr(s, '%'); + if (m) { + if (!ret_ifindex) + return -EINVAL; + + if (isempty(m + 1)) + return -EINVAL; + + if (!ifname_valid_full(m + 1, IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC)) + return -EINVAL; /* We want to return -EINVAL for syntactically invalid names, + * and -ENODEV for valid but nonexistent interfaces. */ + + ifindex = resolve_interface(NULL, m + 1); + if (ifindex < 0) + return ifindex; + + s = buf2 = strndup(s, m - s); + if (!buf2) + return -ENOMEM; + } + + m = strrchr(s, ':'); + if (m) { + if (*s == '[') { + _cleanup_free_ char *ip_str = NULL; + + if (!ret_port) + return -EINVAL; + + if (*(m - 1) != ']') + return -EINVAL; + + family = AF_INET6; + + r = parse_ip_port(m + 1, &port); + if (r < 0) + return r; + + ip_str = strndup(s + 1, m - s - 2); + if (!ip_str) + return -ENOMEM; + + r = in_addr_from_string(family, ip_str, &a); + if (r < 0) + return r; + } else { + /* First try to parse the string as IPv6 address without port number */ + r = in_addr_from_string(AF_INET6, s, &a); + if (r < 0) { + /* Then the input should be IPv4 address with port number */ + _cleanup_free_ char *ip_str = NULL; + + if (!ret_port) + return -EINVAL; + + family = AF_INET; + + ip_str = strndup(s, m - s); + if (!ip_str) + return -ENOMEM; + + r = in_addr_from_string(family, ip_str, &a); + if (r < 0) + return r; + + r = parse_ip_port(m + 1, &port); + if (r < 0) + return r; + } else + family = AF_INET6; + } + } else { + family = AF_INET; + r = in_addr_from_string(family, s, &a); + if (r < 0) + return r; + } + + if (ret_family) + *ret_family = family; + if (ret_address) + *ret_address = a; + if (ret_port) + *ret_port = port; + if (ret_ifindex) + *ret_ifindex = ifindex; + if (ret_server_name) + *ret_server_name = TAKE_PTR(name); + + return r; +} + +struct in_addr_full *in_addr_full_free(struct in_addr_full *a) { + if (!a) + return NULL; + + free(a->server_name); + free(a->cached_server_string); + return mfree(a); +} + +int in_addr_full_new( + int family, + const union in_addr_union *a, + uint16_t port, + int ifindex, + const char *server_name, + struct in_addr_full **ret) { + + _cleanup_free_ char *name = NULL; + struct in_addr_full *x; + + assert(ret); + + if (!isempty(server_name)) { + name = strdup(server_name); + if (!name) + return -ENOMEM; + } + + x = new(struct in_addr_full, 1); + if (!x) + return -ENOMEM; + + *x = (struct in_addr_full) { + .family = family, + .address = *a, + .port = port, + .ifindex = ifindex, + .server_name = TAKE_PTR(name), + }; + + *ret = x; + return 0; +} + +int in_addr_full_new_from_string(const char *s, struct in_addr_full **ret) { + _cleanup_free_ char *server_name = NULL; + int family, ifindex, r; + union in_addr_union a; + uint16_t port; + + assert(s); + + r = in_addr_port_ifindex_name_from_string_auto(s, &family, &a, &port, &ifindex, &server_name); + if (r < 0) + return r; + + return in_addr_full_new(family, &a, port, ifindex, server_name, ret); +} + +const char *in_addr_full_to_string(struct in_addr_full *a) { + assert(a); + + if (!a->cached_server_string) + (void) in_addr_port_ifindex_name_to_string( + a->family, + &a->address, + a->port, + a->ifindex, + a->server_name, + &a->cached_server_string); + + return a->cached_server_string; +} diff --git a/src/shared/socket-netlink.h b/src/shared/socket-netlink.h new file mode 100644 index 0000000..eac5991 --- /dev/null +++ b/src/shared/socket-netlink.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-netlink.h" + +#include "in-addr-util.h" +#include "macro.h" +#include "socket-util.h" + +int resolve_ifname(sd_netlink **rtnl, const char *name); +int resolve_interface(sd_netlink **rtnl, const char *name); +int resolve_interface_or_warn(sd_netlink **rtnl, const char *name); + +int make_socket_fd(int log_level, const char* address, int type, int flags); + +int socket_address_parse(SocketAddress *a, const char *s); +int socket_address_parse_and_warn(SocketAddress *a, const char *s); +int socket_address_parse_netlink(SocketAddress *a, const char *s); + +bool socket_address_is(const SocketAddress *a, const char *s, int type); +bool socket_address_is_netlink(const SocketAddress *a, const char *s); + +int in_addr_port_ifindex_name_from_string_auto( + const char *s, + int *ret_family, + union in_addr_union *ret_address, + uint16_t *ret_port, + int *ret_ifindex, + char **ret_server_name); +static inline int in_addr_ifindex_name_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex, char **server_name) { + return in_addr_port_ifindex_name_from_string_auto(s, family, ret, NULL, ifindex, server_name); +} +static inline int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex) { + return in_addr_ifindex_name_from_string_auto(s, family, ret, ifindex, NULL); +} + +struct in_addr_full { + int family; + union in_addr_union address; + uint16_t port; + int ifindex; + char *server_name; + char *cached_server_string; /* Should not be handled directly, but through in_addr_full_to_string(). */ +}; + +struct in_addr_full *in_addr_full_free(struct in_addr_full *a); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct in_addr_full*, in_addr_full_free); +int in_addr_full_new(int family, const union in_addr_union *a, uint16_t port, int ifindex, const char *server_name, struct in_addr_full **ret); +int in_addr_full_new_from_string(const char *s, struct in_addr_full **ret); +const char *in_addr_full_to_string(struct in_addr_full *a); diff --git a/src/shared/spawn-ask-password-agent.c b/src/shared/spawn-ask-password-agent.c new file mode 100644 index 0000000..1f07b19 --- /dev/null +++ b/src/shared/spawn-ask-password-agent.c @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> + +#include "log.h" +#include "process-util.h" +#include "spawn-ask-password-agent.h" +#include "util.h" + +static pid_t agent_pid = 0; + +int ask_password_agent_open(void) { + int r; + + if (agent_pid > 0) + return 0; + + /* We check STDIN here, not STDOUT, since this is about input, + * not output */ + if (!isatty(STDIN_FILENO)) + return 0; + + if (!is_main_thread()) + return -EPERM; + + r = fork_agent("(sd-askpwagent)", + NULL, 0, + &agent_pid, + SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, + SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, "--watch", NULL); + if (r < 0) + return log_error_errno(r, "Failed to fork TTY ask password agent: %m"); + + return 1; +} + +void ask_password_agent_close(void) { + + if (agent_pid <= 0) + return; + + /* Inform agent that we are done */ + (void) kill_and_sigcont(agent_pid, SIGTERM); + (void) wait_for_terminate(agent_pid, NULL); + agent_pid = 0; +} + +int ask_password_agent_open_if_enabled(BusTransport transport, bool ask_password) { + + /* Open the ask password agent as a child process if necessary */ + + if (transport != BUS_TRANSPORT_LOCAL) + return 0; + + if (!ask_password) + return 0; + + return ask_password_agent_open(); +} diff --git a/src/shared/spawn-ask-password-agent.h b/src/shared/spawn-ask-password-agent.h new file mode 100644 index 0000000..a76cdb1 --- /dev/null +++ b/src/shared/spawn-ask-password-agent.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "bus-util.h" + +int ask_password_agent_open(void); +void ask_password_agent_close(void); + +int ask_password_agent_open_if_enabled(BusTransport transport, bool ask_password); diff --git a/src/shared/spawn-polkit-agent.c b/src/shared/spawn-polkit-agent.c new file mode 100644 index 0000000..a0024eb --- /dev/null +++ b/src/shared/spawn-polkit-agent.c @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <poll.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> + +#include "fd-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "process-util.h" +#include "spawn-polkit-agent.h" +#include "stdio-util.h" +#include "time-util.h" +#include "util.h" + +#if ENABLE_POLKIT +static pid_t agent_pid = 0; + +int polkit_agent_open(void) { + char notify_fd[DECIMAL_STR_MAX(int) + 1]; + int pipe_fd[2], r; + + if (agent_pid > 0) + return 0; + + /* Clients that run as root don't need to activate/query polkit */ + if (geteuid() == 0) + return 0; + + /* We check STDIN here, not STDOUT, since this is about input, not output */ + if (!isatty(STDIN_FILENO)) + return 0; + + if (!is_main_thread()) + return -EPERM; + + if (pipe2(pipe_fd, 0) < 0) + return -errno; + + xsprintf(notify_fd, "%i", pipe_fd[1]); + + r = fork_agent("(polkit-agent)", + &pipe_fd[1], 1, + &agent_pid, + POLKIT_AGENT_BINARY_PATH, + POLKIT_AGENT_BINARY_PATH, "--notify-fd", notify_fd, "--fallback", NULL); + + /* Close the writing side, because that's the one for the agent */ + safe_close(pipe_fd[1]); + + if (r < 0) + log_error_errno(r, "Failed to fork TTY ask password agent: %m"); + else + /* Wait until the agent closes the fd */ + fd_wait_for_event(pipe_fd[0], POLLHUP, USEC_INFINITY); + + safe_close(pipe_fd[0]); + + return r; +} + +void polkit_agent_close(void) { + + if (agent_pid <= 0) + return; + + /* Inform agent that we are done */ + (void) kill_and_sigcont(agent_pid, SIGTERM); + (void) wait_for_terminate(agent_pid, NULL); + agent_pid = 0; +} + +#else + +int polkit_agent_open(void) { + return 0; +} + +void polkit_agent_close(void) { +} + +#endif + +int polkit_agent_open_if_enabled(BusTransport transport, bool ask_password) { + + /* Open the polkit agent as a child process if necessary */ + + if (transport != BUS_TRANSPORT_LOCAL) + return 0; + + if (!ask_password) + return 0; + + return polkit_agent_open(); +} diff --git a/src/shared/spawn-polkit-agent.h b/src/shared/spawn-polkit-agent.h new file mode 100644 index 0000000..325dfdd --- /dev/null +++ b/src/shared/spawn-polkit-agent.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "bus-util.h" + +int polkit_agent_open(void); +void polkit_agent_close(void); + +int polkit_agent_open_if_enabled(BusTransport transport, bool ask_password); diff --git a/src/shared/specifier.c b/src/shared/specifier.c new file mode 100644 index 0000000..86731f8 --- /dev/null +++ b/src/shared/specifier.c @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <sys/utsname.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "architecture.h" +#include "format-util.h" +#include "fs-util.h" +#include "hostname-util.h" +#include "macro.h" +#include "os-util.h" +#include "specifier.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +/* + * Generic infrastructure for replacing %x style specifiers in + * strings. Will call a callback for each replacement. + */ + +/* Any ASCII character or digit: our pool of potential specifiers, + * and "%" used for escaping. */ +#define POSSIBLE_SPECIFIERS ALPHANUMERICAL "%" + +int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **_ret) { + size_t l, allocated = 0; + _cleanup_free_ char *ret = NULL; + char *t; + const char *f; + bool percent = false; + int r; + + assert(text); + assert(table); + + l = strlen(text); + if (!GREEDY_REALLOC(ret, allocated, l + 1)) + return -ENOMEM; + t = ret; + + for (f = text; *f; f++, l--) + if (percent) { + if (*f == '%') + *(t++) = '%'; + else { + const Specifier *i; + + for (i = table; i->specifier; i++) + if (i->specifier == *f) + break; + + if (i->lookup) { + _cleanup_free_ char *w = NULL; + size_t k, j; + + r = i->lookup(i->specifier, i->data, userdata, &w); + if (r < 0) + return r; + + j = t - ret; + k = strlen(w); + + if (!GREEDY_REALLOC(ret, allocated, j + k + l + 1)) + return -ENOMEM; + memcpy(ret + j, w, k); + t = ret + j + k; + } else if (strchr(POSSIBLE_SPECIFIERS, *f)) + /* Oops, an unknown specifier. */ + return -EBADSLT; + else { + *(t++) = '%'; + *(t++) = *f; + } + } + + percent = false; + } else if (*f == '%') + percent = true; + else + *(t++) = *f; + + /* If string ended with a stray %, also end with % */ + if (percent) + *(t++) = '%'; + *(t++) = 0; + + /* Try to deallocate unused bytes, but don't sweat it too much */ + if ((size_t)(t - ret) < allocated) { + t = realloc(ret, t - ret); + if (t) + ret = t; + } + + *_ret = TAKE_PTR(ret); + return 0; +} + +/* Generic handler for simple string replacements */ + +int specifier_string(char specifier, const void *data, const void *userdata, char **ret) { + char *n; + + n = strdup(strempty(data)); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret) { + sd_id128_t id; + char *n; + int r; + + r = sd_id128_get_machine(&id); + if (r < 0) + return r; + + n = new(char, 33); + if (!n) + return -ENOMEM; + + *ret = sd_id128_to_string(id, n); + return 0; +} + +int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret) { + sd_id128_t id; + char *n; + int r; + + r = sd_id128_get_boot(&id); + if (r < 0) + return r; + + n = new(char, 33); + if (!n) + return -ENOMEM; + + *ret = sd_id128_to_string(id, n); + return 0; +} + +int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret) { + char *n; + + n = gethostname_malloc(); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int specifier_short_host_name(char specifier, const void *data, const void *userdata, char **ret) { + char *n; + + n = gethostname_short_malloc(); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret) { + struct utsname uts; + char *n; + int r; + + r = uname(&uts); + if (r < 0) + return -errno; + + n = strdup(uts.release); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int specifier_architecture(char specifier, const void *data, const void *userdata, char **ret) { + char *t; + + t = strdup(architecture_to_string(uname_architecture())); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +static int specifier_os_release_common(const char *field, char **ret) { + char *t = NULL; + int r; + + r = parse_os_release(NULL, field, &t, NULL); + if (r < 0) + return r; + if (!t) { + /* fields in /etc/os-release might quite possibly be missing, even if everything is entirely + * valid otherwise. Let's hence return "" in that case. */ + t = strdup(""); + if (!t) + return -ENOMEM; + } + + *ret = t; + return 0; +} + +int specifier_os_id(char specifier, const void *data, const void *userdata, char **ret) { + return specifier_os_release_common("ID", ret); +} + +int specifier_os_version_id(char specifier, const void *data, const void *userdata, char **ret) { + return specifier_os_release_common("VERSION_ID", ret); +} + +int specifier_os_build_id(char specifier, const void *data, const void *userdata, char **ret) { + return specifier_os_release_common("BUILD_ID", ret); +} + +int specifier_os_variant_id(char specifier, const void *data, const void *userdata, char **ret) { + return specifier_os_release_common("VARIANT_ID", ret); +} + +int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret) { + char *t; + + t = gid_to_name(getgid()); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret) { + if (asprintf(ret, UID_FMT, getgid()) < 0) + return -ENOMEM; + + return 0; +} + +int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret) { + char *t; + + /* If we are UID 0 (root), this will not result in NSS, otherwise it might. This is good, as we want to be able + * to run this in PID 1, where our user ID is 0, but where NSS lookups are not allowed. + + * We don't use getusername_malloc() here, because we don't want to look at $USER, to remain consistent with + * specifer_user_id() below. + */ + + t = uid_to_name(getuid()); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret) { + + if (asprintf(ret, UID_FMT, getuid()) < 0) + return -ENOMEM; + + return 0; +} + +int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret) { + + /* On PID 1 (which runs as root) this will not result in NSS, + * which is good. See above */ + + return get_home_dir(ret); +} + +int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret) { + + /* On PID 1 (which runs as root) this will not result in NSS, + * which is good. See above */ + + return get_shell(ret); +} + +int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) { + const char *p; + char *copy; + int r; + + r = tmp_dir(&p); + if (r < 0) + return r; + + copy = strdup(p); + if (!copy) + return -ENOMEM; + + *ret = copy; + return 0; +} + +int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) { + const char *p; + char *copy; + int r; + + r = var_tmp_dir(&p); + if (r < 0) + return r; + + copy = strdup(p); + if (!copy) + return -ENOMEM; + + *ret = copy; + return 0; +} + +int specifier_escape_strv(char **l, char ***ret) { + char **z, **p, **q; + + assert(ret); + + if (strv_isempty(l)) { + *ret = NULL; + return 0; + } + + z = new(char*, strv_length(l)+1); + if (!z) + return -ENOMEM; + + for (p = l, q = z; *p; p++, q++) { + + *q = specifier_escape(*p); + if (!*q) { + strv_free(z); + return -ENOMEM; + } + } + + *q = NULL; + *ret = z; + + return 0; +} diff --git a/src/shared/specifier.h b/src/shared/specifier.h new file mode 100644 index 0000000..1323b41 --- /dev/null +++ b/src/shared/specifier.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "string-util.h" + +typedef int (*SpecifierCallback)(char specifier, const void *data, const void *userdata, char **ret); + +typedef struct Specifier { + const char specifier; + const SpecifierCallback lookup; + const void *data; +} Specifier; + +int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **ret); + +int specifier_string(char specifier, const void *data, const void *userdata, char **ret); + +int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret); +int specifier_short_host_name(char specifier, const void *data, const void *userdata, char **ret); +int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret); +int specifier_architecture(char specifier, const void *data, const void *userdata, char **ret); +int specifier_os_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_os_version_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_os_build_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_os_variant_id(char specifier, const void *data, const void *userdata, char **ret); + +int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret); +int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret); + +int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret); +int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret); + +/* Typically, in places where one of the above specifier is to be resolved the other similar ones are to be + * resolved, too. Hence let's define common macros for the relevant array entries. + * + * COMMON_SYSTEM_SPECIFIERS: + * %a: the native userspace architecture + * %b: the boot ID of the running system + * %B: the OS build ID, according to /etc/os-release + * %H: the hostname of the running system + * %l: the short hostname of the running system + * %m: the machine ID of the running system + * %o: the OS ID according to /etc/os-release + * %v: the kernel version + * %w: the OS version ID, according to /etc/os-release + * %W: the OS variant ID, according to /etc/os-release + * + * COMMON_CREDS_SPECIFIERS: + * %g: the groupname of the running user + * %G: the GID of the running user + * %u: the username of the running user + * %U: the UID of the running user + * + * COMMON_TMP_SPECIFIERS: + * %T: the temporary directory (e.g. /tmp, or $TMPDIR, $TEMP, $TMP) + * %V: the temporary directory for large, persistent stuff (e.g. /var/tmp, or $TMPDIR, $TEMP, $TMP) + */ + +#define COMMON_SYSTEM_SPECIFIERS \ + { 'a', specifier_architecture, NULL }, \ + { 'b', specifier_boot_id, NULL }, \ + { 'B', specifier_os_build_id, NULL }, \ + { 'H', specifier_host_name, NULL }, \ + { 'l', specifier_short_host_name, NULL }, \ + { 'm', specifier_machine_id, NULL }, \ + { 'o', specifier_os_id, NULL }, \ + { 'v', specifier_kernel_release, NULL }, \ + { 'w', specifier_os_version_id, NULL }, \ + { 'W', specifier_os_variant_id, NULL } + +#define COMMON_CREDS_SPECIFIERS \ + { 'g', specifier_group_name, NULL }, \ + { 'G', specifier_group_id, NULL }, \ + { 'u', specifier_user_name, NULL }, \ + { 'U', specifier_user_id, NULL } + +#define COMMON_TMP_SPECIFIERS \ + { 'T', specifier_tmp_dir, NULL }, \ + { 'V', specifier_var_tmp_dir, NULL } + +static inline char* specifier_escape(const char *string) { + return strreplace(string, "%", "%%"); +} + +int specifier_escape_strv(char **l, char ***ret); diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c new file mode 100644 index 0000000..e0dd17a --- /dev/null +++ b/src/shared/switch-root.c @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "base-filesystem.h" +#include "fd-util.h" +#include "fs-util.h" +#include "log.h" +#include "missing_syscall.h" +#include "mkdir.h" +#include "mount-util.h" +#include "mountpoint-util.h" +#include "path-util.h" +#include "rm-rf.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "switch-root.h" +#include "user-util.h" +#include "util.h" + +int switch_root(const char *new_root, + const char *old_root_after, /* path below the new root, where to place the old root after the transition */ + bool unmount_old_root, + unsigned long mount_flags) { /* MS_MOVE or MS_BIND */ + + _cleanup_free_ char *resolved_old_root_after = NULL; + _cleanup_close_ int old_root_fd = -1; + bool old_root_remove; + const char *i; + int r; + + assert(new_root); + assert(old_root_after); + + if (path_equal(new_root, "/")) + return 0; + + /* Check if we shall remove the contents of the old root */ + old_root_remove = in_initrd(); + if (old_root_remove) { + old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY); + if (old_root_fd < 0) + return log_error_errno(errno, "Failed to open root directory: %m"); + } + + /* Determine where we shall place the old root after the transition */ + r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL); + if (r < 0) + return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after); + if (r == 0) /* Doesn't exist yet. Let's create it */ + (void) mkdir_p_label(resolved_old_root_after, 0755); + + /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence + * remount them MS_PRIVATE here as a work-around. + * + * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */ + if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) + return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m"); + + FOREACH_STRING(i, "/sys", "/dev", "/run", "/proc") { + _cleanup_free_ char *chased = NULL; + + r = chase_symlinks(i, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased, NULL); + if (r < 0) + return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, i); + if (r > 0) { + /* Already exists. Let's see if it is a mount point already. */ + r = path_is_mount_point(chased, NULL, 0); + if (r < 0) + return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased); + if (r > 0) /* If it is already mounted, then do nothing */ + continue; + } else + /* Doesn't exist yet? */ + (void) mkdir_p_label(chased, 0755); + + if (mount(i, chased, NULL, mount_flags, NULL) < 0) + return log_error_errno(errno, "Failed to mount %s to %s: %m", i, chased); + } + + /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants + * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error + * and switch_root() nevertheless. */ + (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID); + + if (chdir(new_root) < 0) + return log_error_errno(errno, "Failed to change directory to %s: %m", new_root); + + /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /), + * that's not possible however, and hence we simply overmount root */ + if (pivot_root(new_root, resolved_old_root_after) >= 0) { + + /* Immediately get rid of the old root, if detach_oldroot is set. + * Since we are running off it we need to do this lazily. */ + if (unmount_old_root) { + r = umount_recursive(old_root_after, MNT_DETACH); + if (r < 0) + log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m"); + } + + } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0) + return log_error_errno(errno, "Failed to move %s to /: %m", new_root); + + if (chroot(".") < 0) + return log_error_errno(errno, "Failed to change root: %m"); + + if (chdir("/") < 0) + return log_error_errno(errno, "Failed to change directory: %m"); + + if (old_root_fd >= 0) { + struct stat rb; + + if (fstat(old_root_fd, &rb) < 0) + log_warning_errno(errno, "Failed to stat old root directory, leaving: %m"); + else + (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */ + } + + return 0; +} diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h new file mode 100644 index 0000000..4e04283 --- /dev/null +++ b/src/shared/switch-root.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags); diff --git a/src/shared/syscall-names.text b/src/shared/syscall-names.text new file mode 100644 index 0000000..f1b7e29 --- /dev/null +++ b/src/shared/syscall-names.text @@ -0,0 +1,598 @@ +_llseek +_newselect +_sysctl +accept +accept4 +access +acct +add_key +adjtimex +alarm +arc_gettls +arc_settls +arc_usr_cmpxchg +arch_prctl +arm_fadvise64_64 +arm_sync_file_range +atomic_barrier +atomic_cmpxchg_32 +bdflush +bfin_spinlock +bind +bpf +brk +cache_sync +cachectl +cacheflush +capget +capset +chdir +chmod +chown +chown32 +chroot +clock_adjtime +clock_adjtime64 +clock_getres +clock_getres_time64 +clock_gettime +clock_gettime64 +clock_nanosleep +clock_nanosleep_time64 +clock_settime +clock_settime64 +clone +clone2 +clone3 +close +close_range +connect +copy_file_range +creat +create_module +delete_module +dipc +dup +dup2 +dup3 +epoll_create +epoll_create1 +epoll_ctl +epoll_ctl_old +epoll_pwait +epoll_wait +epoll_wait_old +eventfd +eventfd2 +exec_with_loader +execv +execve +execveat +exit +exit_group +faccessat +faccessat2 +fadvise64 +fadvise64_64 +fallocate +fanotify_init +fanotify_mark +fchdir +fchmod +fchmodat +fchown +fchown32 +fchownat +fcntl +fcntl64 +fdatasync +fgetxattr +finit_module +flistxattr +flock +fork +fp_udfiex_crtl +fremovexattr +fsconfig +fsetxattr +fsmount +fsopen +fspick +fstat +fstat64 +fstatat64 +fstatfs +fstatfs64 +fsync +ftruncate +ftruncate64 +futex +futex_time64 +futimesat +get_kernel_syms +get_mempolicy +get_robust_list +get_thread_area +getcpu +getcwd +getdents +getdents64 +getdomainname +getdtablesize +getegid +getegid32 +geteuid +geteuid32 +getgid +getgid32 +getgroups +getgroups32 +gethostname +getitimer +getpagesize +getpeername +getpgid +getpgrp +getpid +getpmsg +getppid +getpriority +getrandom +getresgid +getresgid32 +getresuid +getresuid32 +getrlimit +getrusage +getsid +getsockname +getsockopt +gettid +gettimeofday +getuid +getuid32 +getunwind +getxattr +getxgid +getxpid +getxuid +idle +init_module +inotify_add_watch +inotify_init +inotify_init1 +inotify_rm_watch +io_cancel +io_destroy +io_getevents +io_pgetevents +io_pgetevents_time64 +io_setup +io_submit +io_uring_enter +io_uring_register +io_uring_setup +ioctl +ioperm +iopl +ioprio_get +ioprio_set +ipc +kcmp +kern_features +kexec_file_load +kexec_load +keyctl +kill +lchown +lchown32 +lgetxattr +link +linkat +listen +listxattr +llistxattr +lookup_dcookie +lremovexattr +lseek +lsetxattr +lstat +lstat64 +madvise +mbind +membarrier +memfd_create +memory_ordering +migrate_pages +mincore +mkdir +mkdirat +mknod +mknodat +mlock +mlock2 +mlockall +mmap +mmap2 +modify_ldt +mount +move_mount +move_pages +mprotect +mq_getsetattr +mq_notify +mq_open +mq_timedreceive +mq_timedreceive_time64 +mq_timedsend +mq_timedsend_time64 +mq_unlink +mremap +msgctl +msgget +msgrcv +msgsnd +msync +multiplexer +munlock +munlockall +munmap +name_to_handle_at +nanosleep +newfstatat +nfsservctl +ni_syscall +nice +old_adjtimex +old_getpagesize +oldfstat +oldlstat +oldolduname +oldstat +oldumount +olduname +open +open_by_handle_at +open_tree +openat +openat2 +or1k_atomic +osf_adjtime +osf_afs_syscall +osf_alt_plock +osf_alt_setsid +osf_alt_sigpending +osf_asynch_daemon +osf_audcntl +osf_audgen +osf_chflags +osf_execve +osf_exportfs +osf_fchflags +osf_fdatasync +osf_fpathconf +osf_fstat +osf_fstatfs +osf_fstatfs64 +osf_fuser +osf_getaddressconf +osf_getdirentries +osf_getdomainname +osf_getfh +osf_getfsstat +osf_gethostid +osf_getitimer +osf_getlogin +osf_getmnt +osf_getrusage +osf_getsysinfo +osf_gettimeofday +osf_kloadcall +osf_kmodcall +osf_lstat +osf_memcntl +osf_mincore +osf_mount +osf_mremap +osf_msfs_syscall +osf_msleep +osf_mvalid +osf_mwakeup +osf_naccept +osf_nfssvc +osf_ngetpeername +osf_ngetsockname +osf_nrecvfrom +osf_nrecvmsg +osf_nsendmsg +osf_ntp_adjtime +osf_ntp_gettime +osf_old_creat +osf_old_fstat +osf_old_getpgrp +osf_old_killpg +osf_old_lstat +osf_old_open +osf_old_sigaction +osf_old_sigblock +osf_old_sigreturn +osf_old_sigsetmask +osf_old_sigvec +osf_old_stat +osf_old_vadvise +osf_old_vtrace +osf_old_wait +osf_oldquota +osf_pathconf +osf_pid_block +osf_pid_unblock +osf_plock +osf_priocntlset +osf_profil +osf_proplist_syscall +osf_reboot +osf_revoke +osf_sbrk +osf_security +osf_select +osf_set_program_attributes +osf_set_speculative +osf_sethostid +osf_setitimer +osf_setlogin +osf_setsysinfo +osf_settimeofday +osf_shmat +osf_signal +osf_sigprocmask +osf_sigsendset +osf_sigstack +osf_sigwaitprim +osf_sstk +osf_stat +osf_statfs +osf_statfs64 +osf_subsys_info +osf_swapctl +osf_swapon +osf_syscall +osf_sysinfo +osf_table +osf_uadmin +osf_usleep_thread +osf_uswitch +osf_utc_adjtime +osf_utc_gettime +osf_utimes +osf_utsname +osf_wait4 +osf_waitid +pause +pciconfig_iobase +pciconfig_read +pciconfig_write +perf_event_open +perfctr +perfmonctl +personality +pidfd_getfd +pidfd_open +pidfd_send_signal +pipe +pipe2 +pivot_root +pkey_alloc +pkey_free +pkey_mprotect +poll +ppoll +ppoll_time64 +prctl +pread64 +preadv +preadv2 +prlimit64 +process_madvise +process_vm_readv +process_vm_writev +pselect6 +pselect6_time64 +ptrace +pwrite64 +pwritev +pwritev2 +query_module +quotactl +read +readahead +readdir +readlink +readlinkat +readv +reboot +recv +recvfrom +recvmmsg +recvmmsg_time64 +recvmsg +remap_file_pages +removexattr +rename +renameat +renameat2 +request_key +restart_syscall +riscv_flush_icache +rmdir +rseq +rt_sigaction +rt_sigpending +rt_sigprocmask +rt_sigqueueinfo +rt_sigreturn +rt_sigsuspend +rt_sigtimedwait +rt_sigtimedwait_time64 +rt_tgsigqueueinfo +rtas +s390_guarded_storage +s390_pci_mmio_read +s390_pci_mmio_write +s390_runtime_instr +s390_sthyi +sched_get_affinity +sched_get_priority_max +sched_get_priority_min +sched_getaffinity +sched_getattr +sched_getparam +sched_getscheduler +sched_rr_get_interval +sched_rr_get_interval_time64 +sched_set_affinity +sched_setaffinity +sched_setattr +sched_setparam +sched_setscheduler +sched_yield +seccomp +select +semctl +semget +semop +semtimedop +semtimedop_time64 +send +sendfile +sendfile64 +sendmmsg +sendmsg +sendto +set_mempolicy +set_robust_list +set_thread_area +set_tid_address +setdomainname +setfsgid +setfsgid32 +setfsuid +setfsuid32 +setgid +setgid32 +setgroups +setgroups32 +sethae +sethostname +setitimer +setns +setpgid +setpgrp +setpriority +setregid +setregid32 +setresgid +setresgid32 +setresuid +setresuid32 +setreuid +setreuid32 +setrlimit +setsid +setsockopt +settimeofday +setuid +setuid32 +setxattr +sgetmask +shmat +shmctl +shmdt +shmget +shutdown +sigaction +sigaltstack +signal +signalfd +signalfd4 +sigpending +sigprocmask +sigreturn +sigsuspend +socket +socketcall +socketpair +splice +spu_create +spu_run +ssetmask +stat +stat64 +statfs +statfs64 +statx +stime +subpage_prot +swapcontext +swapoff +swapon +switch_endian +symlink +symlinkat +sync +sync_file_range +sync_file_range2 +syncfs +sys_debug_setcontext +syscall +sysfs +sysinfo +syslog +sysmips +tee +tgkill +time +timer_create +timer_delete +timer_getoverrun +timer_gettime +timer_gettime64 +timer_settime +timer_settime64 +timerfd +timerfd_create +timerfd_gettime +timerfd_gettime64 +timerfd_settime +timerfd_settime64 +times +tkill +truncate +truncate64 +udftrap +ugetrlimit +umask +umount +umount2 +uname +unlink +unlinkat +unshare +uselib +userfaultfd +ustat +utime +utimensat +utimensat_time64 +utimes +utimesat +utrap_install +vfork +vhangup +vm86 +vm86old +vmsplice +wait4 +waitid +waitpid +write +writev diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c new file mode 100644 index 0000000..670c331 --- /dev/null +++ b/src/shared/sysctl-util.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include "fd-util.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "string-util.h" +#include "sysctl-util.h" + +char *sysctl_normalize(char *s) { + char *n; + + n = strpbrk(s, "/."); + + /* If the first separator is a slash, the path is + * assumed to be normalized and slashes remain slashes + * and dots remains dots. */ + + if (n && *n == '.') + /* Dots become slashes and slashes become dots. Fun. */ + do { + if (*n == '.') + *n = '/'; + else + *n = '.'; + + n = strpbrk(n + 1, "/."); + } while (n); + + path_simplify(s, true); + + /* Kill the leading slash, but keep the first character of the string in the same place. */ + if (*s == '/' && *(s+1)) + memmove(s, s+1, strlen(s)); + + return s; +} + +int sysctl_write(const char *property, const char *value) { + char *p; + _cleanup_close_ int fd = -1; + + assert(property); + assert(value); + + log_debug("Setting '%s' to '%.*s'.", property, (int) strcspn(value, NEWLINE), value); + + p = strjoina("/proc/sys/", property); + fd = open(p, O_WRONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (!endswith(value, "\n")) + value = strjoina(value, "\n"); + + if (write(fd, value, strlen(value)) < 0) + return -errno; + + return 0; +} + +int sysctl_writef(const char *property, const char *format, ...) { + _cleanup_free_ char *v = NULL; + va_list ap; + int r; + + va_start(ap, format); + r = vasprintf(&v, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return sysctl_write(property, v); +} + +int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value) { + const char *p; + + assert(IN_SET(af, AF_INET, AF_INET6)); + assert(property); + assert(value); + + p = strjoina("/proc/sys/net/ipv", af == AF_INET ? "4" : "6", + ifname ? "/conf/" : "", strempty(ifname), + property[0] == '/' ? "" : "/", property); + + log_debug("Setting '%s' to '%s'", p, value); + + return write_string_file(p, value, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER); +} + +int sysctl_read(const char *property, char **content) { + char *p; + + assert(property); + assert(content); + + p = strjoina("/proc/sys/", property); + return read_full_file(p, content, NULL); +} + +int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret) { + _cleanup_free_ char *value = NULL; + const char *p; + int r; + + assert(IN_SET(af, AF_INET, AF_INET6)); + assert(property); + + p = strjoina("/proc/sys/net/ipv", af == AF_INET ? "4" : "6", + ifname ? "/conf/" : "", strempty(ifname), + property[0] == '/' ? "" : "/", property); + + r = read_one_line_file(p, &value); + if (r < 0) + return r; + + if (ret) + *ret = TAKE_PTR(value); + + return r; +} diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h new file mode 100644 index 0000000..3236419 --- /dev/null +++ b/src/shared/sysctl-util.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include "macro.h" +#include "stdio-util.h" +#include "string-util.h" + +char *sysctl_normalize(char *s); +int sysctl_read(const char *property, char **value); +int sysctl_write(const char *property, const char *value); +int sysctl_writef(const char *property, const char *format, ...) _printf_(2, 3); + +int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret); +int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value); +static inline int sysctl_write_ip_property_boolean(int af, const char *ifname, const char *property, bool value) { + return sysctl_write_ip_property(af, ifname, property, one_zero(value)); +} + +#define DEFINE_SYSCTL_WRITE_IP_PROPERTY(name, type, format) \ + static inline int sysctl_write_ip_property_##name(int af, const char *ifname, const char *property, type value) { \ + char buf[DECIMAL_STR_MAX(type)]; \ + xsprintf(buf, format, value); \ + return sysctl_write_ip_property(af, ifname, property, buf); \ + } + +DEFINE_SYSCTL_WRITE_IP_PROPERTY(int, int, "%i"); +DEFINE_SYSCTL_WRITE_IP_PROPERTY(uint32, uint32_t, "%" PRIu32); diff --git a/src/shared/test-tables.h b/src/shared/test-tables.h new file mode 100644 index 0000000..bb8177b --- /dev/null +++ b/src/shared/test-tables.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef const char* (*lookup_t)(int); +typedef int (*reverse_t)(const char*); + +static inline void _test_table(const char *name, + lookup_t lookup, + reverse_t reverse, + int size, + bool sparse) { + int i, boring = 0; + + for (i = -1; i < size + 1; i++) { + const char* val = lookup(i); + int rev; + + if (val) { + rev = reverse(val); + boring = 0; + } else { + rev = reverse("--no-such--value----"); + boring += i >= 0; + } + + if (boring < 1 || i == size) + printf("%s: %d → %s → %d\n", name, i, val, rev); + else if (boring == 1) + printf("%*s ...\n", (int) strlen(name), ""); + + assert_se(!(i >= 0 && i < size ? + sparse ? rev != i && rev != -1 : val == NULL || rev != i : + val != NULL || rev != -1)); + } +} + +#define test_table(lower, upper) \ + _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, false) + +#define test_table_sparse(lower, upper) \ + _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, true) diff --git a/src/shared/tests.c b/src/shared/tests.c new file mode 100644 index 0000000..ab7d799 --- /dev/null +++ b/src/shared/tests.c @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <sys/wait.h> +#include <util.h> + +/* When we include libgen.h because we need dirname() we immediately + * undefine basename() since libgen.h defines it as a macro to the POSIX + * version which is really broken. We prefer GNU basename(). */ +#include <libgen.h> +#undef basename + +#include "sd-bus.h" + +#include "alloc-util.h" +#include "bus-error.h" +#include "bus-locator.h" +#include "bus-util.h" +#include "bus-wait-for-jobs.h" +#include "cgroup-setup.h" +#include "cgroup-util.h" +#include "env-file.h" +#include "env-util.h" +#include "fs-util.h" +#include "log.h" +#include "namespace-util.h" +#include "path-util.h" +#include "process-util.h" +#include "random-util.h" +#include "strv.h" +#include "tests.h" + +char* setup_fake_runtime_dir(void) { + char t[] = "/tmp/fake-xdg-runtime-XXXXXX", *p; + + assert_se(mkdtemp(t)); + assert_se(setenv("XDG_RUNTIME_DIR", t, 1) >= 0); + assert_se(p = strdup(t)); + + return p; +} + +static void load_testdata_env(void) { + static bool called = false; + _cleanup_free_ char *s = NULL; + _cleanup_free_ char *envpath = NULL; + _cleanup_strv_free_ char **pairs = NULL; + char **k, **v; + + if (called) + return; + called = true; + + assert_se(readlink_and_make_absolute("/proc/self/exe", &s) >= 0); + dirname(s); + + envpath = path_join(s, "systemd-runtest.env"); + if (load_env_file_pairs(NULL, envpath, &pairs) < 0) + return; + + STRV_FOREACH_PAIR(k, v, pairs) + setenv(*k, *v, 0); +} + +int get_testdata_dir(const char *suffix, char **ret) { + const char *dir; + char *p; + + load_testdata_env(); + + /* if the env var is set, use that */ + dir = getenv("SYSTEMD_TEST_DATA"); + if (!dir) + dir = SYSTEMD_TEST_DATA; + if (access(dir, F_OK) < 0) + return log_error_errno(errno, "ERROR: $SYSTEMD_TEST_DATA directory [%s] not accessible: %m", dir); + + p = path_join(dir, suffix); + if (!p) + return log_oom(); + + *ret = p; + return 0; +} + +const char* get_catalog_dir(void) { + const char *env; + + load_testdata_env(); + + /* if the env var is set, use that */ + env = getenv("SYSTEMD_CATALOG_DIR"); + if (!env) + env = SYSTEMD_CATALOG_DIR; + if (access(env, F_OK) < 0) { + fprintf(stderr, "ERROR: $SYSTEMD_CATALOG_DIR directory [%s] does not exist\n", env); + exit(EXIT_FAILURE); + } + return env; +} + +bool slow_tests_enabled(void) { + int r; + + r = getenv_bool("SYSTEMD_SLOW_TESTS"); + if (r >= 0) + return r; + + if (r != -ENXIO) + log_warning_errno(r, "Cannot parse $SYSTEMD_SLOW_TESTS, ignoring."); + return SYSTEMD_SLOW_TESTS_DEFAULT; +} + +void test_setup_logging(int level) { + log_set_max_level(level); + log_parse_environment(); + log_open(); +} + +int log_tests_skipped(const char *message) { + log_notice("%s: %s, skipping tests.", + program_invocation_short_name, message); + return EXIT_TEST_SKIP; +} + +int log_tests_skipped_errno(int r, const char *message) { + log_notice_errno(r, "%s: %s, skipping tests: %m", + program_invocation_short_name, message); + return EXIT_TEST_SKIP; +} + +bool have_namespaces(void) { + siginfo_t si = {}; + pid_t pid; + + /* Checks whether namespaces are available. In some cases they aren't. We do this by calling unshare(), and we + * do so in a child process in order not to affect our own process. */ + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { + /* child */ + if (detach_mount_namespace() < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + assert_se(waitid(P_PID, pid, &si, WEXITED) >= 0); + assert_se(si.si_code == CLD_EXITED); + + if (si.si_status == EXIT_SUCCESS) + return true; + + if (si.si_status == EXIT_FAILURE) + return false; + + assert_not_reached("unexpected exit code"); +} + +bool can_memlock(void) { + /* Let's see if we can mlock() a larger blob of memory. BPF programs are charged against + * RLIMIT_MEMLOCK, hence let's first make sure we can lock memory at all, and skip the test if we + * cannot. Why not check RLIMIT_MEMLOCK explicitly? Because in container environments the + * RLIMIT_MEMLOCK value we see might not match the RLIMIT_MEMLOCK value actually in effect. */ + + void *p = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); + if (p == MAP_FAILED) + return false; + + bool b = mlock(p, CAN_MEMLOCK_SIZE) >= 0; + if (b) + assert_se(munlock(p, CAN_MEMLOCK_SIZE) >= 0); + + assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0); + return b; +} + +static int allocate_scope(void) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_free_ char *scope = NULL; + const char *object; + int r; + + /* Let's try to run this test in a scope of its own, with delegation turned on, so that PID 1 doesn't + * interfere with our cgroup management. */ + + r = sd_bus_default_system(&bus); + if (r < 0) + return log_error_errno(r, "Failed to connect to system bus: %m"); + + r = bus_wait_for_jobs_new(bus, &w); + if (r < 0) + return log_oom(); + + if (asprintf(&scope, "%s-%" PRIx64 ".scope", program_invocation_short_name, random_u64()) < 0) + return log_oom(); + + r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit"); + if (r < 0) + return bus_log_create_error(r); + + /* Name and Mode */ + r = sd_bus_message_append(m, "ss", scope, "fail"); + if (r < 0) + return bus_log_create_error(r); + + /* Properties */ + r = sd_bus_message_open_container(m, 'a', "(sv)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, (uint32_t) getpid_cached()); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "(sv)", "Delegate", "b", 1); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "(sv)", "CollectMode", "s", "inactive-or-failed"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + /* Auxiliary units */ + r = sd_bus_message_append(m, "a(sa(sv))", 0); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_call(bus, m, 0, &error, &reply); + if (r < 0) + return log_error_errno(r, "Failed to start transient scope unit: %s", bus_error_message(&error, r)); + + r = sd_bus_message_read(reply, "o", &object); + if (r < 0) + return bus_log_parse_error(r); + + r = bus_wait_for_jobs_one(w, object, false); + if (r < 0) + return r; + + return 0; +} + +static int enter_cgroup(char **ret_cgroup, bool enter_subroot) { + _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL; + CGroupMask supported; + int r; + + r = allocate_scope(); + if (r < 0) + log_warning_errno(r, "Couldn't allocate a scope unit for this test, proceeding without."); + + r = cg_pid_get_path(NULL, 0, &cgroup_root); + if (r == -ENOMEDIUM) + return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m"); + assert(r >= 0); + + if (enter_subroot) + assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0); + else { + cgroup_subroot = strdup(cgroup_root); + assert_se(cgroup_subroot != NULL); + } + + assert_se(cg_mask_supported(&supported) >= 0); + + /* If this fails, then we don't mind as the later cgroup operations will fail too, and it's fine if + * we handle any errors at that point. */ + + r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot); + if (r < 0) + return r; + + r = cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL); + if (r < 0) + return r; + + if (ret_cgroup) + *ret_cgroup = TAKE_PTR(cgroup_subroot); + + return 0; +} + +int enter_cgroup_subroot(char **ret_cgroup) { + return enter_cgroup(ret_cgroup, true); +} + +int enter_cgroup_root(char **ret_cgroup) { + return enter_cgroup(ret_cgroup, false); +} + +const char *ci_environment(void) { + /* We return a string because we might want to provide multiple bits of information later on: not + * just the general CI environment type, but also whether we're sanitizing or not, etc. The caller is + * expected to use strstr on the returned value. */ + static const char *ans = POINTER_MAX; + const char *p; + int r; + + if (ans != POINTER_MAX) + return ans; + + /* We allow specifying the environment with $CITYPE. Nobody uses this so far, but we are ready. */ + p = getenv("CITYPE"); + if (!isempty(p)) + return (ans = p); + + if (getenv_bool("TRAVIS") > 0) + return (ans = "travis"); + if (getenv_bool("SEMAPHORE") > 0) + return (ans = "semaphore"); + if (getenv_bool("GITHUB_ACTIONS") > 0) + return (ans = "github-actions"); + if (getenv("AUTOPKGTEST_ARTIFACTS") || getenv("AUTOPKGTEST_TMP")) + return (ans = "autopkgtest"); + + FOREACH_STRING(p, "CI", "CONTINOUS_INTEGRATION") { + /* Those vars are booleans according to Semaphore and Travis docs: + * https://docs.travis-ci.com/user/environment-variables/#default-environment-variables + * https://docs.semaphoreci.com/ci-cd-environment/environment-variables/#ci + */ + r = getenv_bool(p); + if (r > 0) + return (ans = "unknown"); /* Some other unknown thing */ + if (r == 0) + return (ans = NULL); + } + + return (ans = NULL); +} diff --git a/src/shared/tests.h b/src/shared/tests.h new file mode 100644 index 0000000..c135076 --- /dev/null +++ b/src/shared/tests.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "sd-daemon.h" + +#include "macro.h" + +static inline bool manager_errno_skip_test(int r) { + return IN_SET(abs(r), + EPERM, + EACCES, + EADDRINUSE, + EHOSTDOWN, + ENOENT, + ENOMEDIUM /* cannot determine cgroup */ + ); +} + +char* setup_fake_runtime_dir(void); +int enter_cgroup_subroot(char **ret_cgroup); +int enter_cgroup_root(char **ret_cgroup); +int get_testdata_dir(const char *suffix, char **ret); +const char* get_catalog_dir(void); +bool slow_tests_enabled(void); +void test_setup_logging(int level); +int log_tests_skipped(const char *message); +int log_tests_skipped_errno(int r, const char *message); + +bool have_namespaces(void); + +/* We use the small but non-trivial limit here */ +#define CAN_MEMLOCK_SIZE (512 * 1024U) +bool can_memlock(void); + +#define TEST_REQ_RUNNING_SYSTEMD(x) \ + if (sd_booted() > 0) { \ + x; \ + } else { \ + printf("systemd not booted skipping '%s'\n", #x); \ + } + +/* Provide a convenient way to check if we're running in CI. */ +const char *ci_environment(void); diff --git a/src/shared/tmpfile-util-label.c b/src/shared/tmpfile-util-label.c new file mode 100644 index 0000000..d37c0b0 --- /dev/null +++ b/src/shared/tmpfile-util-label.c @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/stat.h> + +#include "selinux-util.h" +#include "tmpfile-util-label.h" +#include "tmpfile-util.h" + +int fopen_temporary_label( + const char *target, + const char *path, + FILE **f, + char **temp_path) { + + int r; + + r = mac_selinux_create_file_prepare(target, S_IFREG); + if (r < 0) + return r; + + r = fopen_temporary(path, f, temp_path); + + mac_selinux_create_file_clear(); + + return r; +} diff --git a/src/shared/tmpfile-util-label.h b/src/shared/tmpfile-util-label.h new file mode 100644 index 0000000..01afc06 --- /dev/null +++ b/src/shared/tmpfile-util-label.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdio.h> + +/* These functions are split out of tmpfile-util.h (and not for example just flags to the functions they wrap) in order + * to optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but + * not for all */ + +int fopen_temporary_label(const char *target, const char *path, FILE **f, char **temp_path); diff --git a/src/shared/tomoyo-util.c b/src/shared/tomoyo-util.c new file mode 100644 index 0000000..2347179 --- /dev/null +++ b/src/shared/tomoyo-util.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <unistd.h> + +#include "tomoyo-util.h" + +bool mac_tomoyo_use(void) { + static int cached_use = -1; + + if (cached_use < 0) + cached_use = (access("/sys/kernel/security/tomoyo/version", + F_OK) == 0); + + return cached_use; +} diff --git a/src/shared/tomoyo-util.h b/src/shared/tomoyo-util.h new file mode 100644 index 0000000..a6ee7d4 --- /dev/null +++ b/src/shared/tomoyo-util.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +bool mac_tomoyo_use(void); diff --git a/src/shared/udev-util.c b/src/shared/udev-util.c new file mode 100644 index 0000000..030922e --- /dev/null +++ b/src/shared/udev-util.c @@ -0,0 +1,371 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "device-util.h" +#include "env-file.h" +#include "escape.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "signal-util.h" +#include "string-table.h" +#include "string-util.h" +#include "udev-util.h" +#include "utf8.h" + +static const char* const resolve_name_timing_table[_RESOLVE_NAME_TIMING_MAX] = { + [RESOLVE_NAME_NEVER] = "never", + [RESOLVE_NAME_LATE] = "late", + [RESOLVE_NAME_EARLY] = "early", +}; + +DEFINE_STRING_TABLE_LOOKUP(resolve_name_timing, ResolveNameTiming); + +int udev_parse_config_full( + unsigned *ret_children_max, + usec_t *ret_exec_delay_usec, + usec_t *ret_event_timeout_usec, + ResolveNameTiming *ret_resolve_name_timing, + int *ret_timeout_signal) { + + _cleanup_free_ char *log_val = NULL, *children_max = NULL, *exec_delay = NULL, *event_timeout = NULL, *resolve_names = NULL, *timeout_signal = NULL; + int r; + + r = parse_env_file(NULL, "/etc/udev/udev.conf", + "udev_log", &log_val, + "children_max", &children_max, + "exec_delay", &exec_delay, + "event_timeout", &event_timeout, + "resolve_names", &resolve_names, + "timeout_signal", &timeout_signal); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + if (log_val) { + const char *log; + size_t n; + + /* unquote */ + n = strlen(log_val); + if (n >= 2 && + ((log_val[0] == '"' && log_val[n-1] == '"') || + (log_val[0] == '\'' && log_val[n-1] == '\''))) { + log_val[n - 1] = '\0'; + log = log_val + 1; + } else + log = log_val; + + /* we set the udev log level here explicitly, this is supposed + * to regulate the code in libudev/ and udev/. */ + r = log_set_max_level_from_string_realm(LOG_REALM_UDEV, log); + if (r < 0) + log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r, + "failed to set udev log level '%s', ignoring: %m", log); + } + + if (ret_children_max && children_max) { + r = safe_atou(children_max, ret_children_max); + if (r < 0) + log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r, + "failed to parse children_max=%s, ignoring: %m", children_max); + } + + if (ret_exec_delay_usec && exec_delay) { + r = parse_sec(exec_delay, ret_exec_delay_usec); + if (r < 0) + log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r, + "failed to parse exec_delay=%s, ignoring: %m", exec_delay); + } + + if (ret_event_timeout_usec && event_timeout) { + r = parse_sec(event_timeout, ret_event_timeout_usec); + if (r < 0) + log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r, + "failed to parse event_timeout=%s, ignoring: %m", event_timeout); + } + + if (ret_resolve_name_timing && resolve_names) { + ResolveNameTiming t; + + t = resolve_name_timing_from_string(resolve_names); + if (t < 0) + log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r, + "failed to parse resolve_names=%s, ignoring.", resolve_names); + else + *ret_resolve_name_timing = t; + } + + if (ret_timeout_signal && timeout_signal) { + r = signal_from_string(timeout_signal); + if (r < 0) + log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r, + "failed to parse timeout_signal=%s, ignoring: %m", timeout_signal); + else + *ret_timeout_signal = r; + } + + return 0; +} + +/* Note that if -ENOENT is returned, it will be logged at debug level rather than error, + * because it's an expected, common occurrence that the caller will handle with a fallback */ +static int device_new_from_dev_path(const char *devlink, sd_device **ret_device) { + struct stat st; + int r; + + assert(devlink); + + if (stat(devlink, &st) < 0) + return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, + "Failed to stat() %s: %m", devlink); + + if (!S_ISBLK(st.st_mode)) + return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), + "%s does not point to a block device: %m", devlink); + + r = sd_device_new_from_devnum(ret_device, 'b', st.st_rdev); + if (r < 0) + return log_error_errno(r, "Failed to initialize device from %s: %m", devlink); + + return 0; +} + +struct DeviceMonitorData { + const char *sysname; + const char *devlink; + sd_device *device; +}; + +static void device_monitor_data_free(struct DeviceMonitorData *d) { + assert(d); + + sd_device_unref(d->device); +} + +static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) { + struct DeviceMonitorData *data = userdata; + const char *sysname; + + assert(device); + assert(data); + assert(data->sysname || data->devlink); + assert(!data->device); + + /* Ignore REMOVE events here. We are waiting for initialization after all, not de-initialization. We + * might see a REMOVE event from an earlier use of the device (devices by the same name are recycled + * by the kernel after all), which we should not get confused by. After all we cannot distinguish use + * cycles of the devices, as the udev queue is entirely asynchronous. + * + * If we see a REMOVE event here for the use cycle we actually care about then we won't notice of + * course, but that should be OK, given the timeout logic used on the wait loop: this will be noticed + * by means of -ETIMEDOUT. Thus we won't notice immediately, but eventually, and that should be + * sufficient for an error path that should regularly not happen. + * + * (And yes, we only need to special case REMOVE. It's the only "negative" event type, where a device + * ceases to exist. All other event types are "positive": the device exists and is registered in the + * udev database, thus whenever we see the event, we can consider it initialized.) */ + if (device_for_action(device, DEVICE_ACTION_REMOVE)) + return 0; + + if (data->sysname && sd_device_get_sysname(device, &sysname) >= 0 && streq(sysname, data->sysname)) + goto found; + + if (data->devlink) { + const char *devlink; + + FOREACH_DEVICE_DEVLINK(device, devlink) + if (path_equal(devlink, data->devlink)) + goto found; + + if (sd_device_get_devname(device, &devlink) >= 0 && path_equal(devlink, data->devlink)) + goto found; + } + + return 0; + +found: + data->device = sd_device_ref(device); + return sd_event_exit(sd_device_monitor_get_event(monitor), 0); +} + +static int device_wait_for_initialization_internal( + sd_device *_device, + const char *devlink, + const char *subsystem, + usec_t deadline, + sd_device **ret) { + + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL; + _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + /* Ensure that if !_device && devlink, device gets unrefd on errors since it will be new */ + _cleanup_(sd_device_unrefp) sd_device *device = sd_device_ref(_device); + _cleanup_(device_monitor_data_free) struct DeviceMonitorData data = { + .devlink = devlink, + }; + int r; + + assert(device || (subsystem && devlink)); + + /* Devlink might already exist, if it does get the device to use the sysname filtering */ + if (!device && devlink) { + r = device_new_from_dev_path(devlink, &device); + if (r < 0 && r != -ENOENT) + return r; + } + + if (device) { + if (sd_device_get_is_initialized(device) > 0) { + if (ret) + *ret = sd_device_ref(device); + return 0; + } + /* We need either the sysname or the devlink for filtering */ + assert_se(sd_device_get_sysname(device, &data.sysname) >= 0 || devlink); + } + + /* Wait until the device is initialized, so that we can get access to the ID_PATH property */ + + r = sd_event_new(&event); + if (r < 0) + return log_error_errno(r, "Failed to get default event: %m"); + + r = sd_device_monitor_new(&monitor); + if (r < 0) + return log_error_errno(r, "Failed to acquire monitor: %m"); + + if (device && !subsystem) { + r = sd_device_get_subsystem(device, &subsystem); + if (r < 0 && r != -ENOENT) + return log_device_error_errno(device, r, "Failed to get subsystem: %m"); + } + + if (subsystem) { + r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, NULL); + if (r < 0) + return log_error_errno(r, "Failed to add %s subsystem match to monitor: %m", subsystem); + } + + r = sd_device_monitor_attach_event(monitor, event); + if (r < 0) + return log_error_errno(r, "Failed to attach event to device monitor: %m"); + + r = sd_device_monitor_start(monitor, device_monitor_handler, &data); + if (r < 0) + return log_error_errno(r, "Failed to start device monitor: %m"); + + if (deadline != USEC_INFINITY) { + r = sd_event_add_time( + event, &timeout_source, + CLOCK_MONOTONIC, deadline, 0, + NULL, INT_TO_PTR(-ETIMEDOUT)); + if (r < 0) + return log_error_errno(r, "Failed to add timeout event source: %m"); + } + + /* Check again, maybe things changed. Udev will re-read the db if the device wasn't initialized + * yet. */ + if (!device && devlink) { + r = device_new_from_dev_path(devlink, &device); + if (r < 0 && r != -ENOENT) + return r; + } + if (device && sd_device_get_is_initialized(device) > 0) { + if (ret) + *ret = sd_device_ref(device); + return 0; + } + + r = sd_event_loop(event); + if (r < 0) + return log_error_errno(r, "Failed to wait for device to be initialized: %m"); + + if (ret) + *ret = TAKE_PTR(data.device); + return 0; +} + +int device_wait_for_initialization(sd_device *device, const char *subsystem, usec_t deadline, sd_device **ret) { + return device_wait_for_initialization_internal(device, NULL, subsystem, deadline, ret); +} + +int device_wait_for_devlink(const char *devlink, const char *subsystem, usec_t deadline, sd_device **ret) { + return device_wait_for_initialization_internal(NULL, devlink, subsystem, deadline, ret); +} + +int device_is_renaming(sd_device *dev) { + int r; + + assert(dev); + + r = sd_device_get_property_value(dev, "ID_RENAMING", NULL); + if (r == -ENOENT) + return false; + if (r < 0) + return r; + + return true; +} + +bool device_for_action(sd_device *dev, DeviceAction action) { + DeviceAction a; + + assert(dev); + + if (device_get_action(dev, &a) < 0) + return false; + + return a == action; +} + +int udev_rule_parse_value(char *str, char **ret_value, char **ret_endpos) { + char *i, *j; + int r; + bool is_escaped; + + /* value must be double quotated */ + is_escaped = str[0] == 'e'; + str += is_escaped; + if (str[0] != '"') + return -EINVAL; + str++; + + if (!is_escaped) { + /* unescape double quotation '\"'->'"' */ + for (i = j = str; *i != '"'; i++, j++) { + if (*i == '\0') + return -EINVAL; + if (i[0] == '\\' && i[1] == '"') + i++; + *j = *i; + } + j[0] = '\0'; + } else { + _cleanup_free_ char *unescaped = NULL; + + /* find the end position of value */ + for (i = str; *i != '"'; i++) { + if (i[0] == '\\') + i++; + if (*i == '\0') + return -EINVAL; + } + i[0] = '\0'; + + r = cunescape_length(str, i - str, 0, &unescaped); + if (r < 0) + return r; + assert(r <= i - str); + memcpy(str, unescaped, r + 1); + } + + *ret_value = str; + *ret_endpos = i + 1; + return 0; +} diff --git a/src/shared/udev-util.h b/src/shared/udev-util.h new file mode 100644 index 0000000..270861e --- /dev/null +++ b/src/shared/udev-util.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-device.h" + +#include "device-private.h" +#include "time-util.h" + +typedef enum ResolveNameTiming { + RESOLVE_NAME_NEVER, + RESOLVE_NAME_LATE, + RESOLVE_NAME_EARLY, + _RESOLVE_NAME_TIMING_MAX, + _RESOLVE_NAME_TIMING_INVALID = -1, +} ResolveNameTiming; + +ResolveNameTiming resolve_name_timing_from_string(const char *s) _pure_; +const char *resolve_name_timing_to_string(ResolveNameTiming i) _const_; + +int udev_parse_config_full( + unsigned *ret_children_max, + usec_t *ret_exec_delay_usec, + usec_t *ret_event_timeout_usec, + ResolveNameTiming *ret_resolve_name_timing, + int *ret_timeout_signal); + +static inline int udev_parse_config(void) { + return udev_parse_config_full(NULL, NULL, NULL, NULL, NULL); +} + +int device_wait_for_initialization(sd_device *device, const char *subsystem, usec_t deadline, sd_device **ret); +int device_wait_for_devlink(const char *path, const char *subsystem, usec_t deadline, sd_device **ret); +int device_is_renaming(sd_device *dev); +bool device_for_action(sd_device *dev, DeviceAction action); + +int udev_rule_parse_value(char *str, char **ret_value, char **ret_endpos); diff --git a/src/shared/uid-range.c b/src/shared/uid-range.c new file mode 100644 index 0000000..5d5bf7f --- /dev/null +++ b/src/shared/uid-range.c @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "macro.h" +#include "sort-util.h" +#include "uid-range.h" +#include "user-util.h" + +static bool uid_range_intersect(UidRange *range, uid_t start, uid_t nr) { + assert(range); + + return range->start <= start + nr && + range->start + range->nr >= start; +} + +static void uid_range_coalesce(UidRange **p, unsigned *n) { + assert(p); + assert(n); + + for (unsigned i = 0; i < *n; i++) { + for (unsigned j = i + 1; j < *n; j++) { + UidRange *x = (*p)+i, *y = (*p)+j; + + if (uid_range_intersect(x, y->start, y->nr)) { + uid_t begin, end; + + begin = MIN(x->start, y->start); + end = MAX(x->start + x->nr, y->start + y->nr); + + x->start = begin; + x->nr = end - begin; + + if (*n > j+1) + memmove(y, y+1, sizeof(UidRange) * (*n - j -1)); + + (*n)--; + j--; + } + } + } +} + +static int uid_range_compare(const UidRange *a, const UidRange *b) { + int r; + + r = CMP(a->start, b->start); + if (r != 0) + return r; + + return CMP(a->nr, b->nr); +} + +int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr) { + bool found = false; + UidRange *x; + + assert(p); + assert(n); + + if (nr <= 0) + return 0; + + for (unsigned i = 0; i < *n; i++) { + x = (*p) + i; + if (uid_range_intersect(x, start, nr)) { + found = true; + break; + } + } + + if (found) { + uid_t begin, end; + + begin = MIN(x->start, start); + end = MAX(x->start + x->nr, start + nr); + + x->start = begin; + x->nr = end - begin; + } else { + UidRange *t; + + t = reallocarray(*p, *n + 1, sizeof(UidRange)); + if (!t) + return -ENOMEM; + + *p = t; + x = t + ((*n) ++); + + x->start = start; + x->nr = nr; + } + + typesafe_qsort(*p, *n, uid_range_compare); + uid_range_coalesce(p, n); + + return *n; +} + +int uid_range_add_str(UidRange **p, unsigned *n, const char *s) { + uid_t start, nr; + const char *t; + int r; + + assert(p); + assert(n); + assert(s); + + t = strchr(s, '-'); + if (t) { + char *b; + uid_t end; + + b = strndupa(s, t - s); + r = parse_uid(b, &start); + if (r < 0) + return r; + + r = parse_uid(t+1, &end); + if (r < 0) + return r; + + if (end < start) + return -EINVAL; + + nr = end - start + 1; + } else { + r = parse_uid(s, &start); + if (r < 0) + return r; + + nr = 1; + } + + return uid_range_add(p, n, start, nr); +} + +int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid) { + uid_t closest = UID_INVALID, candidate; + + assert(p); + assert(uid); + + candidate = *uid - 1; + + for (unsigned i = 0; i < n; i++) { + uid_t begin, end; + + begin = p[i].start; + end = p[i].start + p[i].nr - 1; + + if (candidate >= begin && candidate <= end) { + *uid = candidate; + return 1; + } + + if (end < candidate) + closest = end; + } + + if (closest == UID_INVALID) + return -EBUSY; + + *uid = closest; + return 1; +} + +bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid) { + assert(p); + assert(uid); + + for (unsigned i = 0; i < n; i++) + if (uid >= p[i].start && uid < p[i].start + p[i].nr) + return true; + + return false; +} diff --git a/src/shared/uid-range.h b/src/shared/uid-range.h new file mode 100644 index 0000000..ef168cd --- /dev/null +++ b/src/shared/uid-range.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +typedef struct UidRange { + uid_t start, nr; +} UidRange; + +int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr); +int uid_range_add_str(UidRange **p, unsigned *n, const char *s); + +int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid); +bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid); diff --git a/src/shared/unit-file.c b/src/shared/unit-file.c new file mode 100644 index 0000000..4c30719 --- /dev/null +++ b/src/shared/unit-file.c @@ -0,0 +1,601 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "sd-id128.h" + +#include "dirent-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "macro.h" +#include "path-lookup.h" +#include "set.h" +#include "special.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "unit-file.h" + +bool unit_type_may_alias(UnitType type) { + return IN_SET(type, + UNIT_SERVICE, + UNIT_SOCKET, + UNIT_TARGET, + UNIT_DEVICE, + UNIT_TIMER, + UNIT_PATH); +} + +bool unit_type_may_template(UnitType type) { + return IN_SET(type, + UNIT_SERVICE, + UNIT_SOCKET, + UNIT_TARGET, + UNIT_TIMER, + UNIT_PATH); +} + +int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation) { + _cleanup_free_ char *template = NULL; + int r, un_type1, un_type2; + + un_type1 = unit_name_classify(symlink); + + /* The straightforward case: the symlink name matches the target and we have a valid unit */ + if (streq(symlink, target) && + (un_type1 & (UNIT_NAME_PLAIN | UNIT_NAME_INSTANCE))) + return 1; + + r = unit_name_template(symlink, &template); + if (r == -EINVAL) + return 0; /* Not a template */ + if (r < 0) + return r; + + un_type2 = unit_name_classify(target); + + /* An instance name points to a target that is just the template name */ + if (un_type1 == UNIT_NAME_INSTANCE && + un_type2 == UNIT_NAME_TEMPLATE && + streq(template, target)) + return 1; + + /* foo@.target.requires/bar@.service: instance will be propagated */ + if (instance_propagation && + un_type1 == UNIT_NAME_TEMPLATE && + un_type2 == UNIT_NAME_TEMPLATE && + streq(template, target)) + return 1; + + return 0; +} + +int unit_validate_alias_symlink_and_warn(const char *filename, const char *target) { + const char *src, *dst; + _cleanup_free_ char *src_instance = NULL, *dst_instance = NULL; + UnitType src_unit_type, dst_unit_type; + int src_name_type, dst_name_type; + + /* Check if the *alias* symlink is valid. This applies to symlinks like + * /etc/systemd/system/dbus.service → dbus-broker.service, but not to .wants or .requires symlinks + * and such. Neither does this apply to symlinks which *link* units, i.e. symlinks to outside of the + * unit lookup path. + * + * -EINVAL is returned if the something is wrong with the source filename or the source unit type is + * not allowed to symlink, + * -EXDEV if the target filename is not a valid unit name or doesn't match the source. + */ + + src = basename(filename); + dst = basename(target); + + /* src checks */ + + src_name_type = unit_name_to_instance(src, &src_instance); + if (src_name_type < 0) + return log_notice_errno(src_name_type, + "%s: not a valid unit name \"%s\": %m", filename, src); + + src_unit_type = unit_name_to_type(src); + assert(src_unit_type >= 0); /* unit_name_to_instance() checked the suffix already */ + + if (!unit_type_may_alias(src_unit_type)) + return log_notice_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: symlinks are not allowed for units of this type, rejecting.", + filename); + + if (src_name_type != UNIT_NAME_PLAIN && + !unit_type_may_template(src_unit_type)) + return log_notice_errno(SYNTHETIC_ERRNO(EINVAL), + "%s: templates not allowed for %s units, rejecting.", + filename, unit_type_to_string(src_unit_type)); + + /* dst checks */ + + dst_name_type = unit_name_to_instance(dst, &dst_instance); + if (dst_name_type < 0) + return log_notice_errno(dst_name_type == -EINVAL ? SYNTHETIC_ERRNO(EXDEV) : dst_name_type, + "%s points to \"%s\" which is not a valid unit name: %m", + filename, dst); + + if (!(dst_name_type == src_name_type || + (src_name_type == UNIT_NAME_INSTANCE && dst_name_type == UNIT_NAME_TEMPLATE))) + return log_notice_errno(SYNTHETIC_ERRNO(EXDEV), + "%s: symlink target name type \"%s\" does not match source, rejecting.", + filename, dst); + + if (dst_name_type == UNIT_NAME_INSTANCE) { + assert(src_instance); + assert(dst_instance); + if (!streq(src_instance, dst_instance)) + return log_notice_errno(SYNTHETIC_ERRNO(EXDEV), + "%s: unit symlink target \"%s\" instance name doesn't match, rejecting.", + filename, dst); + } + + dst_unit_type = unit_name_to_type(dst); + if (dst_unit_type != src_unit_type) + return log_notice_errno(SYNTHETIC_ERRNO(EXDEV), + "%s: symlink target \"%s\" has incompatible suffix, rejecting.", + filename, dst); + + return 0; +} + +#define FOLLOW_MAX 8 + +static int unit_ids_map_get( + Hashmap *unit_ids_map, + const char *unit_name, + const char **ret_fragment_path) { + + /* Resolve recursively until we hit an absolute path, i.e. a non-aliased unit. + * + * We distinguish the case where unit_name was not found in the hashmap at all, and the case where + * some symlink was broken. + * + * If a symlink target points to an instance name, then we also check for the template. */ + + const char *id = NULL; + int r; + + for (unsigned n = 0; n < FOLLOW_MAX; n++) { + const char *t = hashmap_get(unit_ids_map, id ?: unit_name); + if (!t) { + _cleanup_free_ char *template = NULL; + + if (!id) + return -ENOENT; + + r = unit_name_template(id, &template); + if (r == -EINVAL) + return -ENXIO; /* we failed to find the symlink target */ + if (r < 0) + return log_error_errno(r, "Failed to determine template name for %s: %m", id); + + t = hashmap_get(unit_ids_map, template); + if (!t) + return -ENXIO; + + /* We successfully switched from instanced name to a template, let's continue */ + } + + if (path_is_absolute(t)) { + if (ret_fragment_path) + *ret_fragment_path = t; + return 0; + } + + id = t; + } + + return -ELOOP; +} + +static bool lookup_paths_mtime_exclude(const LookupPaths *lp, const char *path) { + /* Paths that are under our exclusive control. Users shall not alter those directly. */ + + return streq_ptr(path, lp->generator) || + streq_ptr(path, lp->generator_early) || + streq_ptr(path, lp->generator_late) || + streq_ptr(path, lp->transient) || + streq_ptr(path, lp->persistent_control) || + streq_ptr(path, lp->runtime_control); +} + +#define HASH_KEY SD_ID128_MAKE(4e,86,1b,e3,39,b3,40,46,98,5d,b8,11,34,8f,c3,c1) + +bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new) { + struct siphash state; + + siphash24_init(&state, HASH_KEY.bytes); + + char **dir; + STRV_FOREACH(dir, (char**) lp->search_path) { + struct stat st; + + if (lookup_paths_mtime_exclude(lp, *dir)) + continue; + + /* Determine the latest lookup path modification time */ + if (stat(*dir, &st) < 0) { + if (errno == ENOENT) + continue; + + log_debug_errno(errno, "Failed to stat %s, ignoring: %m", *dir); + continue; + } + + siphash24_compress_usec_t(timespec_load(&st.st_mtim), &state); + } + + uint64_t updated = siphash24_finalize(&state); + if (ret_new) + *ret_new = updated; + if (updated != timestamp_hash) + log_debug("Modification times have changed, need to update cache."); + return updated == timestamp_hash; +} + +int unit_file_build_name_map( + const LookupPaths *lp, + uint64_t *cache_timestamp_hash, + Hashmap **unit_ids_map, + Hashmap **unit_names_map, + Set **path_cache) { + + /* Build two mappings: any name → main unit (i.e. the end result of symlink resolution), unit name → + * all aliases (i.e. the entry for a given key is a a list of all names which point to this key). The + * key is included in the value iff we saw a file or symlink with that name. In other words, if we + * have a key, but it is not present in the value for itself, there was an alias pointing to it, but + * the unit itself is not loadable. + * + * At the same, build a cache of paths where to find units. The non-const parameters are for input + * and output. Existing contents will be freed before the new contents are stored. + */ + + _cleanup_hashmap_free_ Hashmap *ids = NULL, *names = NULL; + _cleanup_set_free_free_ Set *paths = NULL; + uint64_t timestamp_hash; + char **dir; + int r; + + /* Before doing anything, check if the timestamp hash that was passed is still valid. + * If yes, do nothing. */ + if (cache_timestamp_hash && + lookup_paths_timestamp_hash_same(lp, *cache_timestamp_hash, ×tamp_hash)) + return 0; + + /* The timestamp hash is now set based on the mtimes from before when we start reading files. + * If anything is modified concurrently, we'll consider the cache outdated. */ + + if (path_cache) { + paths = set_new(&path_hash_ops_free); + if (!paths) + return log_oom(); + } + + STRV_FOREACH(dir, (char**) lp->search_path) { + struct dirent *de; + _cleanup_closedir_ DIR *d = NULL; + + d = opendir(*dir); + if (!d) { + if (errno != ENOENT) + log_warning_errno(errno, "Failed to open \"%s\", ignoring: %m", *dir); + continue; + } + + FOREACH_DIRENT_ALL(de, d, log_warning_errno(errno, "Failed to read \"%s\", ignoring: %m", *dir)) { + char *filename; + _cleanup_free_ char *_filename_free = NULL, *simplified = NULL; + const char *suffix, *dst = NULL; + bool valid_unit_name; + + valid_unit_name = unit_name_is_valid(de->d_name, UNIT_NAME_ANY); + + /* We only care about valid units and dirs with certain suffixes, let's ignore the + * rest. */ + if (!valid_unit_name && + !ENDSWITH_SET(de->d_name, ".wants", ".requires", ".d")) + continue; + + filename = path_join(*dir, de->d_name); + if (!filename) + return log_oom(); + + if (paths) { + r = set_consume(paths, filename); + if (r < 0) + return log_oom(); + /* We will still use filename below. This is safe because we know the set + * holds a reference. */ + } else + _filename_free = filename; /* Make sure we free the filename. */ + + if (!valid_unit_name) + continue; + assert_se(suffix = strrchr(de->d_name, '.')); + + /* search_path is ordered by priority (highest first). If the name is already mapped + * to something (incl. itself), it means that we have already seen it, and we should + * ignore it here. */ + if (hashmap_contains(ids, de->d_name)) + continue; + + dirent_ensure_type(d, de); + if (de->d_type == DT_LNK) { + /* We don't explicitly check for alias loops here. unit_ids_map_get() which + * limits the number of hops should be used to access the map. */ + + _cleanup_free_ char *target = NULL; + + r = readlinkat_malloc(dirfd(d), de->d_name, &target); + if (r < 0) { + log_warning_errno(r, "Failed to read symlink %s/%s, ignoring: %m", + *dir, de->d_name); + continue; + } + + const bool is_abs = path_is_absolute(target); + if (lp->root_dir || !is_abs) { + char *target_abs = path_join(is_abs ? lp->root_dir : *dir, target); + if (!target_abs) + return log_oom(); + + free_and_replace(target, target_abs); + } + + /* Get rid of "." and ".." components in target path */ + r = chase_symlinks(target, lp->root_dir, CHASE_NOFOLLOW | CHASE_NONEXISTENT, &simplified, NULL); + if (r < 0) { + log_warning_errno(r, "Failed to resolve symlink %s pointing to %s, ignoring: %m", + filename, target); + continue; + } + + /* Check if the symlink goes outside of our search path. + * If yes, it's a linked unit file or mask, and we don't care about the target name. + * Let's just store the link destination directly. + * If not, let's verify that it's a good symlink. */ + char *tail = path_startswith_strv(simplified, lp->search_path); + if (tail) { + bool self_alias; + + dst = basename(simplified); + self_alias = streq(dst, de->d_name); + + if (is_path(tail)) + log_full(self_alias ? LOG_DEBUG : LOG_WARNING, + "Suspicious symlink %s→%s, treating as alias.", + filename, simplified); + + r = unit_validate_alias_symlink_and_warn(filename, simplified); + if (r < 0) + continue; + + if (self_alias) { + /* A self-alias that has no effect */ + log_debug("%s: self-alias: %s/%s → %s, ignoring.", + __func__, *dir, de->d_name, dst); + continue; + } + + log_debug("%s: alias: %s/%s → %s", __func__, *dir, de->d_name, dst); + } else { + dst = simplified; + + log_debug("%s: linked unit file: %s/%s → %s", __func__, *dir, de->d_name, dst); + } + + } else { + dst = filename; + log_debug("%s: normal unit file: %s", __func__, dst); + } + + r = hashmap_put_strdup(&ids, de->d_name, dst); + if (r < 0) + return log_warning_errno(r, "Failed to add entry to hashmap (%s→%s): %m", + de->d_name, dst); + } + } + + /* Let's also put the names in the reverse db. */ + const char *dummy, *src; + HASHMAP_FOREACH_KEY(dummy, src, ids) { + const char *dst; + + r = unit_ids_map_get(ids, src, &dst); + if (r < 0) + continue; + + if (null_or_empty_path(dst) != 0) + continue; + + /* Do not treat instance symlinks that point to the template as aliases */ + if (unit_name_is_valid(basename(dst), UNIT_NAME_TEMPLATE) && + unit_name_is_valid(src, UNIT_NAME_INSTANCE)) + continue; + + r = string_strv_hashmap_put(&names, basename(dst), src); + if (r < 0) + return log_warning_errno(r, "Failed to add entry to hashmap (%s→%s): %m", + basename(dst), src); + } + + if (cache_timestamp_hash) + *cache_timestamp_hash = timestamp_hash; + + hashmap_free_and_replace(*unit_ids_map, ids); + hashmap_free_and_replace(*unit_names_map, names); + if (path_cache) + set_free_and_replace(*path_cache, paths); + + return 1; +} + +int unit_file_find_fragment( + Hashmap *unit_ids_map, + Hashmap *unit_name_map, + const char *unit_name, + const char **ret_fragment_path, + Set **ret_names) { + + const char *fragment = NULL; + _cleanup_free_ char *template = NULL, *instance = NULL; + _cleanup_set_free_free_ Set *names = NULL; + char **t, **nnn; + int r, name_type; + + /* Finds a fragment path, and returns the set of names: + * if we have …/foo.service and …/foo-alias.service→foo.service, + * and …/foo@.service and …/foo-alias@.service→foo@.service, + * and …/foo@inst.service, + * this should return: + * foo.service → …/foo.service, {foo.service, foo-alias.service}, + * foo-alias.service → …/foo.service, {foo.service, foo-alias.service}, + * foo@.service → …/foo@.service, {foo@.service, foo-alias@.service}, + * foo-alias@.service → …/foo@.service, {foo@.service, foo-alias@.service}, + * foo@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service}, + * foo-alias@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service}, + * foo-alias@inst.service → …/foo@inst.service, {foo@inst.service, foo-alias@inst.service}. + */ + + name_type = unit_name_to_instance(unit_name, &instance); + if (name_type < 0) + return name_type; + + names = set_new(&string_hash_ops); + if (!names) + return -ENOMEM; + + /* The unit always has its own name if it's not a template. */ + if (IN_SET(name_type, UNIT_NAME_PLAIN, UNIT_NAME_INSTANCE)) { + r = set_put_strdup(&names, unit_name); + if (r < 0) + return r; + } + + /* First try to load fragment under the original name */ + r = unit_ids_map_get(unit_ids_map, unit_name, &fragment); + if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO)) + return log_debug_errno(r, "Cannot load unit %s: %m", unit_name); + + if (fragment) { + /* Add any aliases of the original name to the set of names */ + nnn = hashmap_get(unit_name_map, basename(fragment)); + STRV_FOREACH(t, nnn) { + if (name_type == UNIT_NAME_INSTANCE && unit_name_is_valid(*t, UNIT_NAME_TEMPLATE)) { + char *inst; + + r = unit_name_replace_instance(*t, instance, &inst); + if (r < 0) + return log_debug_errno(r, "Cannot build instance name %s+%s: %m", *t, instance); + + if (!streq(unit_name, inst)) + log_debug("%s: %s has alias %s", __func__, unit_name, inst); + + log_info("%s: %s+%s → %s", __func__, *t, instance, inst); + r = set_consume(names, inst); + } else { + if (!streq(unit_name, *t)) + log_debug("%s: %s has alias %s", __func__, unit_name, *t); + + r = set_put_strdup(&names, *t); + } + if (r < 0) + return r; + } + } + + if (!fragment && name_type == UNIT_NAME_INSTANCE) { + /* Look for a fragment under the template name */ + + r = unit_name_template(unit_name, &template); + if (r < 0) + return log_error_errno(r, "Failed to determine template name: %m"); + + r = unit_ids_map_get(unit_ids_map, template, &fragment); + if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO)) + return log_debug_errno(r, "Cannot load template %s: %m", template); + + if (fragment) { + /* Add any aliases of the original name to the set of names */ + nnn = hashmap_get(unit_name_map, basename(fragment)); + STRV_FOREACH(t, nnn) { + _cleanup_free_ char *inst = NULL; + const char *inst_fragment = NULL; + + r = unit_name_replace_instance(*t, instance, &inst); + if (r < 0) + return log_debug_errno(r, "Cannot build instance name %s+%s: %m", template, instance); + + /* Exclude any aliases that point in some other direction. */ + r = unit_ids_map_get(unit_ids_map, inst, &inst_fragment); + if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO)) + return log_debug_errno(r, "Cannot find instance fragment %s: %m", inst); + + if (inst_fragment && + !streq(basename(inst_fragment), basename(fragment))) { + log_debug("Instance %s has fragment %s and is not an alias of %s.", + inst, inst_fragment, unit_name); + continue; + } + + if (!streq(unit_name, inst)) + log_debug("%s: %s has alias %s", __func__, unit_name, inst); + r = set_consume(names, TAKE_PTR(inst)); + if (r < 0) + return r; + } + } + } + + *ret_fragment_path = fragment; + *ret_names = TAKE_PTR(names); + + // FIXME: if instance, consider any unit names with different template name + return 0; +} + +static const char * const rlmap[] = { + "emergency", SPECIAL_EMERGENCY_TARGET, + "-b", SPECIAL_EMERGENCY_TARGET, + "rescue", SPECIAL_RESCUE_TARGET, + "single", SPECIAL_RESCUE_TARGET, + "-s", SPECIAL_RESCUE_TARGET, + "s", SPECIAL_RESCUE_TARGET, + "S", SPECIAL_RESCUE_TARGET, + "1", SPECIAL_RESCUE_TARGET, + "2", SPECIAL_MULTI_USER_TARGET, + "3", SPECIAL_MULTI_USER_TARGET, + "4", SPECIAL_MULTI_USER_TARGET, + "5", SPECIAL_GRAPHICAL_TARGET, + NULL +}; + +static const char * const rlmap_initrd[] = { + "emergency", SPECIAL_EMERGENCY_TARGET, + "rescue", SPECIAL_RESCUE_TARGET, + NULL +}; + +const char* runlevel_to_target(const char *word) { + const char * const *rlmap_ptr; + size_t i; + + if (!word) + return NULL; + + if (in_initrd()) { + word = startswith(word, "rd."); + if (!word) + return NULL; + } + + rlmap_ptr = in_initrd() ? rlmap_initrd : rlmap; + + for (i = 0; rlmap_ptr[i]; i += 2) + if (streq(word, rlmap_ptr[i])) + return rlmap_ptr[i+1]; + + return NULL; +} diff --git a/src/shared/unit-file.h b/src/shared/unit-file.h new file mode 100644 index 0000000..5463b0a --- /dev/null +++ b/src/shared/unit-file.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "hashmap.h" +#include "time-util.h" +#include "unit-name.h" + +typedef enum UnitFileState UnitFileState; +typedef enum UnitFileScope UnitFileScope; +typedef struct LookupPaths LookupPaths; + +enum UnitFileState { + UNIT_FILE_ENABLED, + UNIT_FILE_ENABLED_RUNTIME, + UNIT_FILE_LINKED, + UNIT_FILE_LINKED_RUNTIME, + UNIT_FILE_ALIAS, + UNIT_FILE_MASKED, + UNIT_FILE_MASKED_RUNTIME, + UNIT_FILE_STATIC, + UNIT_FILE_DISABLED, + UNIT_FILE_INDIRECT, + UNIT_FILE_GENERATED, + UNIT_FILE_TRANSIENT, + UNIT_FILE_BAD, + _UNIT_FILE_STATE_MAX, + _UNIT_FILE_STATE_INVALID = -1 +}; + +enum UnitFileScope { + UNIT_FILE_SYSTEM, + UNIT_FILE_GLOBAL, + UNIT_FILE_USER, + _UNIT_FILE_SCOPE_MAX, + _UNIT_FILE_SCOPE_INVALID = -1 +}; + +bool unit_type_may_alias(UnitType type) _const_; +bool unit_type_may_template(UnitType type) _const_; + +int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation); +int unit_validate_alias_symlink_and_warn(const char *filename, const char *target); + +bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new); +int unit_file_build_name_map( + const LookupPaths *lp, + uint64_t *cache_timestamp_hash, + Hashmap **unit_ids_map, + Hashmap **unit_names_map, + Set **path_cache); + +int unit_file_find_fragment( + Hashmap *unit_ids_map, + Hashmap *unit_name_map, + const char *unit_name, + const char **ret_fragment_path, + Set **ret_names); + +const char* runlevel_to_target(const char *rl); diff --git a/src/shared/user-record-nss.c b/src/shared/user-record-nss.c new file mode 100644 index 0000000..88b8fc2 --- /dev/null +++ b/src/shared/user-record-nss.c @@ -0,0 +1,531 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "errno-util.h" +#include "format-util.h" +#include "libcrypt-util.h" +#include "strv.h" +#include "user-record-nss.h" +#include "user-util.h" +#include "utf8.h" + +#define SET_IF(field, condition, value, fallback) \ + field = (condition) ? (value) : (fallback) + +static inline const char* utf8_only(const char *s) { + return s && utf8_is_valid(s) ? s : NULL; +} + +static inline int strv_extend_strv_utf8_only(char ***dst, char **src, bool filter_duplicates) { + _cleanup_free_ char **t = NULL; + size_t l, j = 0; + + /* First, do a shallow copy of s, filtering for only valid utf-8 strings */ + l = strv_length(src); + t = new(char*, l + 1); + if (!t) + return -ENOMEM; + + for (size_t i = 0; i < l; i++) + if (utf8_is_valid(src[i])) + t[j++] = src[i]; + if (j == 0) + return 0; + + t[j] = NULL; + return strv_extend_strv(dst, t, filter_duplicates); +} + +int nss_passwd_to_user_record( + const struct passwd *pwd, + const struct spwd *spwd, + UserRecord **ret) { + + _cleanup_(user_record_unrefp) UserRecord *hr = NULL; + int r; + + assert(pwd); + assert(ret); + + if (isempty(pwd->pw_name)) + return -EINVAL; + + if (spwd && !streq_ptr(spwd->sp_namp, pwd->pw_name)) + return -EINVAL; + + hr = user_record_new(); + if (!hr) + return -ENOMEM; + + r = free_and_strdup(&hr->user_name, pwd->pw_name); + if (r < 0) + return r; + + /* Some bad NSS modules synthesize GECOS fields with embedded ":" or "\n" characters, which are not + * something we can output in /etc/passwd compatible format, since these are record separators + * there. We normally refuse that, but we need to maintain compatibility with arbitrary NSS modules, + * hence let's do what glibc does: mangle the data to fit the format. */ + if (isempty(pwd->pw_gecos) || streq_ptr(pwd->pw_gecos, hr->user_name)) + hr->real_name = mfree(hr->real_name); + else if (valid_gecos(pwd->pw_gecos)) { + r = free_and_strdup(&hr->real_name, pwd->pw_gecos); + if (r < 0) + return r; + } else { + _cleanup_free_ char *mangled = NULL; + + mangled = mangle_gecos(pwd->pw_gecos); + if (!mangled) + return -ENOMEM; + + free_and_replace(hr->real_name, mangled); + } + + r = free_and_strdup(&hr->home_directory, utf8_only(empty_to_null(pwd->pw_dir))); + if (r < 0) + return r; + + r = free_and_strdup(&hr->shell, utf8_only(empty_to_null(pwd->pw_shell))); + if (r < 0) + return r; + + hr->uid = pwd->pw_uid; + hr->gid = pwd->pw_gid; + + if (spwd && + looks_like_hashed_password(utf8_only(spwd->sp_pwdp))) { /* Ignore locked, disabled, and mojibake passwords */ + strv_free_erase(hr->hashed_password); + hr->hashed_password = strv_new(spwd->sp_pwdp); + if (!hr->hashed_password) + return -ENOMEM; + } else + hr->hashed_password = strv_free_erase(hr->hashed_password); + + /* shadow-utils suggests using "chage -E 0" (or -E 1, depending on which man page you check) + * for locking a whole account, hence check for that. Note that it also defines a way to lock + * just a password instead of the whole account, but that's mostly pointless in times of + * password-less authorization, hence let's not bother. */ + + SET_IF(hr->locked, + spwd && spwd->sp_expire >= 0, + spwd->sp_expire <= 1, -1); + + SET_IF(hr->not_after_usec, + spwd && spwd->sp_expire > 1 && (uint64_t) spwd->sp_expire < (UINT64_MAX-1)/USEC_PER_DAY, + spwd->sp_expire * USEC_PER_DAY, UINT64_MAX); + + SET_IF(hr->password_change_now, + spwd && spwd->sp_lstchg >= 0, + spwd->sp_lstchg == 0, -1); + + SET_IF(hr->last_password_change_usec, + spwd && spwd->sp_lstchg > 0 && (uint64_t) spwd->sp_lstchg <= (UINT64_MAX-1)/USEC_PER_DAY, + spwd->sp_lstchg * USEC_PER_DAY, UINT64_MAX); + + SET_IF(hr->password_change_min_usec, + spwd && spwd->sp_min > 0 && (uint64_t) spwd->sp_min <= (UINT64_MAX-1)/USEC_PER_DAY, + spwd->sp_min * USEC_PER_DAY, UINT64_MAX); + + SET_IF(hr->password_change_max_usec, + spwd && spwd->sp_max > 0 && (uint64_t) spwd->sp_max <= (UINT64_MAX-1)/USEC_PER_DAY, + spwd->sp_max * USEC_PER_DAY, UINT64_MAX); + + SET_IF(hr->password_change_warn_usec, + spwd && spwd->sp_warn > 0 && (uint64_t) spwd->sp_warn <= (UINT64_MAX-1)/USEC_PER_DAY, + spwd->sp_warn * USEC_PER_DAY, UINT64_MAX); + + SET_IF(hr->password_change_inactive_usec, + spwd && spwd->sp_inact > 0 && (uint64_t) spwd->sp_inact <= (UINT64_MAX-1)/USEC_PER_DAY, + spwd->sp_inact * USEC_PER_DAY, UINT64_MAX); + + hr->json = json_variant_unref(hr->json); + r = json_build(&hr->json, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(hr->user_name)), + JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(hr->uid)), + JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(hr->gid)), + JSON_BUILD_PAIR_CONDITION(hr->real_name, "realName", JSON_BUILD_STRING(hr->real_name)), + JSON_BUILD_PAIR_CONDITION(hr->home_directory, "homeDirectory", JSON_BUILD_STRING(hr->home_directory)), + JSON_BUILD_PAIR_CONDITION(hr->shell, "shell", JSON_BUILD_STRING(hr->shell)), + JSON_BUILD_PAIR_CONDITION(!strv_isempty(hr->hashed_password), "privileged", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRV(hr->hashed_password)))), + JSON_BUILD_PAIR_CONDITION(hr->locked >= 0, "locked", JSON_BUILD_BOOLEAN(hr->locked)), + JSON_BUILD_PAIR_CONDITION(hr->not_after_usec != UINT64_MAX, "notAfterUSec", JSON_BUILD_UNSIGNED(hr->not_after_usec)), + JSON_BUILD_PAIR_CONDITION(hr->password_change_now >= 0, "passwordChangeNow", JSON_BUILD_BOOLEAN(hr->password_change_now)), + JSON_BUILD_PAIR_CONDITION(hr->last_password_change_usec != UINT64_MAX, "lastPasswordChangeUSec", JSON_BUILD_UNSIGNED(hr->last_password_change_usec)), + JSON_BUILD_PAIR_CONDITION(hr->password_change_min_usec != UINT64_MAX, "passwordChangeMinUSec", JSON_BUILD_UNSIGNED(hr->password_change_min_usec)), + JSON_BUILD_PAIR_CONDITION(hr->password_change_max_usec != UINT64_MAX, "passwordChangeMaxUSec", JSON_BUILD_UNSIGNED(hr->password_change_max_usec)), + JSON_BUILD_PAIR_CONDITION(hr->password_change_warn_usec != UINT64_MAX, "passwordChangeWarnUSec", JSON_BUILD_UNSIGNED(hr->password_change_warn_usec)), + JSON_BUILD_PAIR_CONDITION(hr->password_change_inactive_usec != UINT64_MAX, "passwordChangeInactiveUSec", JSON_BUILD_UNSIGNED(hr->password_change_inactive_usec)))); + + if (r < 0) + return r; + + hr->mask = USER_RECORD_REGULAR | + (!strv_isempty(hr->hashed_password) ? USER_RECORD_PRIVILEGED : 0); + + *ret = TAKE_PTR(hr); + return 0; +} + +int nss_spwd_for_passwd(const struct passwd *pwd, struct spwd *ret_spwd, char **ret_buffer) { + size_t buflen = 4096; + int r; + + assert(pwd); + assert(ret_spwd); + assert(ret_buffer); + + for (;;) { + _cleanup_free_ char *buf = NULL; + struct spwd spwd, *result; + + buf = malloc(buflen); + if (!buf) + return -ENOMEM; + + r = getspnam_r(pwd->pw_name, &spwd, buf, buflen, &result); + if (r == 0) { + if (!result) + return -ESRCH; + + *ret_spwd = *result; + *ret_buffer = TAKE_PTR(buf); + return 0; + } + if (r < 0) + return -EIO; /* Weird, this should not return negative! */ + if (r != ERANGE) + return -r; + + if (buflen > SIZE_MAX / 2) + return -ERANGE; + + buflen *= 2; + buf = mfree(buf); + } +} + +int nss_user_record_by_name( + const char *name, + bool with_shadow, + UserRecord **ret) { + + _cleanup_free_ char *buf = NULL, *sbuf = NULL; + struct passwd pwd, *result; + bool incomplete = false; + size_t buflen = 4096; + struct spwd spwd, *sresult = NULL; + int r; + + assert(name); + assert(ret); + + for (;;) { + buf = malloc(buflen); + if (!buf) + return -ENOMEM; + + r = getpwnam_r(name, &pwd, buf, buflen, &result); + if (r == 0) { + if (!result) + return -ESRCH; + + break; + } + + if (r < 0) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getpwnam_r() returned a negative value"); + if (r != ERANGE) + return -r; + + if (buflen > SIZE_MAX / 2) + return -ERANGE; + + buflen *= 2; + buf = mfree(buf); + } + + if (with_shadow) { + r = nss_spwd_for_passwd(result, &spwd, &sbuf); + if (r < 0) { + log_debug_errno(r, "Failed to do shadow lookup for user %s, ignoring: %m", name); + incomplete = ERRNO_IS_PRIVILEGE(r); + } else + sresult = &spwd; + } else + incomplete = true; + + r = nss_passwd_to_user_record(result, sresult, ret); + if (r < 0) + return r; + + (*ret)->incomplete = incomplete; + return 0; +} + +int nss_user_record_by_uid( + uid_t uid, + bool with_shadow, + UserRecord **ret) { + + _cleanup_free_ char *buf = NULL, *sbuf = NULL; + struct passwd pwd, *result; + bool incomplete = false; + size_t buflen = 4096; + struct spwd spwd, *sresult = NULL; + int r; + + assert(ret); + + for (;;) { + buf = malloc(buflen); + if (!buf) + return -ENOMEM; + + r = getpwuid_r(uid, &pwd, buf, buflen, &result); + if (r == 0) { + if (!result) + return -ESRCH; + + break; + } + if (r < 0) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getpwuid_r() returned a negative value"); + if (r != ERANGE) + return -r; + + if (buflen > SIZE_MAX / 2) + return -ERANGE; + + buflen *= 2; + buf = mfree(buf); + } + + if (with_shadow) { + r = nss_spwd_for_passwd(result, &spwd, &sbuf); + if (r < 0) { + log_debug_errno(r, "Failed to do shadow lookup for UID " UID_FMT ", ignoring: %m", uid); + incomplete = ERRNO_IS_PRIVILEGE(r); + } else + sresult = &spwd; + } else + incomplete = true; + + r = nss_passwd_to_user_record(result, sresult, ret); + if (r < 0) + return r; + + (*ret)->incomplete = incomplete; + return 0; +} + +int nss_group_to_group_record( + const struct group *grp, + const struct sgrp *sgrp, + GroupRecord **ret) { + + _cleanup_(group_record_unrefp) GroupRecord *g = NULL; + int r; + + assert(grp); + assert(ret); + + if (isempty(grp->gr_name)) + return -EINVAL; + + if (sgrp && !streq_ptr(sgrp->sg_namp, grp->gr_name)) + return -EINVAL; + + g = group_record_new(); + if (!g) + return -ENOMEM; + + g->group_name = strdup(grp->gr_name); + if (!g->group_name) + return -ENOMEM; + + r = strv_extend_strv_utf8_only(&g->members, grp->gr_mem, false); + if (r < 0) + return r; + + g->gid = grp->gr_gid; + + if (sgrp) { + if (looks_like_hashed_password(utf8_only(sgrp->sg_passwd))) { + g->hashed_password = strv_new(sgrp->sg_passwd); + if (!g->hashed_password) + return -ENOMEM; + } + + r = strv_extend_strv_utf8_only(&g->members, sgrp->sg_mem, true); + if (r < 0) + return r; + + r = strv_extend_strv_utf8_only(&g->administrators, sgrp->sg_adm, false); + if (r < 0) + return r; + } + + r = json_build(&g->json, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name)), + JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(g->gid)), + JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->members), "members", JSON_BUILD_STRV(g->members)), + JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->hashed_password), "privileged", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRV(g->hashed_password)))), + JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->administrators), "administrators", JSON_BUILD_STRV(g->administrators)))); + if (r < 0) + return r; + + g->mask = USER_RECORD_REGULAR | + (!strv_isempty(g->hashed_password) ? USER_RECORD_PRIVILEGED : 0); + + *ret = TAKE_PTR(g); + return 0; +} + +int nss_sgrp_for_group(const struct group *grp, struct sgrp *ret_sgrp, char **ret_buffer) { + size_t buflen = 4096; + int r; + + assert(grp); + assert(ret_sgrp); + assert(ret_buffer); + + for (;;) { + _cleanup_free_ char *buf = NULL; + struct sgrp sgrp, *result; + + buf = malloc(buflen); + if (!buf) + return -ENOMEM; + + r = getsgnam_r(grp->gr_name, &sgrp, buf, buflen, &result); + if (r == 0) { + if (!result) + return -ESRCH; + + *ret_sgrp = *result; + *ret_buffer = TAKE_PTR(buf); + return 0; + } + if (r < 0) + return -EIO; /* Weird, this should not return negative! */ + if (r != ERANGE) + return -r; + + if (buflen > SIZE_MAX / 2) + return -ERANGE; + + buflen *= 2; + buf = mfree(buf); + } +} + +int nss_group_record_by_name( + const char *name, + bool with_shadow, + GroupRecord **ret) { + + _cleanup_free_ char *buf = NULL, *sbuf = NULL; + struct group grp, *result; + bool incomplete = false; + size_t buflen = 4096; + struct sgrp sgrp, *sresult = NULL; + int r; + + assert(name); + assert(ret); + + for (;;) { + buf = malloc(buflen); + if (!buf) + return -ENOMEM; + + r = getgrnam_r(name, &grp, buf, buflen, &result); + if (r == 0) { + if (!result) + return -ESRCH; + + break; + } + + if (r < 0) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getgrnam_r() returned a negative value"); + if (r != ERANGE) + return -r; + if (buflen > SIZE_MAX / 2) + return -ERANGE; + + buflen *= 2; + buf = mfree(buf); + } + + if (with_shadow) { + r = nss_sgrp_for_group(result, &sgrp, &sbuf); + if (r < 0) { + log_debug_errno(r, "Failed to do shadow lookup for group %s, ignoring: %m", result->gr_name); + incomplete = ERRNO_IS_PRIVILEGE(r); + } else + sresult = &sgrp; + } else + incomplete = true; + + r = nss_group_to_group_record(result, sresult, ret); + if (r < 0) + return r; + + (*ret)->incomplete = incomplete; + return 0; +} + +int nss_group_record_by_gid( + gid_t gid, + bool with_shadow, + GroupRecord **ret) { + + _cleanup_free_ char *buf = NULL, *sbuf = NULL; + struct group grp, *result; + bool incomplete = false; + size_t buflen = 4096; + struct sgrp sgrp, *sresult = NULL; + int r; + + assert(ret); + + for (;;) { + buf = malloc(buflen); + if (!buf) + return -ENOMEM; + + r = getgrgid_r(gid, &grp, buf, buflen, &result); + if (r == 0) { + if (!result) + return -ESRCH; + break; + } + + if (r < 0) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getgrgid_r() returned a negative value"); + if (r != ERANGE) + return -r; + if (buflen > SIZE_MAX / 2) + return -ERANGE; + + buflen *= 2; + buf = mfree(buf); + } + + if (with_shadow) { + r = nss_sgrp_for_group(result, &sgrp, &sbuf); + if (r < 0) { + log_debug_errno(r, "Failed to do shadow lookup for group %s, ignoring: %m", result->gr_name); + incomplete = ERRNO_IS_PRIVILEGE(r); + } else + sresult = &sgrp; + } else + incomplete = true; + + r = nss_group_to_group_record(result, sresult, ret); + if (r < 0) + return r; + + (*ret)->incomplete = incomplete; + return 0; +} diff --git a/src/shared/user-record-nss.h b/src/shared/user-record-nss.h new file mode 100644 index 0000000..22ab04d --- /dev/null +++ b/src/shared/user-record-nss.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <grp.h> +#include <gshadow.h> +#include <pwd.h> +#include <shadow.h> + +#include "group-record.h" +#include "user-record.h" + +/* Synthesize UserRecord and GroupRecord objects from NSS data */ + +int nss_passwd_to_user_record(const struct passwd *pwd, const struct spwd *spwd, UserRecord **ret); +int nss_spwd_for_passwd(const struct passwd *pwd, struct spwd *ret_spwd, char **ret_buffer); + +int nss_user_record_by_name(const char *name, bool with_shadow, UserRecord **ret); +int nss_user_record_by_uid(uid_t uid, bool with_shadow, UserRecord **ret); + +int nss_group_to_group_record(const struct group *grp, const struct sgrp *sgrp, GroupRecord **ret); +int nss_sgrp_for_group(const struct group *grp, struct sgrp *ret_sgrp, char **ret_buffer); + +int nss_group_record_by_name(const char *name, bool with_shadow, GroupRecord **ret); +int nss_group_record_by_gid(gid_t gid, bool with_shadow, GroupRecord **ret); diff --git a/src/shared/user-record-show.c b/src/shared/user-record-show.c new file mode 100644 index 0000000..2979028 --- /dev/null +++ b/src/shared/user-record-show.c @@ -0,0 +1,584 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "format-util.h" +#include "fs-util.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "user-record-show.h" +#include "user-util.h" +#include "userdb.h" + +const char *user_record_state_color(const char *state) { + if (STR_IN_SET(state, "unfixated", "absent")) + return ansi_grey(); + else if (streq(state, "active")) + return ansi_highlight_green(); + else if (STR_IN_SET(state, "locked", "dirty")) + return ansi_highlight_yellow(); + + return NULL; +} + +void user_record_show(UserRecord *hr, bool show_full_group_info) { + const char *hd, *ip, *shell; + UserStorage storage; + usec_t t; + size_t k; + int r, b; + + printf(" User name: %s\n", + user_record_user_name_and_realm(hr)); + + if (hr->state) { + const char *color; + + color = user_record_state_color(hr->state); + + printf(" State: %s%s%s\n", + strempty(color), hr->state, color ? ansi_normal() : ""); + } + + printf(" Disposition: %s\n", user_disposition_to_string(user_record_disposition(hr))); + + if (hr->last_change_usec != USEC_INFINITY) { + char buf[FORMAT_TIMESTAMP_MAX]; + printf(" Last Change: %s\n", format_timestamp(buf, sizeof(buf), hr->last_change_usec)); + + if (hr->last_change_usec > now(CLOCK_REALTIME)) + printf(" %sModification time lies in the future, system clock wrong?%s\n", + ansi_highlight_yellow(), ansi_normal()); + } + + if (hr->last_password_change_usec != USEC_INFINITY && + hr->last_password_change_usec != hr->last_change_usec) { + char buf[FORMAT_TIMESTAMP_MAX]; + printf(" Last Passw.: %s\n", format_timestamp(buf, sizeof(buf), hr->last_password_change_usec)); + } + + r = user_record_test_blocked(hr); + switch (r) { + + case -ENOLCK: + printf(" Login OK: %sno%s (record is locked)\n", ansi_highlight_red(), ansi_normal()); + break; + + case -EL2HLT: + printf(" Login OK: %sno%s (record not valid yet))\n", ansi_highlight_red(), ansi_normal()); + break; + + case -EL3HLT: + printf(" Login OK: %sno%s (record not valid anymore))\n", ansi_highlight_red(), ansi_normal()); + break; + + case -ESTALE: + default: { + usec_t y; + + if (r < 0 && r != -ESTALE) { + errno = -r; + printf(" Login OK: %sno%s (%m)\n", ansi_highlight_red(), ansi_normal()); + break; + } + + if (is_nologin_shell(user_record_shell(hr))) { + printf(" Login OK: %sno%s (nologin shell)\n", ansi_highlight_red(), ansi_normal()); + break; + } + + y = user_record_ratelimit_next_try(hr); + if (y != USEC_INFINITY && y > now(CLOCK_REALTIME)) { + printf(" Login OK: %sno%s (ratelimit)\n", ansi_highlight_red(), ansi_normal()); + break; + } + + printf(" Login OK: %syes%s\n", ansi_highlight_green(), ansi_normal()); + break; + }} + + r = user_record_test_password_change_required(hr); + switch (r) { + + case -EKEYREVOKED: + printf(" Password OK: %schange now%s\n", ansi_highlight_yellow(), ansi_normal()); + break; + + case -EOWNERDEAD: + printf(" Password OK: %sexpired%s (change now!)\n", ansi_highlight_yellow(), ansi_normal()); + break; + + case -EKEYREJECTED: + printf(" Password OK: %sexpired%s (for good)\n", ansi_highlight_red(), ansi_normal()); + break; + + case -EKEYEXPIRED: + printf(" Password OK: %sexpires soon%s\n", ansi_highlight_yellow(), ansi_normal()); + break; + + case -ENETDOWN: + printf(" Password OK: %sno timestamp%s\n", ansi_highlight_red(), ansi_normal()); + break; + + case -EROFS: + printf(" Password OK: %schange not permitted%s\n", ansi_highlight_yellow(), ansi_normal()); + break; + + case -ESTALE: + printf(" Password OK: %slast password change in future%s\n", ansi_highlight_yellow(), ansi_normal()); + break; + + default: + if (r < 0) { + errno = -r; + printf(" Password OK: %sno%s (%m)\n", ansi_highlight_yellow(), ansi_normal()); + break; + } + + printf(" Password OK: %syes%s\n", ansi_highlight_green(), ansi_normal()); + break; + } + + if (uid_is_valid(hr->uid)) + printf(" UID: " UID_FMT "\n", hr->uid); + if (gid_is_valid(hr->gid)) { + if (show_full_group_info) { + _cleanup_(group_record_unrefp) GroupRecord *gr = NULL; + + r = groupdb_by_gid(hr->gid, 0, &gr); + if (r < 0) { + errno = -r; + printf(" GID: " GID_FMT " (unresolvable: %m)\n", hr->gid); + } else + printf(" GID: " GID_FMT " (%s)\n", hr->gid, gr->group_name); + } else + printf(" GID: " GID_FMT "\n", hr->gid); + } else if (uid_is_valid(hr->uid)) /* Show UID as GID if not separately configured */ + printf(" GID: " GID_FMT "\n", (gid_t) hr->uid); + + if (show_full_group_info) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + + r = membershipdb_by_user(hr->user_name, 0, &iterator); + if (r < 0) { + errno = -r; + printf(" Aux. Groups: (can't acquire: %m)\n"); + } else { + const char *prefix = " Aux. Groups:"; + + for (;;) { + _cleanup_free_ char *group = NULL; + + r = membershipdb_iterator_get(iterator, NULL, &group); + if (r == -ESRCH) + break; + if (r < 0) { + errno = -r; + printf("%s (can't iterate: %m)\n", prefix); + break; + } + + printf("%s %s\n", prefix, group); + prefix = " "; + } + } + } + + if (hr->real_name && !streq(hr->real_name, hr->user_name)) + printf(" Real Name: %s\n", hr->real_name); + + hd = user_record_home_directory(hr); + if (hd) + printf(" Directory: %s\n", hd); + + storage = user_record_storage(hr); + if (storage >= 0) /* Let's be political, and clarify which storage we like, and which we don't. About CIFS we don't complain. */ + printf(" Storage: %s%s\n", user_storage_to_string(storage), + storage == USER_LUKS ? " (strong encryption)" : + storage == USER_FSCRYPT ? " (weak encryption)" : + IN_SET(storage, USER_DIRECTORY, USER_SUBVOLUME) ? " (no encryption)" : ""); + + ip = user_record_image_path(hr); + if (ip && !streq_ptr(ip, hd)) + printf(" Image Path: %s\n", ip); + + b = user_record_removable(hr); + if (b >= 0) + printf(" Removable: %s\n", yes_no(b)); + + shell = user_record_shell(hr); + if (shell) + printf(" Shell: %s\n", shell); + + if (hr->email_address) + printf(" Email: %s\n", hr->email_address); + if (hr->location) + printf(" Location: %s\n", hr->location); + if (hr->password_hint) + printf(" Passw. Hint: %s\n", hr->password_hint); + if (hr->icon_name) + printf(" Icon Name: %s\n", hr->icon_name); + + if (hr->time_zone) + printf(" Time Zone: %s\n", hr->time_zone); + + if (hr->preferred_language) + printf(" Language: %s\n", hr->preferred_language); + + if (!strv_isempty(hr->environment)) { + char **i; + + STRV_FOREACH(i, hr->environment) { + printf(i == hr->environment ? + " Environment: %s\n" : + " %s\n", *i); + } + } + + if (hr->locked >= 0) + printf(" Locked: %s\n", yes_no(hr->locked)); + + if (hr->not_before_usec != UINT64_MAX) { + char buf[FORMAT_TIMESTAMP_MAX]; + printf(" Not Before: %s\n", format_timestamp(buf, sizeof(buf), hr->not_before_usec)); + } + + if (hr->not_after_usec != UINT64_MAX) { + char buf[FORMAT_TIMESTAMP_MAX]; + printf(" Not After: %s\n", format_timestamp(buf, sizeof(buf), hr->not_after_usec)); + } + + if (hr->umask != MODE_INVALID) + printf(" UMask: 0%03o\n", hr->umask); + + if (nice_is_valid(hr->nice_level)) + printf(" Nice: %i\n", hr->nice_level); + + for (int j = 0; j < _RLIMIT_MAX; j++) { + if (hr->rlimits[j]) + printf(" Limit: RLIMIT_%s=%" PRIu64 ":%" PRIu64 "\n", + rlimit_to_string(j), (uint64_t) hr->rlimits[j]->rlim_cur, (uint64_t) hr->rlimits[j]->rlim_max); + } + + if (hr->tasks_max != UINT64_MAX) + printf(" Tasks Max: %" PRIu64 "\n", hr->tasks_max); + + if (hr->memory_high != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + printf(" Memory High: %s\n", format_bytes(buf, sizeof(buf), hr->memory_high)); + } + + if (hr->memory_max != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + printf(" Memory Max: %s\n", format_bytes(buf, sizeof(buf), hr->memory_max)); + } + + if (hr->cpu_weight != UINT64_MAX) + printf(" CPU Weight: %" PRIu64 "\n", hr->cpu_weight); + + if (hr->io_weight != UINT64_MAX) + printf(" IO Weight: %" PRIu64 "\n", hr->io_weight); + + if (hr->access_mode != MODE_INVALID) + printf(" Access Mode: 0%03oo\n", user_record_access_mode(hr)); + + if (storage == USER_LUKS) { + printf("LUKS Discard: online=%s offline=%s\n", yes_no(user_record_luks_discard(hr)), yes_no(user_record_luks_offline_discard(hr))); + + if (!sd_id128_is_null(hr->luks_uuid)) + printf(" LUKS UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->luks_uuid)); + if (!sd_id128_is_null(hr->partition_uuid)) + printf(" Part UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->partition_uuid)); + if (!sd_id128_is_null(hr->file_system_uuid)) + printf(" FS UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->file_system_uuid)); + + if (hr->file_system_type) + printf(" File System: %s\n", user_record_file_system_type(hr)); + + if (hr->luks_cipher) + printf(" LUKS Cipher: %s\n", hr->luks_cipher); + if (hr->luks_cipher_mode) + printf(" Cipher Mode: %s\n", hr->luks_cipher_mode); + if (hr->luks_volume_key_size != UINT64_MAX) + printf(" Volume Key: %" PRIu64 "bit\n", hr->luks_volume_key_size * 8); + + if (hr->luks_pbkdf_type) + printf(" PBKDF Type: %s\n", hr->luks_pbkdf_type); + if (hr->luks_pbkdf_hash_algorithm) + printf(" PBKDF Hash: %s\n", hr->luks_pbkdf_hash_algorithm); + if (hr->luks_pbkdf_time_cost_usec != UINT64_MAX) { + char buf[FORMAT_TIMESPAN_MAX]; + printf(" PBKDF Time: %s\n", format_timespan(buf, sizeof(buf), hr->luks_pbkdf_time_cost_usec, 0)); + } + if (hr->luks_pbkdf_memory_cost != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + printf(" PBKDF Bytes: %s\n", format_bytes(buf, sizeof(buf), hr->luks_pbkdf_memory_cost)); + } + if (hr->luks_pbkdf_parallel_threads != UINT64_MAX) + printf("PBKDF Thread: %" PRIu64 "\n", hr->luks_pbkdf_parallel_threads); + + } else if (storage == USER_CIFS) { + + if (hr->cifs_service) + printf("CIFS Service: %s\n", hr->cifs_service); + } + + if (hr->cifs_user_name) + printf(" CIFS User: %s\n", user_record_cifs_user_name(hr)); + if (hr->cifs_domain) + printf(" CIFS Domain: %s\n", hr->cifs_domain); + + if (storage != USER_CLASSIC) + printf(" Mount Flags: %s %s %s\n", + hr->nosuid ? "nosuid" : "suid", + hr->nodev ? "nodev" : "dev", + hr->noexec ? "noexec" : "exec"); + + if (hr->skeleton_directory) + printf(" Skel. Dir.: %s\n", user_record_skeleton_directory(hr)); + + if (hr->disk_size != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + printf(" Disk Size: %s\n", format_bytes(buf, sizeof(buf), hr->disk_size)); + } + + if (hr->disk_usage != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + + if (hr->disk_size != UINT64_MAX) { + unsigned permille; + + permille = (unsigned) DIV_ROUND_UP(hr->disk_usage * 1000U, hr->disk_size); /* Round up! */ + printf(" Disk Usage: %s (= %u.%01u%%)\n", + format_bytes(buf, sizeof(buf), hr->disk_usage), + permille / 10, permille % 10); + } else + printf(" Disk Usage: %s\n", format_bytes(buf, sizeof(buf), hr->disk_usage)); + } + + if (hr->disk_free != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + + if (hr->disk_size != UINT64_MAX) { + const char *color_on, *color_off; + unsigned permille; + + permille = (unsigned) ((hr->disk_free * 1000U) / hr->disk_size); /* Round down! */ + + /* Color the output red or yellow if we are below 10% resp. 25% free. Because 10% and + * 25% can be a lot of space still, let's additionally make some absolute + * restrictions: 1G and 2G */ + if (permille <= 100U && + hr->disk_free < 1024U*1024U*1024U /* 1G */) { + color_on = ansi_highlight_red(); + color_off = ansi_normal(); + } else if (permille <= 250U && + hr->disk_free < 2U*1024U*1024U*1024U /* 2G */) { + color_on = ansi_highlight_yellow(); + color_off = ansi_normal(); + } else + color_on = color_off = ""; + + printf(" Disk Free: %s%s (= %u.%01u%%)%s\n", + color_on, + format_bytes(buf, sizeof(buf), hr->disk_free), + permille / 10, permille % 10, + color_off); + } else + printf(" Disk Free: %s\n", format_bytes(buf, sizeof(buf), hr->disk_free)); + } + + if (hr->disk_floor != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + printf(" Disk Floor: %s\n", format_bytes(buf, sizeof(buf), hr->disk_floor)); + } + + if (hr->disk_ceiling != UINT64_MAX) { + char buf[FORMAT_BYTES_MAX]; + printf("Disk Ceiling: %s\n", format_bytes(buf, sizeof(buf), hr->disk_ceiling)); + } + + if (hr->good_authentication_counter != UINT64_MAX) + printf(" Good Auth.: %" PRIu64 "\n", hr->good_authentication_counter); + + if (hr->last_good_authentication_usec != UINT64_MAX) { + char buf[FORMAT_TIMESTAMP_MAX]; + printf(" Last Good: %s\n", format_timestamp(buf, sizeof(buf), hr->last_good_authentication_usec)); + } + + if (hr->bad_authentication_counter != UINT64_MAX) + printf(" Bad Auth.: %" PRIu64 "\n", hr->bad_authentication_counter); + + if (hr->last_bad_authentication_usec != UINT64_MAX) { + char buf[FORMAT_TIMESTAMP_MAX]; + printf(" Last Bad: %s\n", format_timestamp(buf, sizeof(buf), hr->last_bad_authentication_usec)); + } + + t = user_record_ratelimit_next_try(hr); + if (t != USEC_INFINITY) { + usec_t n = now(CLOCK_REALTIME); + + if (t <= n) + printf(" Next Try: anytime\n"); + else { + char buf[FORMAT_TIMESPAN_MAX]; + printf(" Next Try: %sin %s%s\n", + ansi_highlight_red(), + format_timespan(buf, sizeof(buf), t - n, USEC_PER_SEC), + ansi_normal()); + } + } + + if (storage != USER_CLASSIC) { + char buf[FORMAT_TIMESPAN_MAX]; + printf(" Auth. Limit: %" PRIu64 " attempts per %s\n", user_record_ratelimit_burst(hr), + format_timespan(buf, sizeof(buf), user_record_ratelimit_interval_usec(hr), 0)); + } + + if (hr->enforce_password_policy >= 0) + printf(" Passwd Pol.: %s\n", yes_no(hr->enforce_password_policy)); + + if (hr->password_change_min_usec != UINT64_MAX || + hr->password_change_max_usec != UINT64_MAX || + hr->password_change_warn_usec != UINT64_MAX || + hr->password_change_inactive_usec != UINT64_MAX) { + + char buf[FORMAT_TIMESPAN_MAX]; + printf(" Passwd Chg.:"); + + if (hr->password_change_min_usec != UINT64_MAX) { + printf(" min %s", format_timespan(buf, sizeof(buf), hr->password_change_min_usec, 0)); + + if (hr->password_change_max_usec != UINT64_MAX) + printf(" …"); + } + + if (hr->password_change_max_usec != UINT64_MAX) + printf(" max %s", format_timespan(buf, sizeof(buf), hr->password_change_max_usec, 0)); + + if (hr->password_change_warn_usec != UINT64_MAX) + printf("/warn %s", format_timespan(buf, sizeof(buf), hr->password_change_warn_usec, 0)); + + if (hr->password_change_inactive_usec != UINT64_MAX) + printf("/inactive %s", format_timespan(buf, sizeof(buf), hr->password_change_inactive_usec, 0)); + + printf("\n"); + } + + if (hr->password_change_now >= 0) + printf("Pas. Ch. Now: %s\n", yes_no(hr->password_change_now)); + + if (!strv_isempty(hr->ssh_authorized_keys)) + printf("SSH Pub. Key: %zu\n", strv_length(hr->ssh_authorized_keys)); + + if (!strv_isempty(hr->pkcs11_token_uri)) { + char **i; + + STRV_FOREACH(i, hr->pkcs11_token_uri) + printf(i == hr->pkcs11_token_uri ? + "PKCS11 Token: %s\n" : + " %s\n", *i); + } + + if (hr->n_fido2_hmac_credential > 0) + printf(" FIDO2 Token: %zu\n", hr->n_fido2_hmac_credential); + + if (!strv_isempty(hr->recovery_key_type)) + printf("Recovery Key: %zu\n", strv_length(hr->recovery_key_type)); + + k = strv_length(hr->hashed_password); + if (k == 0) + printf(" Passwords: %snone%s\n", + user_record_disposition(hr) == USER_REGULAR ? ansi_highlight_yellow() : ansi_normal(), ansi_normal()); + else + printf(" Passwords: %zu\n", k); + + if (hr->signed_locally >= 0) + printf(" Local Sig.: %s\n", yes_no(hr->signed_locally)); + + if (hr->stop_delay_usec != UINT64_MAX) { + char buf[FORMAT_TIMESPAN_MAX]; + printf(" Stop Delay: %s\n", format_timespan(buf, sizeof(buf), hr->stop_delay_usec, 0)); + } + + if (hr->auto_login >= 0) + printf("Autom. Login: %s\n", yes_no(hr->auto_login)); + + if (hr->kill_processes >= 0) + printf(" Kill Proc.: %s\n", yes_no(hr->kill_processes)); + + if (hr->service) + printf(" Service: %s\n", hr->service); +} + +void group_record_show(GroupRecord *gr, bool show_full_user_info) { + int r; + + printf(" Group name: %s\n", + group_record_group_name_and_realm(gr)); + + printf(" Disposition: %s\n", user_disposition_to_string(group_record_disposition(gr))); + + if (gr->last_change_usec != USEC_INFINITY) { + char buf[FORMAT_TIMESTAMP_MAX]; + printf(" Last Change: %s\n", format_timestamp(buf, sizeof(buf), gr->last_change_usec)); + } + + if (gid_is_valid(gr->gid)) + printf(" GID: " GID_FMT "\n", gr->gid); + + if (show_full_user_info) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + + r = membershipdb_by_group(gr->group_name, 0, &iterator); + if (r < 0) { + errno = -r; + printf(" Members: (can't acquire: %m)"); + } else { + const char *prefix = " Members:"; + + for (;;) { + _cleanup_free_ char *user = NULL; + + r = membershipdb_iterator_get(iterator, &user, NULL); + if (r == -ESRCH) + break; + if (r < 0) { + errno = -r; + printf("%s (can't iterate: %m\n", prefix); + break; + } + + printf("%s %s\n", prefix, user); + prefix = " "; + } + } + } else { + const char *prefix = " Members:"; + char **i; + + STRV_FOREACH(i, gr->members) { + printf("%s %s\n", prefix, *i); + prefix = " "; + } + } + + if (!strv_isempty(gr->administrators)) { + const char *prefix = " Admins:"; + char **i; + + STRV_FOREACH(i, gr->administrators) { + printf("%s %s\n", prefix, *i); + prefix = " "; + } + } + + if (gr->description && !streq(gr->description, gr->group_name)) + printf(" Description: %s\n", gr->description); + + if (!strv_isempty(gr->hashed_password)) + printf(" Passwords: %zu\n", strv_length(gr->hashed_password)); + + if (gr->service) + printf(" Service: %s\n", gr->service); +} diff --git a/src/shared/user-record-show.h b/src/shared/user-record-show.h new file mode 100644 index 0000000..dcef065 --- /dev/null +++ b/src/shared/user-record-show.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "user-record.h" +#include "group-record.h" + +const char *user_record_state_color(const char *state); + +void user_record_show(UserRecord *hr, bool show_full_group_info); +void group_record_show(GroupRecord *gr, bool show_full_user_info); diff --git a/src/shared/user-record.c b/src/shared/user-record.c new file mode 100644 index 0000000..6c48c56 --- /dev/null +++ b/src/shared/user-record.c @@ -0,0 +1,2272 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/mount.h> + +#include "cgroup-util.h" +#include "dns-domain.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "memory-util.h" +#include "path-util.h" +#include "pkcs11-util.h" +#include "rlimit-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "strv.h" +#include "user-record.h" +#include "user-util.h" + +#define DEFAULT_RATELIMIT_BURST 30 +#define DEFAULT_RATELIMIT_INTERVAL_USEC (1*USEC_PER_MINUTE) + +#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES +static int parse_alloc_uid(const char *path, const char *name, const char *t, uid_t *ret_uid) { + uid_t uid; + int r; + + r = parse_uid(t, &uid); + if (r < 0) + return log_debug_errno(r, "%s: failed to parse %s %s, ignoring: %m", path, name, t); + if (uid == 0) + uid = 1; + + *ret_uid = uid; + return 0; +} +#endif + +int read_login_defs(UGIDAllocationRange *ret_defs, const char *path, const char *root) { + UGIDAllocationRange defs = { + .system_alloc_uid_min = SYSTEM_ALLOC_UID_MIN, + .system_uid_max = SYSTEM_UID_MAX, + .system_alloc_gid_min = SYSTEM_ALLOC_GID_MIN, + .system_gid_max = SYSTEM_GID_MAX, + }; + +#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES + _cleanup_fclose_ FILE *f = NULL; + int r; + + if (!path) + path = "/etc/login.defs"; + + r = chase_symlinks_and_fopen_unlocked(path, root, CHASE_PREFIX_ROOT, "re", &f, NULL); + if (r == -ENOENT) + goto assign; + if (r < 0) + return log_debug_errno(r, "Failed to open %s: %m", path); + + for (;;) { + _cleanup_free_ char *line = NULL; + char *t; + + r = read_line(f, LINE_MAX, &line); + if (r < 0) + return log_debug_errno(r, "Failed to read %s: %m", path); + if (r == 0) + break; + + if ((t = first_word(line, "SYS_UID_MIN"))) + (void) parse_alloc_uid(path, "SYS_UID_MIN", t, &defs.system_alloc_uid_min); + else if ((t = first_word(line, "SYS_UID_MAX"))) + (void) parse_alloc_uid(path, "SYS_UID_MAX", t, &defs.system_uid_max); + else if ((t = first_word(line, "SYS_GID_MIN"))) + (void) parse_alloc_uid(path, "SYS_GID_MIN", t, &defs.system_alloc_gid_min); + else if ((t = first_word(line, "SYS_GID_MAX"))) + (void) parse_alloc_uid(path, "SYS_GID_MAX", t, &defs.system_gid_max); + } + + assign: + if (defs.system_alloc_uid_min > defs.system_uid_max) { + log_debug("%s: SYS_UID_MIN > SYS_UID_MAX, resetting.", path); + defs.system_alloc_uid_min = MIN(defs.system_uid_max - 1, (uid_t) SYSTEM_ALLOC_UID_MIN); + /* Look at sys_uid_max to make sure sys_uid_min..sys_uid_max remains a valid range. */ + } + if (defs.system_alloc_gid_min > defs.system_gid_max) { + log_debug("%s: SYS_GID_MIN > SYS_GID_MAX, resetting.", path); + defs.system_alloc_gid_min = MIN(defs.system_gid_max - 1, (gid_t) SYSTEM_ALLOC_GID_MIN); + /* Look at sys_gid_max to make sure sys_gid_min..sys_gid_max remains a valid range. */ + } +#endif + + *ret_defs = defs; + return 0; +} + +const UGIDAllocationRange *acquire_ugid_allocation_range(void) { +#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES + static thread_local UGIDAllocationRange defs = { +#else + static const UGIDAllocationRange defs = { +#endif + .system_alloc_uid_min = SYSTEM_ALLOC_UID_MIN, + .system_uid_max = SYSTEM_UID_MAX, + .system_alloc_gid_min = SYSTEM_ALLOC_GID_MIN, + .system_gid_max = SYSTEM_GID_MAX, + }; + +#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES + /* This function will ignore failure to read the file, so it should only be called from places where + * we don't crucially depend on the answer. In other words, it's appropriate for journald, but + * probably not for sysusers. */ + + static thread_local bool initialized = false; + + if (!initialized) { + (void) read_login_defs(&defs, NULL, NULL); + initialized = true; + } +#endif + + return &defs; +} + +bool uid_is_system(uid_t uid) { + const UGIDAllocationRange *defs; + assert_se(defs = acquire_ugid_allocation_range()); + + return uid <= defs->system_uid_max; +} + +bool gid_is_system(gid_t gid) { + const UGIDAllocationRange *defs; + assert_se(defs = acquire_ugid_allocation_range()); + + return gid <= defs->system_gid_max; +} + +UserRecord* user_record_new(void) { + UserRecord *h; + + h = new(UserRecord, 1); + if (!h) + return NULL; + + *h = (UserRecord) { + .n_ref = 1, + .disposition = _USER_DISPOSITION_INVALID, + .last_change_usec = UINT64_MAX, + .last_password_change_usec = UINT64_MAX, + .umask = MODE_INVALID, + .nice_level = INT_MAX, + .not_before_usec = UINT64_MAX, + .not_after_usec = UINT64_MAX, + .locked = -1, + .storage = _USER_STORAGE_INVALID, + .access_mode = MODE_INVALID, + .disk_size = UINT64_MAX, + .disk_size_relative = UINT64_MAX, + .tasks_max = UINT64_MAX, + .memory_high = UINT64_MAX, + .memory_max = UINT64_MAX, + .cpu_weight = UINT64_MAX, + .io_weight = UINT64_MAX, + .uid = UID_INVALID, + .gid = GID_INVALID, + .nodev = true, + .nosuid = true, + .luks_discard = -1, + .luks_offline_discard = -1, + .luks_volume_key_size = UINT64_MAX, + .luks_pbkdf_time_cost_usec = UINT64_MAX, + .luks_pbkdf_memory_cost = UINT64_MAX, + .luks_pbkdf_parallel_threads = UINT64_MAX, + .disk_usage = UINT64_MAX, + .disk_free = UINT64_MAX, + .disk_ceiling = UINT64_MAX, + .disk_floor = UINT64_MAX, + .signed_locally = -1, + .good_authentication_counter = UINT64_MAX, + .bad_authentication_counter = UINT64_MAX, + .last_good_authentication_usec = UINT64_MAX, + .last_bad_authentication_usec = UINT64_MAX, + .ratelimit_begin_usec = UINT64_MAX, + .ratelimit_count = UINT64_MAX, + .ratelimit_interval_usec = UINT64_MAX, + .ratelimit_burst = UINT64_MAX, + .removable = -1, + .enforce_password_policy = -1, + .auto_login = -1, + .stop_delay_usec = UINT64_MAX, + .kill_processes = -1, + .password_change_min_usec = UINT64_MAX, + .password_change_max_usec = UINT64_MAX, + .password_change_warn_usec = UINT64_MAX, + .password_change_inactive_usec = UINT64_MAX, + .password_change_now = -1, + .pkcs11_protected_authentication_path_permitted = -1, + .fido2_user_presence_permitted = -1, + }; + + return h; +} + +static void pkcs11_encrypted_key_done(Pkcs11EncryptedKey *k) { + if (!k) + return; + + free(k->uri); + erase_and_free(k->data); + erase_and_free(k->hashed_password); +} + +static void fido2_hmac_credential_done(Fido2HmacCredential *c) { + if (!c) + return; + + free(c->id); +} + +static void fido2_hmac_salt_done(Fido2HmacSalt *s) { + if (!s) + return; + + fido2_hmac_credential_done(&s->credential); + erase_and_free(s->salt); + erase_and_free(s->hashed_password); +} + +static void recovery_key_done(RecoveryKey *k) { + if (!k) + return; + + free(k->type); + erase_and_free(k->hashed_password); +} + +static UserRecord* user_record_free(UserRecord *h) { + if (!h) + return NULL; + + free(h->user_name); + free(h->realm); + free(h->user_name_and_realm_auto); + free(h->real_name); + free(h->email_address); + erase_and_free(h->password_hint); + free(h->location); + free(h->icon_name); + + free(h->shell); + + strv_free(h->environment); + free(h->time_zone); + free(h->preferred_language); + rlimit_free_all(h->rlimits); + + free(h->skeleton_directory); + + strv_free_erase(h->hashed_password); + strv_free_erase(h->ssh_authorized_keys); + strv_free_erase(h->password); + strv_free_erase(h->token_pin); + + free(h->cifs_service); + free(h->cifs_user_name); + free(h->cifs_domain); + + free(h->image_path); + free(h->image_path_auto); + free(h->home_directory); + free(h->home_directory_auto); + + strv_free(h->member_of); + + free(h->file_system_type); + free(h->luks_cipher); + free(h->luks_cipher_mode); + free(h->luks_pbkdf_hash_algorithm); + free(h->luks_pbkdf_type); + + free(h->state); + free(h->service); + + strv_free(h->pkcs11_token_uri); + for (size_t i = 0; i < h->n_pkcs11_encrypted_key; i++) + pkcs11_encrypted_key_done(h->pkcs11_encrypted_key + i); + free(h->pkcs11_encrypted_key); + + for (size_t i = 0; i < h->n_fido2_hmac_credential; i++) + fido2_hmac_credential_done(h->fido2_hmac_credential + i); + for (size_t i = 0; i < h->n_fido2_hmac_salt; i++) + fido2_hmac_salt_done(h->fido2_hmac_salt + i); + + strv_free(h->recovery_key_type); + for (size_t i = 0; i < h->n_recovery_key; i++) + recovery_key_done(h->recovery_key + i); + + json_variant_unref(h->json); + + return mfree(h); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(UserRecord, user_record, user_record_free); + +int json_dispatch_realm(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + int r; + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + r = dns_name_is_valid(n); + if (r < 0) + return json_log(variant, flags, r, "Failed to check if JSON field '%s' is a valid DNS domain.", strna(name)); + if (r == 0) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid DNS domain.", strna(name)); + + r = free_and_strdup(s, n); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +int json_dispatch_gecos(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + if (valid_gecos(n)) { + if (free_and_strdup(s, n) < 0) + return json_log_oom(variant, flags); + } else { + _cleanup_free_ char *m = NULL; + + json_log(variant, flags|JSON_DEBUG, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid GECOS compatible string, mangling.", strna(name)); + + m = mangle_gecos(n); + if (!m) + return json_log_oom(variant, flags); + + free_and_replace(*s, m); + } + + return 0; +} + +static int json_dispatch_nice(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + int *nl = userdata; + intmax_t m; + + if (json_variant_is_null(variant)) { + *nl = INT_MAX; + return 0; + } + + if (!json_variant_is_integer(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + m = json_variant_integer(variant); + if (m < PRIO_MIN || m >= PRIO_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not a valid nice level.", strna(name)); + + *nl = m; + return 0; +} + +static int json_dispatch_rlimit_value(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + rlim_t *ret = userdata; + + if (json_variant_is_null(variant)) + *ret = RLIM_INFINITY; + else if (json_variant_is_unsigned(variant)) { + uintmax_t w; + + w = json_variant_unsigned(variant); + if (w == RLIM_INFINITY || (uintmax_t) w != json_variant_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "Resource limit value '%s' is out of range.", name); + + *ret = (rlim_t) w; + } else + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit value '%s' is not an unsigned integer.", name); + + return 0; +} + +static int json_dispatch_rlimits(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + struct rlimit** limits = userdata; + JsonVariant *value; + const char *key; + int r; + + assert_se(limits); + + if (json_variant_is_null(variant)) { + rlimit_free_all(limits); + return 0; + } + + if (!json_variant_is_object(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name)); + + JSON_VARIANT_OBJECT_FOREACH(key, value, variant) { + JsonVariant *jcur, *jmax; + struct rlimit rl; + const char *p; + int l; + + p = startswith(key, "RLIMIT_"); + if (!p) + l = -1; + else + l = rlimit_from_string(p); + if (l < 0) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' not known.", key); + + if (!json_variant_is_object(value)) + return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' has invalid value.", key); + + if (json_variant_elements(value) != 4) + return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' value is does not have two fields as expected.", key); + + jcur = json_variant_by_key(value, "cur"); + if (!jcur) + return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' lacks 'cur' field.", key); + r = json_dispatch_rlimit_value("cur", jcur, flags, &rl.rlim_cur); + if (r < 0) + return r; + + jmax = json_variant_by_key(value, "max"); + if (!jmax) + return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' lacks 'max' field.", key); + r = json_dispatch_rlimit_value("max", jmax, flags, &rl.rlim_max); + if (r < 0) + return r; + + if (limits[l]) + *(limits[l]) = rl; + else { + limits[l] = newdup(struct rlimit, &rl, 1); + if (!limits[l]) + return log_oom(); + } + } + + return 0; +} + +static int json_dispatch_filename_or_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + int r; + + assert(s); + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + if (!filename_is_valid(n) && !path_is_normalized(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid file name or normalized path.", strna(name)); + + r = free_and_strdup(s, n); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +static int json_dispatch_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + int r; + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + if (!path_is_normalized(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a normalized file system path.", strna(name)); + if (!path_is_absolute(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an absolute file system path.", strna(name)); + + r = free_and_strdup(s, n); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +static int json_dispatch_home_directory(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + int r; + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + if (!valid_home(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid home directory path.", strna(name)); + + r = free_and_strdup(s, n); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +static int json_dispatch_image_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + int r; + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + if (empty_or_root(n) || !path_is_valid(n) || !path_is_absolute(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid image path.", strna(name)); + + r = free_and_strdup(s, n); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +static int json_dispatch_umask(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + mode_t *m = userdata; + uintmax_t k; + + if (json_variant_is_null(variant)) { + *m = (mode_t) -1; + return 0; + } + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a number.", strna(name)); + + k = json_variant_unsigned(variant); + if (k > 0777) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' outside of valid range 0…0777.", strna(name)); + + *m = (mode_t) k; + return 0; +} + +static int json_dispatch_access_mode(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + mode_t *m = userdata; + uintmax_t k; + + if (json_variant_is_null(variant)) { + *m = (mode_t) -1; + return 0; + } + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a number.", strna(name)); + + k = json_variant_unsigned(variant); + if (k > 07777) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' outside of valid range 0…07777.", strna(name)); + + *m = (mode_t) k; + return 0; +} + +static int json_dispatch_environment(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + _cleanup_strv_free_ char **n = NULL; + char ***l = userdata; + size_t i; + int r; + + if (json_variant_is_null(variant)) { + *l = strv_free(*l); + return 0; + } + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name)); + + for (i = 0; i < json_variant_elements(variant); i++) { + _cleanup_free_ char *c = NULL; + JsonVariant *e; + const char *a; + + e = json_variant_by_index(variant, i); + if (!json_variant_is_string(e)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name)); + + assert_se(a = json_variant_string(e)); + + if (!env_assignment_is_valid(a)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of environment variables.", strna(name)); + + c = strdup(a); + if (!c) + return json_log_oom(variant, flags); + + r = strv_env_replace(&n, c); + if (r < 0) + return json_log_oom(variant, flags); + + c = NULL; + } + + strv_free_and_replace(*l, n); + return 0; +} + +int json_dispatch_user_disposition(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + UserDisposition *disposition = userdata, k; + + if (json_variant_is_null(variant)) { + *disposition = _USER_DISPOSITION_INVALID; + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + k = user_disposition_from_string(json_variant_string(variant)); + if (k < 0) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Disposition type '%s' not known.", json_variant_string(variant)); + + *disposition = k; + return 0; +} + +static int json_dispatch_storage(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + UserStorage *storage = userdata, k; + + if (json_variant_is_null(variant)) { + *storage = _USER_STORAGE_INVALID; + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + k = user_storage_from_string(json_variant_string(variant)); + if (k < 0) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Storage type '%s' not known.", json_variant_string(variant)); + + *storage = k; + return 0; +} + +static int json_dispatch_disk_size(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uint64_t *size = userdata; + uintmax_t k; + + if (json_variant_is_null(variant)) { + *size = UINT64_MAX; + return 0; + } + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name)); + + k = json_variant_unsigned(variant); + if (k < USER_DISK_SIZE_MIN || k > USER_DISK_SIZE_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX); + + *size = k; + return 0; +} + +static int json_dispatch_tasks_or_memory_max(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uint64_t *limit = userdata; + uintmax_t k; + + if (json_variant_is_null(variant)) { + *limit = UINT64_MAX; + return 0; + } + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name)); + + k = json_variant_unsigned(variant); + if (k <= 0 || k >= UINT64_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), (uint64_t) 1, UINT64_MAX-1); + + *limit = k; + return 0; +} + +static int json_dispatch_weight(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uint64_t *weight = userdata; + uintmax_t k; + + if (json_variant_is_null(variant)) { + *weight = UINT64_MAX; + return 0; + } + + if (!json_variant_is_unsigned(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name)); + + k = json_variant_unsigned(variant); + if (k <= CGROUP_WEIGHT_MIN || k >= CGROUP_WEIGHT_MAX) + return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), (uint64_t) CGROUP_WEIGHT_MIN, (uint64_t) CGROUP_WEIGHT_MAX); + + *weight = k; + return 0; +} + +int json_dispatch_user_group_list(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + _cleanup_strv_free_ char **l = NULL; + char ***list = userdata; + JsonVariant *e; + int r; + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + + if (!json_variant_is_string(e)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string."); + + if (!valid_user_group_name(json_variant_string(e), FLAGS_SET(flags, JSON_RELAX) ? VALID_USER_RELAX : 0)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a valid user/group name: %s", json_variant_string(e)); + + r = strv_extend(&l, json_variant_string(e)); + if (r < 0) + return json_log(e, flags, r, "Failed to append array element: %m"); + } + + r = strv_extend_strv(list, l, true); + if (r < 0) + return json_log(variant, flags, r, "Failed to merge user/group arrays: %m"); + + return 0; +} + +static int dispatch_secret(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch secret_dispatch_table[] = { + { "password", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, password), 0 }, + { "tokenPin", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, token_pin), 0 }, + { "pkcs11Pin", /* legacy alias */ _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, token_pin), 0 }, + { "pkcs11ProtectedAuthenticationPathPermitted", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, pkcs11_protected_authentication_path_permitted), 0 }, + { "fido2UserPresencePermitted", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, fido2_user_presence_permitted), 0 }, + {}, + }; + + return json_dispatch(variant, secret_dispatch_table, NULL, flags, userdata); +} + +static int dispatch_pkcs11_uri(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + const char *n; + int r; + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + n = json_variant_string(variant); + if (!pkcs11_uri_valid(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid RFC7512 PKCS#11 URI.", strna(name)); + + r = free_and_strdup(s, n); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +static int dispatch_pkcs11_uri_array(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + _cleanup_strv_free_ char **z = NULL; + char ***l = userdata; + JsonVariant *e; + int r; + + if (json_variant_is_null(variant)) { + *l = strv_free(*l); + return 0; + } + + if (json_variant_is_string(variant)) { + const char *n; + + n = json_variant_string(variant); + if (!pkcs11_uri_valid(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid RFC7512 PKCS#11 URI.", strna(name)); + + z = strv_new(n); + if (!z) + return log_oom(); + + } else { + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string or array of strings.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + const char *n; + + if (!json_variant_is_string(e)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string."); + + n = json_variant_string(e); + if (!pkcs11_uri_valid(n)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element in '%s' is not a valid RFC7512 PKCS#11 URI: %s", strna(name), n); + + r = strv_extend(&z, n); + if (r < 0) + return log_oom(); + } + } + + strv_free_and_replace(*l, z); + return 0; +} + +static int dispatch_pkcs11_key_data(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + Pkcs11EncryptedKey *k = userdata; + size_t l; + void *b; + int r; + + if (json_variant_is_null(variant)) { + k->data = erase_and_free(k->data); + k->size = 0; + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l); + if (r < 0) + return json_log(variant, flags, r, "Failed to decode encrypted PKCS#11 key: %m"); + + erase_and_free(k->data); + k->data = b; + k->size = l; + + return 0; +} + +static int dispatch_pkcs11_key(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + UserRecord *h = userdata; + JsonVariant *e; + int r; + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + Pkcs11EncryptedKey *array, *k; + + static const JsonDispatch pkcs11_key_dispatch_table[] = { + { "uri", JSON_VARIANT_STRING, dispatch_pkcs11_uri, offsetof(Pkcs11EncryptedKey, uri), JSON_MANDATORY }, + { "data", JSON_VARIANT_STRING, dispatch_pkcs11_key_data, 0, JSON_MANDATORY }, + { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(Pkcs11EncryptedKey, hashed_password), JSON_MANDATORY }, + {}, + }; + + if (!json_variant_is_object(e)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object."); + + array = reallocarray(h->pkcs11_encrypted_key, h->n_pkcs11_encrypted_key + 1, sizeof(Pkcs11EncryptedKey)); + if (!array) + return log_oom(); + + h->pkcs11_encrypted_key = array; + k = h->pkcs11_encrypted_key + h->n_pkcs11_encrypted_key; + *k = (Pkcs11EncryptedKey) {}; + + r = json_dispatch(e, pkcs11_key_dispatch_table, NULL, flags, k); + if (r < 0) { + pkcs11_encrypted_key_done(k); + return r; + } + + h->n_pkcs11_encrypted_key++; + } + + return 0; +} + +static int dispatch_fido2_hmac_credential(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + Fido2HmacCredential *k = userdata; + size_t l; + void *b; + int r; + + if (json_variant_is_null(variant)) { + k->id = mfree(k->id); + k->size = 0; + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l); + if (r < 0) + return json_log(variant, flags, r, "Failed to decode FIDO2 credential ID: %m"); + + free_and_replace(k->id, b); + k->size = l; + + return 0; +} + +static int dispatch_fido2_hmac_credential_array(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + UserRecord *h = userdata; + JsonVariant *e; + int r; + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + Fido2HmacCredential *array; + size_t l; + void *b; + + if (!json_variant_is_string(e)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string."); + + array = reallocarray(h->fido2_hmac_credential, h->n_fido2_hmac_credential + 1, sizeof(Fido2HmacCredential)); + if (!array) + return log_oom(); + + r = unbase64mem(json_variant_string(e), (size_t) -1, &b, &l); + if (r < 0) + return json_log(variant, flags, r, "Failed to decode FIDO2 credential ID: %m"); + + h->fido2_hmac_credential = array; + + h->fido2_hmac_credential[h->n_fido2_hmac_credential++] = (Fido2HmacCredential) { + .id = b, + .size = l, + }; + } + + return 0; +} + +static int dispatch_fido2_hmac_salt_value(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + Fido2HmacSalt *k = userdata; + size_t l; + void *b; + int r; + + if (json_variant_is_null(variant)) { + k->salt = erase_and_free(k->salt); + k->salt_size = 0; + return 0; + } + + if (!json_variant_is_string(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name)); + + r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l); + if (r < 0) + return json_log(variant, flags, r, "Failed to decode FIDO2 salt: %m"); + + erase_and_free(k->salt); + k->salt = b; + k->salt_size = l; + + return 0; +} + +static int dispatch_fido2_hmac_salt(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + UserRecord *h = userdata; + JsonVariant *e; + int r; + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + Fido2HmacSalt *array, *k; + + static const JsonDispatch fido2_hmac_salt_dispatch_table[] = { + { "credential", JSON_VARIANT_STRING, dispatch_fido2_hmac_credential, offsetof(Fido2HmacSalt, credential), JSON_MANDATORY }, + { "salt", JSON_VARIANT_STRING, dispatch_fido2_hmac_salt_value, 0, JSON_MANDATORY }, + { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(Fido2HmacSalt, hashed_password), JSON_MANDATORY }, + {}, + }; + + if (!json_variant_is_object(e)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object."); + + array = reallocarray(h->fido2_hmac_salt, h->n_fido2_hmac_salt + 1, sizeof(Fido2HmacSalt)); + if (!array) + return log_oom(); + + h->fido2_hmac_salt = array; + k = h->fido2_hmac_salt + h->n_fido2_hmac_salt; + *k = (Fido2HmacSalt) {}; + + r = json_dispatch(e, fido2_hmac_salt_dispatch_table, NULL, flags, k); + if (r < 0) { + fido2_hmac_salt_done(k); + return r; + } + + h->n_fido2_hmac_salt++; + } + + return 0; +} + +static int dispatch_recovery_key(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + UserRecord *h = userdata; + JsonVariant *e; + int r; + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + RecoveryKey *array, *k; + + static const JsonDispatch recovery_key_dispatch_table[] = { + { "type", JSON_VARIANT_STRING, json_dispatch_string, 0, JSON_MANDATORY }, + { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(RecoveryKey, hashed_password), JSON_MANDATORY }, + {}, + }; + + if (!json_variant_is_object(e)) + return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object."); + + array = reallocarray(h->recovery_key, h->n_recovery_key + 1, sizeof(RecoveryKey)); + if (!array) + return log_oom(); + + h->recovery_key = array; + k = h->recovery_key + h->n_recovery_key; + *k = (RecoveryKey) {}; + + r = json_dispatch(e, recovery_key_dispatch_table, NULL, flags, k); + if (r < 0) { + recovery_key_done(k); + return r; + } + + h->n_recovery_key++; + } + + return 0; +} + +static int dispatch_privileged(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch privileged_dispatch_table[] = { + { "passwordHint", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, password_hint), 0 }, + { "hashedPassword", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, hashed_password), JSON_SAFE }, + { "sshAuthorizedKeys", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, ssh_authorized_keys), 0 }, + { "pkcs11EncryptedKey", JSON_VARIANT_ARRAY, dispatch_pkcs11_key, 0, 0 }, + { "fido2HmacSalt", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_salt, 0, 0 }, + { "recoveryKey", JSON_VARIANT_ARRAY, dispatch_recovery_key, 0, 0 }, + {}, + }; + + return json_dispatch(variant, privileged_dispatch_table, NULL, flags, userdata); +} + +static int dispatch_binding(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch binding_dispatch_table[] = { + { "imagePath", JSON_VARIANT_STRING, json_dispatch_image_path, offsetof(UserRecord, image_path), 0 }, + { "homeDirectory", JSON_VARIANT_STRING, json_dispatch_home_directory, offsetof(UserRecord, home_directory), 0 }, + { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 }, + { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 }, + { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 }, + { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 }, + { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 }, + { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 }, + { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE }, + { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE }, + { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE }, + { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 }, + {}, + }; + + char smid[SD_ID128_STRING_MAX]; + JsonVariant *m; + sd_id128_t mid; + int r; + + if (!variant) + return 0; + + if (!json_variant_is_object(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name)); + + r = sd_id128_get_machine(&mid); + if (r < 0) + return json_log(variant, flags, r, "Failed to determine machine ID: %m"); + + m = json_variant_by_key(variant, sd_id128_to_string(mid, smid)); + if (!m) + return 0; + + return json_dispatch(m, binding_dispatch_table, NULL, flags, userdata); +} + +int per_machine_id_match(JsonVariant *ids, JsonDispatchFlags flags) { + sd_id128_t mid; + int r; + + r = sd_id128_get_machine(&mid); + if (r < 0) + return json_log(ids, flags, r, "Failed to acquire machine ID: %m"); + + if (json_variant_is_string(ids)) { + sd_id128_t k; + + r = sd_id128_from_string(json_variant_string(ids), &k); + if (r < 0) { + json_log(ids, flags, r, "%s is not a valid machine ID, ignoring: %m", json_variant_string(ids)); + return 0; + } + + return sd_id128_equal(mid, k); + } + + if (json_variant_is_array(ids)) { + JsonVariant *e; + + JSON_VARIANT_ARRAY_FOREACH(e, ids) { + sd_id128_t k; + + if (!json_variant_is_string(e)) { + json_log(e, flags, 0, "Machine ID is not a string, ignoring: %m"); + continue; + } + + r = sd_id128_from_string(json_variant_string(e), &k); + if (r < 0) { + json_log(e, flags, r, "%s is not a valid machine ID, ignoring: %m", json_variant_string(e)); + continue; + } + + if (sd_id128_equal(mid, k)) + return true; + } + + return false; + } + + json_log(ids, flags, 0, "Machine ID is not a string or array of strings, ignoring: %m"); + return false; +} + +int per_machine_hostname_match(JsonVariant *hns, JsonDispatchFlags flags) { + _cleanup_free_ char *hn = NULL; + int r; + + r = gethostname_strict(&hn); + if (r == -ENXIO) { + json_log(hns, flags, r, "No hostname set, not matching perMachine hostname record: %m"); + return false; + } + if (r < 0) + return json_log(hns, flags, r, "Failed to acquire hostname: %m"); + + if (json_variant_is_string(hns)) + return streq(json_variant_string(hns), hn); + + if (json_variant_is_array(hns)) { + JsonVariant *e; + + JSON_VARIANT_ARRAY_FOREACH(e, hns) { + + if (!json_variant_is_string(e)) { + json_log(e, flags, 0, "Hostname is not a string, ignoring: %m"); + continue; + } + + if (streq(json_variant_string(hns), hn)) + return true; + } + + return false; + } + + json_log(hns, flags, 0, "Hostname is not a string or array of strings, ignoring: %m"); + return false; +} + +static int dispatch_per_machine(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch per_machine_dispatch_table[] = { + { "matchMachineId", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 }, + { "matchHostname", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 }, + { "iconName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, icon_name), JSON_SAFE }, + { "location", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, location), 0 }, + { "shell", JSON_VARIANT_STRING, json_dispatch_filename_or_path, offsetof(UserRecord, shell), 0 }, + { "umask", JSON_VARIANT_UNSIGNED, json_dispatch_umask, offsetof(UserRecord, umask), 0 }, + { "environment", JSON_VARIANT_ARRAY, json_dispatch_environment, offsetof(UserRecord, environment), 0 }, + { "timeZone", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, time_zone), JSON_SAFE }, + { "preferredLanguage", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, preferred_language), JSON_SAFE }, + { "niceLevel", _JSON_VARIANT_TYPE_INVALID, json_dispatch_nice, offsetof(UserRecord, nice_level), 0 }, + { "resourceLimits", _JSON_VARIANT_TYPE_INVALID, json_dispatch_rlimits, offsetof(UserRecord, rlimits), 0 }, + { "locked", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, locked), 0 }, + { "notBeforeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_before_usec), 0 }, + { "notAfterUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_after_usec), 0 }, + { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 }, + { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_disk_size, offsetof(UserRecord, disk_size), 0 }, + { "diskSizeRelative", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size_relative), 0 }, + { "skeletonDirectory", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, skeleton_directory), 0 }, + { "accessMode", JSON_VARIANT_UNSIGNED, json_dispatch_access_mode, offsetof(UserRecord, access_mode), 0 }, + { "tasksMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, tasks_max), 0 }, + { "memoryHigh", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_high), 0 }, + { "memoryMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_max), 0 }, + { "cpuWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, cpu_weight), 0 }, + { "ioWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, io_weight), 0 }, + { "mountNoDevices", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nodev), 0 }, + { "mountNoSuid", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nosuid), 0 }, + { "mountNoExecute", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, noexec), 0 }, + { "cifsDomain", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_domain), JSON_SAFE }, + { "cifsUserName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_user_name), JSON_SAFE }, + { "cifsService", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_service), JSON_SAFE }, + { "imagePath", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, image_path), 0 }, + { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 }, + { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 }, + { "memberOf", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(UserRecord, member_of), JSON_RELAX}, + { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE }, + { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 }, + { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 }, + { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 }, + { "luksDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_discard), 0, }, + { "luksOfflineDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_offline_discard), 0, }, + { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE }, + { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE }, + { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 }, + { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE }, + { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), JSON_SAFE }, + { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 }, + { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 }, + { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 }, + { "rateLimitIntervalUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_interval_usec), 0 }, + { "rateLimitBurst", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_burst), 0 }, + { "enforcePasswordPolicy", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, enforce_password_policy), 0 }, + { "autoLogin", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, auto_login), 0 }, + { "stopDelayUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, stop_delay_usec), 0 }, + { "killProcesses", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, kill_processes), 0 }, + { "passwordChangeMinUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_min_usec), 0 }, + { "passwordChangeMaxUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_max_usec), 0 }, + { "passwordChangeWarnUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_warn_usec), 0 }, + { "passwordChangeInactiveUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_inactive_usec), 0 }, + { "passwordChangeNow", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, password_change_now), 0 }, + { "pkcs11TokenUri", JSON_VARIANT_ARRAY, dispatch_pkcs11_uri_array, offsetof(UserRecord, pkcs11_token_uri), 0 }, + { "fido2HmacCredential", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_credential_array, 0, 0 }, + {}, + }; + + JsonVariant *e; + int r; + + if (!variant) + return 0; + + if (!json_variant_is_array(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name)); + + JSON_VARIANT_ARRAY_FOREACH(e, variant) { + bool matching = false; + JsonVariant *m; + + if (!json_variant_is_object(e)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name)); + + m = json_variant_by_key(e, "matchMachineId"); + if (m) { + r = per_machine_id_match(m, flags); + if (r < 0) + return r; + + matching = r > 0; + } + + if (!matching) { + m = json_variant_by_key(e, "matchHostname"); + if (m) { + r = per_machine_hostname_match(m, flags); + if (r < 0) + return r; + + matching = r > 0; + } + } + + if (!matching) + continue; + + r = json_dispatch(e, per_machine_dispatch_table, NULL, flags, userdata); + if (r < 0) + return r; + } + + return 0; +} + +static int dispatch_status(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + + static const JsonDispatch status_dispatch_table[] = { + { "diskUsage", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_usage), 0 }, + { "diskFree", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_free), 0 }, + { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size), 0 }, + { "diskCeiling", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_ceiling), 0 }, + { "diskFloor", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_floor), 0 }, + { "state", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, state), JSON_SAFE }, + { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, service), JSON_SAFE }, + { "signedLocally", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, signed_locally), 0 }, + { "goodAuthenticationCounter", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, good_authentication_counter), 0 }, + { "badAuthenticationCounter", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, bad_authentication_counter), 0 }, + { "lastGoodAuthenticationUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_good_authentication_usec), 0 }, + { "lastBadAuthenticationUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_bad_authentication_usec), 0 }, + { "rateLimitBeginUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_begin_usec), 0 }, + { "rateLimitCount", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_count), 0 }, + { "removable", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, removable), 0 }, + {}, + }; + + char smid[SD_ID128_STRING_MAX]; + JsonVariant *m; + sd_id128_t mid; + int r; + + if (!variant) + return 0; + + if (!json_variant_is_object(variant)) + return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name)); + + r = sd_id128_get_machine(&mid); + if (r < 0) + return json_log(variant, flags, r, "Failed to determine machine ID: %m"); + + m = json_variant_by_key(variant, sd_id128_to_string(mid, smid)); + if (!m) + return 0; + + return json_dispatch(m, status_dispatch_table, NULL, flags, userdata); +} + +int user_record_build_image_path(UserStorage storage, const char *user_name_and_realm, char **ret) { + const char *suffix; + char *z; + + assert(storage >= 0); + assert(user_name_and_realm); + assert(ret); + + if (storage == USER_LUKS) + suffix = ".home"; + else if (IN_SET(storage, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT)) + suffix = ".homedir"; + else { + *ret = NULL; + return 0; + } + + z = strjoin("/home/", user_name_and_realm, suffix); + if (!z) + return -ENOMEM; + + *ret = z; + return 1; +} + +static int user_record_augment(UserRecord *h, JsonDispatchFlags json_flags) { + int r; + + assert(h); + + if (!FLAGS_SET(h->mask, USER_RECORD_REGULAR)) + return 0; + + assert(h->user_name); + + if (!h->user_name_and_realm_auto && h->realm) { + h->user_name_and_realm_auto = strjoin(h->user_name, "@", h->realm); + if (!h->user_name_and_realm_auto) + return json_log_oom(h->json, json_flags); + } + + /* Let's add in the following automatisms only for regular users, they don't make sense for any others */ + if (user_record_disposition(h) != USER_REGULAR) + return 0; + + if (!h->home_directory && !h->home_directory_auto) { + h->home_directory_auto = path_join("/home/", h->user_name); + if (!h->home_directory_auto) + return json_log_oom(h->json, json_flags); + } + + if (!h->image_path && !h->image_path_auto) { + r = user_record_build_image_path(user_record_storage(h), user_record_user_name_and_realm(h), &h->image_path_auto); + if (r < 0) + return json_log(h->json, json_flags, r, "Failed to determine default image path: %m"); + } + + return 0; +} + +int user_group_record_mangle( + JsonVariant *v, + UserRecordLoadFlags load_flags, + JsonVariant **ret_variant, + UserRecordMask *ret_mask) { + + static const struct { + UserRecordMask mask; + const char *name; + } mask_field[] = { + { USER_RECORD_PRIVILEGED, "privileged" }, + { USER_RECORD_SECRET, "secret" }, + { USER_RECORD_BINDING, "binding" }, + { USER_RECORD_PER_MACHINE, "perMachine" }, + { USER_RECORD_STATUS, "status" }, + { USER_RECORD_SIGNATURE, "signature" }, + }; + + JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags); + _cleanup_(json_variant_unrefp) JsonVariant *w = NULL; + JsonVariant *array[ELEMENTSOF(mask_field) * 2]; + size_t n_retain = 0, i; + UserRecordMask m = 0; + int r; + + assert((load_flags & _USER_RECORD_MASK_MAX) == 0); /* detect mistakes when accidentally passing + * UserRecordMask bit masks as UserRecordLoadFlags + * value */ + + assert(v); + assert(ret_variant); + assert(ret_mask); + + /* Note that this function is shared with the group record parser, hence we try to be generic in our + * log message wording here, to cover both cases. */ + + if (!json_variant_is_object(v)) + return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is not a JSON object, refusing."); + + if (USER_RECORD_ALLOW_MASK(load_flags) == 0) /* allow nothing? */ + return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Nothing allowed in record, refusing."); + + if (USER_RECORD_STRIP_MASK(load_flags) == _USER_RECORD_MASK_MAX) /* strip everything? */ + return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Stripping everything from record, refusing."); + + /* Check if we have the special sections and if they match our flags set */ + for (i = 0; i < ELEMENTSOF(mask_field); i++) { + JsonVariant *e, *k; + + if (FLAGS_SET(USER_RECORD_STRIP_MASK(load_flags), mask_field[i].mask)) { + if (!w) + w = json_variant_ref(v); + + r = json_variant_filter(&w, STRV_MAKE(mask_field[i].name)); + if (r < 0) + return json_log(w, json_flags, r, "Failed to remove field from variant: %m"); + + continue; + } + + e = json_variant_by_key_full(v, mask_field[i].name, &k); + if (e) { + if (!FLAGS_SET(USER_RECORD_ALLOW_MASK(load_flags), mask_field[i].mask)) + return json_log(e, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record contains '%s' field, which is not allowed.", mask_field[i].name); + + if (FLAGS_SET(load_flags, USER_RECORD_STRIP_REGULAR)) { + array[n_retain++] = k; + array[n_retain++] = e; + } + + m |= mask_field[i].mask; + } else { + if (FLAGS_SET(USER_RECORD_REQUIRE_MASK(load_flags), mask_field[i].mask)) + return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks '%s' field, which is required.", mask_field[i].name); + } + } + + if (FLAGS_SET(load_flags, USER_RECORD_STRIP_REGULAR)) { + /* If we are supposed to strip regular items, then let's instead just allocate a new object + * with just the stuff we need. */ + + w = json_variant_unref(w); + r = json_variant_new_object(&w, array, n_retain); + if (r < 0) + return json_log(v, json_flags, r, "Failed to allocate new object: %m"); + } else { + /* And now check if there's anything else in the record */ + for (i = 0; i < json_variant_elements(v); i += 2) { + const char *f; + bool special = false; + size_t j; + + assert_se(f = json_variant_string(json_variant_by_index(v, i))); + + for (j = 0; j < ELEMENTSOF(mask_field); j++) + if (streq(f, mask_field[j].name)) { /* already covered in the loop above */ + special = true; + continue; + } + + if (!special) { + if ((load_flags & (USER_RECORD_ALLOW_REGULAR|USER_RECORD_REQUIRE_REGULAR)) == 0) + return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record contains '%s' field, which is not allowed.", f); + + m |= USER_RECORD_REGULAR; + break; + } + } + } + + if (FLAGS_SET(load_flags, USER_RECORD_REQUIRE_REGULAR) && !FLAGS_SET(m, USER_RECORD_REGULAR)) + return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks basic identity fields, which are required."); + + if (m == 0) + return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is empty."); + + if (w) + *ret_variant = TAKE_PTR(w); + else + *ret_variant = json_variant_ref(v); + + *ret_mask = m; + return 0; +} + +int user_record_load(UserRecord *h, JsonVariant *v, UserRecordLoadFlags load_flags) { + + static const JsonDispatch user_dispatch_table[] = { + { "userName", JSON_VARIANT_STRING, json_dispatch_user_group_name, offsetof(UserRecord, user_name), JSON_RELAX}, + { "realm", JSON_VARIANT_STRING, json_dispatch_realm, offsetof(UserRecord, realm), 0 }, + { "realName", JSON_VARIANT_STRING, json_dispatch_gecos, offsetof(UserRecord, real_name), 0 }, + { "emailAddress", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, email_address), JSON_SAFE }, + { "iconName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, icon_name), JSON_SAFE }, + { "location", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, location), 0 }, + { "disposition", JSON_VARIANT_STRING, json_dispatch_user_disposition, offsetof(UserRecord, disposition), 0 }, + { "lastChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_change_usec), 0 }, + { "lastPasswordChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_password_change_usec), 0 }, + { "shell", JSON_VARIANT_STRING, json_dispatch_filename_or_path, offsetof(UserRecord, shell), 0 }, + { "umask", JSON_VARIANT_UNSIGNED, json_dispatch_umask, offsetof(UserRecord, umask), 0 }, + { "environment", JSON_VARIANT_ARRAY, json_dispatch_environment, offsetof(UserRecord, environment), 0 }, + { "timeZone", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, time_zone), JSON_SAFE }, + { "preferredLanguage", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, preferred_language), JSON_SAFE }, + { "niceLevel", _JSON_VARIANT_TYPE_INVALID, json_dispatch_nice, offsetof(UserRecord, nice_level), 0 }, + { "resourceLimits", _JSON_VARIANT_TYPE_INVALID, json_dispatch_rlimits, offsetof(UserRecord, rlimits), 0 }, + { "locked", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, locked), 0 }, + { "notBeforeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_before_usec), 0 }, + { "notAfterUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_after_usec), 0 }, + { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 }, + { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_disk_size, offsetof(UserRecord, disk_size), 0 }, + { "diskSizeRelative", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size_relative), 0 }, + { "skeletonDirectory", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, skeleton_directory), 0 }, + { "accessMode", JSON_VARIANT_UNSIGNED, json_dispatch_access_mode, offsetof(UserRecord, access_mode), 0 }, + { "tasksMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, tasks_max), 0 }, + { "memoryHigh", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_high), 0 }, + { "memoryMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_max), 0 }, + { "cpuWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, cpu_weight), 0 }, + { "ioWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, io_weight), 0 }, + { "mountNoDevices", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nodev), 0 }, + { "mountNoSuid", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nosuid), 0 }, + { "mountNoExecute", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, noexec), 0 }, + { "cifsDomain", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_domain), JSON_SAFE }, + { "cifsUserName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_user_name), JSON_SAFE }, + { "cifsService", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_service), JSON_SAFE }, + { "imagePath", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, image_path), 0 }, + { "homeDirectory", JSON_VARIANT_STRING, json_dispatch_home_directory, offsetof(UserRecord, home_directory), 0 }, + { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 }, + { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 }, + { "memberOf", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(UserRecord, member_of), JSON_RELAX}, + { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE }, + { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 }, + { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 }, + { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 }, + { "luksDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_discard), 0 }, + { "luksOfflineDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_offline_discard), 0 }, + { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE }, + { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE }, + { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 }, + { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE }, + { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), JSON_SAFE }, + { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 }, + { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 }, + { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 }, + { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, service), JSON_SAFE }, + { "rateLimitIntervalUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_interval_usec), 0 }, + { "rateLimitBurst", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_burst), 0 }, + { "enforcePasswordPolicy", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, enforce_password_policy), 0 }, + { "autoLogin", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, auto_login), 0 }, + { "stopDelayUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, stop_delay_usec), 0 }, + { "killProcesses", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, kill_processes), 0 }, + { "passwordChangeMinUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_min_usec), 0 }, + { "passwordChangeMaxUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_max_usec), 0 }, + { "passwordChangeWarnUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_warn_usec), 0 }, + { "passwordChangeInactiveUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_inactive_usec), 0 }, + { "passwordChangeNow", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, password_change_now), 0 }, + { "pkcs11TokenUri", JSON_VARIANT_ARRAY, dispatch_pkcs11_uri_array, offsetof(UserRecord, pkcs11_token_uri), 0 }, + { "fido2HmacCredential", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_credential_array, 0, 0 }, + { "recoveryKeyType", JSON_VARIANT_ARRAY, json_dispatch_strv, offsetof(UserRecord, recovery_key_type), 0 }, + + { "secret", JSON_VARIANT_OBJECT, dispatch_secret, 0, 0 }, + { "privileged", JSON_VARIANT_OBJECT, dispatch_privileged, 0, 0 }, + + /* Ignore the perMachine, binding, status stuff here, and process it later, so that it overrides whatever is set above */ + { "perMachine", JSON_VARIANT_ARRAY, NULL, 0, 0 }, + { "binding", JSON_VARIANT_OBJECT, NULL, 0, 0 }, + { "status", JSON_VARIANT_OBJECT, NULL, 0, 0 }, + + /* Ignore 'signature', we check it with explicit accessors instead */ + { "signature", JSON_VARIANT_ARRAY, NULL, 0, 0 }, + {}, + }; + + JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags); + int r; + + assert(h); + assert(!h->json); + + /* Note that this call will leave a half-initialized record around on failure! */ + + r = user_group_record_mangle(v, load_flags, &h->json, &h->mask); + if (r < 0) + return r; + + r = json_dispatch(h->json, user_dispatch_table, NULL, json_flags, h); + if (r < 0) + return r; + + /* During the parsing operation above we ignored the 'perMachine', 'binding' and 'status' fields, + * since we want them to override the global options. Let's process them now. */ + + r = dispatch_per_machine("perMachine", json_variant_by_key(h->json, "perMachine"), json_flags, h); + if (r < 0) + return r; + + r = dispatch_binding("binding", json_variant_by_key(h->json, "binding"), json_flags, h); + if (r < 0) + return r; + + r = dispatch_status("status", json_variant_by_key(h->json, "status"), json_flags, h); + if (r < 0) + return r; + + if (FLAGS_SET(h->mask, USER_RECORD_REGULAR) && !h->user_name) + return json_log(h->json, json_flags, SYNTHETIC_ERRNO(EINVAL), "User name field missing, refusing."); + + r = user_record_augment(h, json_flags); + if (r < 0) + return r; + + return 0; +} + +int user_record_build(UserRecord **ret, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + _cleanup_(user_record_unrefp) UserRecord *u = NULL; + va_list ap; + int r; + + assert(ret); + + va_start(ap, ret); + r = json_buildv(&v, ap); + va_end(ap); + + if (r < 0) + return r; + + u = user_record_new(); + if (!u) + return -ENOMEM; + + r = user_record_load(u, v, USER_RECORD_LOAD_FULL); + if (r < 0) + return r; + + *ret = TAKE_PTR(u); + return 0; +} + +const char *user_record_user_name_and_realm(UserRecord *h) { + assert(h); + + /* Return the pre-initialized joined string if it is defined */ + if (h->user_name_and_realm_auto) + return h->user_name_and_realm_auto; + + /* If it's not defined then we cannot have a realm */ + assert(!h->realm); + return h->user_name; +} + +UserStorage user_record_storage(UserRecord *h) { + assert(h); + + if (h->storage >= 0) + return h->storage; + + return USER_CLASSIC; +} + +const char *user_record_file_system_type(UserRecord *h) { + assert(h); + + return h->file_system_type ?: "btrfs"; +} + +const char *user_record_skeleton_directory(UserRecord *h) { + assert(h); + + return h->skeleton_directory ?: "/etc/skel"; +} + +mode_t user_record_access_mode(UserRecord *h) { + assert(h); + + return h->access_mode != (mode_t) -1 ? h->access_mode : 0700; +} + +const char* user_record_home_directory(UserRecord *h) { + assert(h); + + if (h->home_directory) + return h->home_directory; + if (h->home_directory_auto) + return h->home_directory_auto; + + /* The root user is special, hence be special about it */ + if (streq_ptr(h->user_name, "root")) + return "/root"; + + return "/"; +} + +const char *user_record_image_path(UserRecord *h) { + assert(h); + + if (h->image_path) + return h->image_path; + if (h->image_path_auto) + return h->image_path_auto; + + return IN_SET(user_record_storage(h), USER_CLASSIC, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT) ? user_record_home_directory(h) : NULL; +} + +const char *user_record_cifs_user_name(UserRecord *h) { + assert(h); + + return h->cifs_user_name ?: h->user_name; +} + +unsigned long user_record_mount_flags(UserRecord *h) { + assert(h); + + return (h->nosuid ? MS_NOSUID : 0) | + (h->noexec ? MS_NOEXEC : 0) | + (h->nodev ? MS_NODEV : 0); +} + +const char *user_record_shell(UserRecord *h) { + assert(h); + + if (h->shell) + return h->shell; + + if (streq_ptr(h->user_name, "root")) + return "/bin/sh"; + + if (user_record_disposition(h) == USER_REGULAR) + return "/bin/bash"; + + return NOLOGIN; +} + +const char *user_record_real_name(UserRecord *h) { + assert(h); + + return h->real_name ?: h->user_name; +} + +bool user_record_luks_discard(UserRecord *h) { + const char *ip; + + assert(h); + + if (h->luks_discard >= 0) + return h->luks_discard; + + ip = user_record_image_path(h); + if (!ip) + return false; + + /* Use discard by default if we are referring to a real block device, but not when operating on a + * loopback device. We want to optimize for SSD and flash storage after all, but we should be careful + * when storing stuff on top of regular file systems in loopback files as doing discard then would + * mean thin provisioning and we should not do that willy-nilly since it means we'll risk EIO later + * on should the disk space to back our file systems not be available. */ + + return path_startswith(ip, "/dev/"); +} + +bool user_record_luks_offline_discard(UserRecord *h) { + const char *ip; + + assert(h); + + if (h->luks_offline_discard >= 0) + return h->luks_offline_discard; + + /* Discard while we are logged out should generally be a good idea, except when operating directly on + * physical media, where we should just bind it to the online discard mode. */ + + ip = user_record_image_path(h); + if (!ip) + return false; + + if (path_startswith(ip, "/dev/")) + return user_record_luks_discard(h); + + return true; +} + +const char *user_record_luks_cipher(UserRecord *h) { + assert(h); + + return h->luks_cipher ?: "aes"; +} + +const char *user_record_luks_cipher_mode(UserRecord *h) { + assert(h); + + return h->luks_cipher_mode ?: "xts-plain64"; +} + +uint64_t user_record_luks_volume_key_size(UserRecord *h) { + assert(h); + + /* We return a value here that can be cast without loss into size_t which is what libcrypsetup expects */ + + if (h->luks_volume_key_size == UINT64_MAX) + return 256 / 8; + + return MIN(h->luks_volume_key_size, SIZE_MAX); +} + +const char* user_record_luks_pbkdf_type(UserRecord *h) { + assert(h); + + return h->luks_pbkdf_type ?: "argon2i"; +} + +uint64_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h) { + assert(h); + + /* Returns a value with ms granularity, since that's what libcryptsetup expects */ + + if (h->luks_pbkdf_time_cost_usec == UINT64_MAX) + return 500 * USEC_PER_MSEC; /* We default to 500ms, in contrast to libcryptsetup's 2s, which is just awfully slow on every login */ + + return MIN(DIV_ROUND_UP(h->luks_pbkdf_time_cost_usec, USEC_PER_MSEC), UINT32_MAX) * USEC_PER_MSEC; +} + +uint64_t user_record_luks_pbkdf_memory_cost(UserRecord *h) { + assert(h); + + /* Returns a value with kb granularity, since that's what libcryptsetup expects */ + + if (h->luks_pbkdf_memory_cost == UINT64_MAX) + return 64*1024*1024; /* We default to 64M, since this should work on smaller systems too */ + + return MIN(DIV_ROUND_UP(h->luks_pbkdf_memory_cost, 1024), UINT32_MAX) * 1024; +} + +uint64_t user_record_luks_pbkdf_parallel_threads(UserRecord *h) { + assert(h); + + if (h->luks_pbkdf_memory_cost == UINT64_MAX) + return 1; /* We default to 1, since this should work on smaller systems too */ + + return MIN(h->luks_pbkdf_parallel_threads, UINT32_MAX); +} + +const char *user_record_luks_pbkdf_hash_algorithm(UserRecord *h) { + assert(h); + + return h->luks_pbkdf_hash_algorithm ?: "sha512"; +} + +gid_t user_record_gid(UserRecord *h) { + assert(h); + + if (gid_is_valid(h->gid)) + return h->gid; + + return (gid_t) h->uid; +} + +UserDisposition user_record_disposition(UserRecord *h) { + assert(h); + + if (h->disposition >= 0) + return h->disposition; + + /* If not declared, derive from UID */ + + if (!uid_is_valid(h->uid)) + return _USER_DISPOSITION_INVALID; + + if (h->uid == 0 || h->uid == UID_NOBODY) + return USER_INTRINSIC; + + if (uid_is_system(h->uid)) + return USER_SYSTEM; + + if (uid_is_dynamic(h->uid)) + return USER_DYNAMIC; + + if (uid_is_container(h->uid)) + return USER_CONTAINER; + + if (h->uid > INT32_MAX) + return USER_RESERVED; + + return USER_REGULAR; +} + +int user_record_removable(UserRecord *h) { + UserStorage storage; + assert(h); + + if (h->removable >= 0) + return h->removable; + + /* Refuse to decide for classic records */ + storage = user_record_storage(h); + if (h->storage < 0 || h->storage == USER_CLASSIC) + return -1; + + /* For now consider only LUKS home directories with a reference by path as removable */ + return storage == USER_LUKS && path_startswith(user_record_image_path(h), "/dev/"); +} + +uint64_t user_record_ratelimit_interval_usec(UserRecord *h) { + assert(h); + + if (h->ratelimit_interval_usec == UINT64_MAX) + return DEFAULT_RATELIMIT_INTERVAL_USEC; + + return h->ratelimit_interval_usec; +} + +uint64_t user_record_ratelimit_burst(UserRecord *h) { + assert(h); + + if (h->ratelimit_burst == UINT64_MAX) + return DEFAULT_RATELIMIT_BURST; + + return h->ratelimit_burst; +} + +bool user_record_can_authenticate(UserRecord *h) { + assert(h); + + /* Returns true if there's some form of property configured that the user can authenticate against */ + + if (h->n_pkcs11_encrypted_key > 0) + return true; + + if (h->n_fido2_hmac_salt > 0) + return true; + + return !strv_isempty(h->hashed_password); +} + +uint64_t user_record_ratelimit_next_try(UserRecord *h) { + assert(h); + + /* Calculates when the it's possible to login next. Returns: + * + * UINT64_MAX → Nothing known + * 0 → Right away + * Any other → Next time in CLOCK_REALTIME in usec (which could be in the past) + */ + + if (h->ratelimit_begin_usec == UINT64_MAX || + h->ratelimit_count == UINT64_MAX) + return UINT64_MAX; + + if (h->ratelimit_begin_usec > now(CLOCK_REALTIME)) /* If the ratelimit time is in the future, then + * the local clock is probably incorrect. Let's + * not refuse login then. */ + return UINT64_MAX; + + if (h->ratelimit_count < user_record_ratelimit_burst(h)) + return 0; + + return usec_add(h->ratelimit_begin_usec, user_record_ratelimit_interval_usec(h)); +} + +bool user_record_equal(UserRecord *a, UserRecord *b) { + assert(a); + assert(b); + + /* We assume that when a record is modified its JSON data is updated at the same time, hence it's + * sufficient to compare the JSON data. */ + + return json_variant_equal(a->json, b->json); +} + +bool user_record_compatible(UserRecord *a, UserRecord *b) { + assert(a); + assert(b); + + /* If either lacks the regular section, we can't really decide, let's hence say they are + * incompatible. */ + if (!(a->mask & b->mask & USER_RECORD_REGULAR)) + return false; + + return streq_ptr(a->user_name, b->user_name) && + streq_ptr(a->realm, b->realm); +} + +int user_record_compare_last_change(UserRecord *a, UserRecord *b) { + assert(a); + assert(b); + + if (a->last_change_usec == b->last_change_usec) + return 0; + + /* Always consider a record with a timestamp newer than one without */ + if (a->last_change_usec == UINT64_MAX) + return -1; + if (b->last_change_usec == UINT64_MAX) + return 1; + + return CMP(a->last_change_usec, b->last_change_usec); +} + +int user_record_clone(UserRecord *h, UserRecordLoadFlags flags, UserRecord **ret) { + _cleanup_(user_record_unrefp) UserRecord *c = NULL; + int r; + + assert(h); + assert(ret); + + c = user_record_new(); + if (!c) + return -ENOMEM; + + r = user_record_load(c, h->json, flags); + if (r < 0) + return r; + + *ret = TAKE_PTR(c); + return 0; +} + +int user_record_masked_equal(UserRecord *a, UserRecord *b, UserRecordMask mask) { + _cleanup_(user_record_unrefp) UserRecord *x = NULL, *y = NULL; + int r; + + assert(a); + assert(b); + + /* Compares the two records, but ignores anything not listed in the specified mask */ + + if ((a->mask & ~mask) != 0) { + r = user_record_clone(a, USER_RECORD_ALLOW(mask) | USER_RECORD_STRIP(~mask & _USER_RECORD_MASK_MAX), &x); + if (r < 0) + return r; + + a = x; + } + + if ((b->mask & ~mask) != 0) { + r = user_record_clone(b, USER_RECORD_ALLOW(mask) | USER_RECORD_STRIP(~mask & _USER_RECORD_MASK_MAX), &y); + if (r < 0) + return r; + + b = y; + } + + return user_record_equal(a, b); +} + +int user_record_test_blocked(UserRecord *h) { + usec_t n; + + /* Checks whether access to the specified user shall be allowed at the moment. Returns: + * + * -ESTALE: Record is from the future + * -ENOLCK: Record is blocked + * -EL2HLT: Record is not valid yet + * -EL3HLT: Record is not valid anymore + * + */ + + assert(h); + + if (h->locked > 0) + return -ENOLCK; + + n = now(CLOCK_REALTIME); + + if (h->not_before_usec != UINT64_MAX && n < h->not_before_usec) + return -EL2HLT; + if (h->not_after_usec != UINT64_MAX && n > h->not_after_usec) + return -EL3HLT; + + if (h->last_change_usec != UINT64_MAX && + h->last_change_usec > n) /* Complain during log-ins when the record is from the future */ + return -ESTALE; + + return 0; +} + +int user_record_test_password_change_required(UserRecord *h) { + bool change_permitted; + usec_t n; + + assert(h); + + /* Checks whether the user must change the password when logging in + + -EKEYREVOKED: Change password now because admin said so + -EOWNERDEAD: Change password now because it expired + -EKEYREJECTED: Password is expired, no changing is allowed + -EKEYEXPIRED: Password is about to expire, warn user + -ENETDOWN: Record has expiration info but no password change timestamp + -EROFS: No password change required nor permitted + -ESTALE: RTC likely incorrect, last password change is in the future + 0: No password change required, but permitted + */ + + /* If a password change request has been set explicitly, it overrides everything */ + if (h->password_change_now > 0) + return -EKEYREVOKED; + + n = now(CLOCK_REALTIME); + + /* Password change in the future? Then our RTC is likely incorrect */ + if (h->last_password_change_usec != UINT64_MAX && + h->last_password_change_usec > n && + (h->password_change_min_usec != UINT64_MAX || + h->password_change_max_usec != UINT64_MAX || + h->password_change_inactive_usec != UINT64_MAX)) + return -ESTALE; + + /* Then, let's check if password changing is currently allowed at all */ + if (h->password_change_min_usec != UINT64_MAX) { + + /* Expiry configured but no password change timestamp known? */ + if (h->last_password_change_usec == UINT64_MAX) + return -ENETDOWN; + + if (h->password_change_min_usec >= UINT64_MAX - h->last_password_change_usec) + change_permitted = false; + else + change_permitted = n >= h->last_password_change_usec + h->password_change_min_usec; + + } else + change_permitted = true; + + /* Let's check whether the password has expired. */ + if (!(h->password_change_max_usec == UINT64_MAX || + h->password_change_max_usec >= UINT64_MAX - h->last_password_change_usec)) { + + uint64_t change_before; + + /* Expiry configured but no password change timestamp known? */ + if (h->last_password_change_usec == UINT64_MAX) + return -ENETDOWN; + + /* Password is in inactive phase? */ + if (h->password_change_inactive_usec != UINT64_MAX && + h->password_change_inactive_usec < UINT64_MAX - h->password_change_max_usec) { + usec_t added; + + added = h->password_change_inactive_usec + h->password_change_max_usec; + if (added < UINT64_MAX - h->last_password_change_usec && + n >= h->last_password_change_usec + added) + return -EKEYREJECTED; + } + + /* Password needs to be changed now? */ + change_before = h->last_password_change_usec + h->password_change_max_usec; + if (n >= change_before) + return change_permitted ? -EOWNERDEAD : -EKEYREJECTED; + + /* Warn user? */ + if (h->password_change_warn_usec != UINT64_MAX && + (change_before < h->password_change_warn_usec || + n >= change_before - h->password_change_warn_usec)) + return change_permitted ? -EKEYEXPIRED : -EROFS; + } + + /* No password changing necessary */ + return change_permitted ? 0 : -EROFS; +} + +static const char* const user_storage_table[_USER_STORAGE_MAX] = { + [USER_CLASSIC] = "classic", + [USER_LUKS] = "luks", + [USER_DIRECTORY] = "directory", + [USER_SUBVOLUME] = "subvolume", + [USER_FSCRYPT] = "fscrypt", + [USER_CIFS] = "cifs", +}; + +DEFINE_STRING_TABLE_LOOKUP(user_storage, UserStorage); + +static const char* const user_disposition_table[_USER_DISPOSITION_MAX] = { + [USER_INTRINSIC] = "intrinsic", + [USER_SYSTEM] = "system", + [USER_DYNAMIC] = "dynamic", + [USER_REGULAR] = "regular", + [USER_CONTAINER] = "container", + [USER_RESERVED] = "reserved", +}; + +DEFINE_STRING_TABLE_LOOKUP(user_disposition, UserDisposition); diff --git a/src/shared/user-record.h b/src/shared/user-record.h new file mode 100644 index 0000000..542a0dc --- /dev/null +++ b/src/shared/user-record.h @@ -0,0 +1,444 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <inttypes.h> +#include <sys/types.h> + +#include "sd-id128.h" + +#include "json.h" +#include "missing_resource.h" +#include "time-util.h" + +/* But some limits on disk sizes: not less than 5M, not more than 5T */ +#define USER_DISK_SIZE_MIN (UINT64_C(5)*1024*1024) +#define USER_DISK_SIZE_MAX (UINT64_C(5)*1024*1024*1024*1024) + +/* The default disk size to use when nothing else is specified, relative to free disk space */ +#define USER_DISK_SIZE_DEFAULT_PERCENT 85 + +bool uid_is_system(uid_t uid); +bool gid_is_system(gid_t gid); + +static inline bool uid_is_dynamic(uid_t uid) { + return DYNAMIC_UID_MIN <= uid && uid <= DYNAMIC_UID_MAX; +} + +static inline bool gid_is_dynamic(gid_t gid) { + return uid_is_dynamic((uid_t) gid); +} + +static inline bool uid_is_container(uid_t uid) { + return CONTAINER_UID_BASE_MIN <= uid && uid <= CONTAINER_UID_BASE_MAX; +} + +static inline bool gid_is_container(gid_t gid) { + return uid_is_container((uid_t) gid); +} + +typedef struct UGIDAllocationRange { + uid_t system_alloc_uid_min; + uid_t system_uid_max; + gid_t system_alloc_gid_min; + gid_t system_gid_max; +} UGIDAllocationRange; + +int read_login_defs(UGIDAllocationRange *ret_defs, const char *path, const char *root); +const UGIDAllocationRange *acquire_ugid_allocation_range(void); + +typedef enum UserDisposition { + USER_INTRINSIC, /* root and nobody */ + USER_SYSTEM, /* statically allocated users for system services */ + USER_DYNAMIC, /* dynamically allocated users for system services */ + USER_REGULAR, /* regular (typically human users) */ + USER_CONTAINER, /* UID ranges allocated for container uses */ + USER_RESERVED, /* Range above 2^31 */ + _USER_DISPOSITION_MAX, + _USER_DISPOSITION_INVALID = -1, +} UserDisposition; + +typedef enum UserHomeStorage { + USER_CLASSIC, + USER_LUKS, + USER_DIRECTORY, /* A directory, and a .identity file in it, which USER_CLASSIC lacks */ + USER_SUBVOLUME, + USER_FSCRYPT, + USER_CIFS, + _USER_STORAGE_MAX, + _USER_STORAGE_INVALID = -1 +} UserStorage; + +typedef enum UserRecordMask { + /* The various sections an identity record may have, as bit mask */ + USER_RECORD_REGULAR = 1U << 0, + USER_RECORD_SECRET = 1U << 1, + USER_RECORD_PRIVILEGED = 1U << 2, + USER_RECORD_PER_MACHINE = 1U << 3, + USER_RECORD_BINDING = 1U << 4, + USER_RECORD_STATUS = 1U << 5, + USER_RECORD_SIGNATURE = 1U << 6, + _USER_RECORD_MASK_MAX = (1U << 7)-1 +} UserRecordMask; + +typedef enum UserRecordLoadFlags { + /* A set of flags used while loading a user record from JSON data. We leave the lower 6 bits free, + * just as a safety precaution so that we can detect borked conversions between UserRecordMask and + * UserRecordLoadFlags. */ + + /* What to require */ + USER_RECORD_REQUIRE_REGULAR = USER_RECORD_REGULAR << 7, + USER_RECORD_REQUIRE_SECRET = USER_RECORD_SECRET << 7, + USER_RECORD_REQUIRE_PRIVILEGED = USER_RECORD_PRIVILEGED << 7, + USER_RECORD_REQUIRE_PER_MACHINE = USER_RECORD_PER_MACHINE << 7, + USER_RECORD_REQUIRE_BINDING = USER_RECORD_BINDING << 7, + USER_RECORD_REQUIRE_STATUS = USER_RECORD_STATUS << 7, + USER_RECORD_REQUIRE_SIGNATURE = USER_RECORD_SIGNATURE << 7, + + /* What to allow */ + USER_RECORD_ALLOW_REGULAR = USER_RECORD_REGULAR << 14, + USER_RECORD_ALLOW_SECRET = USER_RECORD_SECRET << 14, + USER_RECORD_ALLOW_PRIVILEGED = USER_RECORD_PRIVILEGED << 14, + USER_RECORD_ALLOW_PER_MACHINE = USER_RECORD_PER_MACHINE << 14, + USER_RECORD_ALLOW_BINDING = USER_RECORD_BINDING << 14, + USER_RECORD_ALLOW_STATUS = USER_RECORD_STATUS << 14, + USER_RECORD_ALLOW_SIGNATURE = USER_RECORD_SIGNATURE << 14, + + /* What to strip */ + USER_RECORD_STRIP_REGULAR = USER_RECORD_REGULAR << 21, + USER_RECORD_STRIP_SECRET = USER_RECORD_SECRET << 21, + USER_RECORD_STRIP_PRIVILEGED = USER_RECORD_PRIVILEGED << 21, + USER_RECORD_STRIP_PER_MACHINE = USER_RECORD_PER_MACHINE << 21, + USER_RECORD_STRIP_BINDING = USER_RECORD_BINDING << 21, + USER_RECORD_STRIP_STATUS = USER_RECORD_STATUS << 21, + USER_RECORD_STRIP_SIGNATURE = USER_RECORD_SIGNATURE << 21, + + /* Some special combinations that deserve explicit names */ + USER_RECORD_LOAD_FULL = USER_RECORD_REQUIRE_REGULAR | + USER_RECORD_ALLOW_SECRET | + USER_RECORD_ALLOW_PRIVILEGED | + USER_RECORD_ALLOW_PER_MACHINE | + USER_RECORD_ALLOW_BINDING | + USER_RECORD_ALLOW_STATUS | + USER_RECORD_ALLOW_SIGNATURE, + + USER_RECORD_LOAD_REFUSE_SECRET = USER_RECORD_REQUIRE_REGULAR | + USER_RECORD_ALLOW_PRIVILEGED | + USER_RECORD_ALLOW_PER_MACHINE | + USER_RECORD_ALLOW_BINDING | + USER_RECORD_ALLOW_STATUS | + USER_RECORD_ALLOW_SIGNATURE, + + USER_RECORD_LOAD_MASK_SECRET = USER_RECORD_REQUIRE_REGULAR | + USER_RECORD_ALLOW_PRIVILEGED | + USER_RECORD_ALLOW_PER_MACHINE | + USER_RECORD_ALLOW_BINDING | + USER_RECORD_ALLOW_STATUS | + USER_RECORD_ALLOW_SIGNATURE | + USER_RECORD_STRIP_SECRET, + + USER_RECORD_EXTRACT_SECRET = USER_RECORD_REQUIRE_SECRET | + USER_RECORD_STRIP_REGULAR | + USER_RECORD_STRIP_PRIVILEGED | + USER_RECORD_STRIP_PER_MACHINE | + USER_RECORD_STRIP_BINDING | + USER_RECORD_STRIP_STATUS | + USER_RECORD_STRIP_SIGNATURE, + + USER_RECORD_LOAD_SIGNABLE = USER_RECORD_REQUIRE_REGULAR | + USER_RECORD_ALLOW_PRIVILEGED | + USER_RECORD_ALLOW_PER_MACHINE, + + USER_RECORD_EXTRACT_SIGNABLE = USER_RECORD_LOAD_SIGNABLE | + USER_RECORD_STRIP_SECRET | + USER_RECORD_STRIP_BINDING | + USER_RECORD_STRIP_STATUS | + USER_RECORD_STRIP_SIGNATURE, + + USER_RECORD_LOAD_EMBEDDED = USER_RECORD_REQUIRE_REGULAR | + USER_RECORD_ALLOW_PRIVILEGED | + USER_RECORD_ALLOW_PER_MACHINE | + USER_RECORD_ALLOW_SIGNATURE, + + USER_RECORD_EXTRACT_EMBEDDED = USER_RECORD_LOAD_EMBEDDED | + USER_RECORD_STRIP_SECRET | + USER_RECORD_STRIP_BINDING | + USER_RECORD_STRIP_STATUS, + + /* Whether to log about loader errors beyond LOG_DEBUG */ + USER_RECORD_LOG = 1U << 28, + + /* Whether to ignore errors and load what we can */ + USER_RECORD_PERMISSIVE = 1U << 29, +} UserRecordLoadFlags; + +static inline UserRecordLoadFlags USER_RECORD_REQUIRE(UserRecordMask m) { + assert((m & ~_USER_RECORD_MASK_MAX) == 0); + return m << 7; +} + +static inline UserRecordLoadFlags USER_RECORD_ALLOW(UserRecordMask m) { + assert((m & ~_USER_RECORD_MASK_MAX) == 0); + return m << 14; +} + +static inline UserRecordLoadFlags USER_RECORD_STRIP(UserRecordMask m) { + assert((m & ~_USER_RECORD_MASK_MAX) == 0); + return m << 21; +} + +static inline UserRecordMask USER_RECORD_REQUIRE_MASK(UserRecordLoadFlags f) { + return (f >> 7) & _USER_RECORD_MASK_MAX; +} + +static inline UserRecordMask USER_RECORD_ALLOW_MASK(UserRecordLoadFlags f) { + return ((f >> 14) & _USER_RECORD_MASK_MAX) | USER_RECORD_REQUIRE_MASK(f); +} + +static inline UserRecordMask USER_RECORD_STRIP_MASK(UserRecordLoadFlags f) { + return (f >> 21) & _USER_RECORD_MASK_MAX; +} + +static inline JsonDispatchFlags USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(UserRecordLoadFlags flags) { + return (FLAGS_SET(flags, USER_RECORD_LOG) ? JSON_LOG : 0) | + (FLAGS_SET(flags, USER_RECORD_PERMISSIVE) ? JSON_PERMISSIVE : 0); +} + +typedef struct Pkcs11EncryptedKey { + /* The encrypted passphrase, which can be decrypted with the private key indicated below */ + void *data; + size_t size; + + /* Where to find the private key to decrypt the encrypted passphrase above */ + char *uri; + + /* What to test the decrypted passphrase against to allow access (classic UNIX password hash). Note + * that the decrypted passphrase is also used for unlocking LUKS and fscrypt, and if the account is + * backed by LUKS or fscrypt the hashed password is only an additional layer of authentication, not + * the only. */ + char *hashed_password; +} Pkcs11EncryptedKey; + +typedef struct Fido2HmacCredential { + void *id; + size_t size; +} Fido2HmacCredential; + +typedef struct Fido2HmacSalt { + /* The FIDO2 Cridential ID to use */ + Fido2HmacCredential credential; + + /* The FIDO2 salt value */ + void *salt; + size_t salt_size; + + /* What to test the hashed salt value against, usually UNIX password hash here. */ + char *hashed_password; +} Fido2HmacSalt; + +typedef struct RecoveryKey { + /* The type of recovery key, must be "modhex64" right now */ + char *type; + + /* A UNIX password hash of the normalized form of modhex64 */ + char *hashed_password; +} RecoveryKey; + +typedef struct UserRecord { + /* The following three fields are not part of the JSON record */ + unsigned n_ref; + UserRecordMask mask; + bool incomplete; /* incomplete due to security restrictions. */ + + char *user_name; + char *realm; + char *user_name_and_realm_auto; /* the user_name field concatenated with '@' and the realm, if the latter is defined */ + char *real_name; + char *email_address; + char *password_hint; + char *icon_name; + char *location; + + UserDisposition disposition; + uint64_t last_change_usec; + uint64_t last_password_change_usec; + + char *shell; + mode_t umask; + char **environment; + char *time_zone; + char *preferred_language; + int nice_level; + struct rlimit *rlimits[_RLIMIT_MAX]; + + int locked; /* prohibit activation in general */ + uint64_t not_before_usec; /* prohibit activation before this unix time */ + uint64_t not_after_usec; /* prohibit activation after this unix time */ + + UserStorage storage; + uint64_t disk_size; + uint64_t disk_size_relative; /* Disk size, relative to the free bytes of the medium, normalized to UINT32_MAX = 100% */ + char *skeleton_directory; + mode_t access_mode; + + uint64_t tasks_max; + uint64_t memory_high; + uint64_t memory_max; + uint64_t cpu_weight; + uint64_t io_weight; + + bool nosuid; + bool nodev; + bool noexec; + + char **hashed_password; + char **ssh_authorized_keys; + char **password; + char **token_pin; + + char *cifs_domain; + char *cifs_user_name; + char *cifs_service; + + char *image_path; + char *image_path_auto; /* when none is configured explicitly, this is where we place the implicit image */ + char *home_directory; + char *home_directory_auto; /* when none is set explicitly, this is where we place the implicit home directory */ + + uid_t uid; + gid_t gid; + + char **member_of; + + char *file_system_type; + sd_id128_t partition_uuid; + sd_id128_t luks_uuid; + sd_id128_t file_system_uuid; + + int luks_discard; + int luks_offline_discard; + char *luks_cipher; + char *luks_cipher_mode; + uint64_t luks_volume_key_size; + char *luks_pbkdf_hash_algorithm; + char *luks_pbkdf_type; + uint64_t luks_pbkdf_time_cost_usec; + uint64_t luks_pbkdf_memory_cost; + uint64_t luks_pbkdf_parallel_threads; + + uint64_t disk_usage; + uint64_t disk_free; + uint64_t disk_ceiling; + uint64_t disk_floor; + + char *state; + char *service; + int signed_locally; + + uint64_t good_authentication_counter; + uint64_t bad_authentication_counter; + uint64_t last_good_authentication_usec; + uint64_t last_bad_authentication_usec; + + uint64_t ratelimit_begin_usec; + uint64_t ratelimit_count; + uint64_t ratelimit_interval_usec; + uint64_t ratelimit_burst; + + int removable; + int enforce_password_policy; + int auto_login; + + uint64_t stop_delay_usec; /* How long to leave systemd --user around on log-out */ + int kill_processes; /* Whether to kill user processes forcibly on log-out */ + + /* The following exist mostly so that we can cover the full /etc/shadow set of fields */ + uint64_t password_change_min_usec; /* maps to .sp_min */ + uint64_t password_change_max_usec; /* maps to .sp_max */ + uint64_t password_change_warn_usec; /* maps to .sp_warn */ + uint64_t password_change_inactive_usec; /* maps to .sp_inact */ + int password_change_now; /* Require a password change immediately on next login (.sp_lstchg = 0) */ + + char **pkcs11_token_uri; + Pkcs11EncryptedKey *pkcs11_encrypted_key; + size_t n_pkcs11_encrypted_key; + int pkcs11_protected_authentication_path_permitted; + + Fido2HmacCredential *fido2_hmac_credential; + size_t n_fido2_hmac_credential; + Fido2HmacSalt *fido2_hmac_salt; + size_t n_fido2_hmac_salt; + int fido2_user_presence_permitted; + + char **recovery_key_type; + RecoveryKey *recovery_key; + size_t n_recovery_key; + + JsonVariant *json; +} UserRecord; + +UserRecord* user_record_new(void); +UserRecord* user_record_ref(UserRecord *h); +UserRecord* user_record_unref(UserRecord *h); + +DEFINE_TRIVIAL_CLEANUP_FUNC(UserRecord*, user_record_unref); + +int user_record_load(UserRecord *h, JsonVariant *v, UserRecordLoadFlags flags); +int user_record_build(UserRecord **ret, ...); + +const char *user_record_user_name_and_realm(UserRecord *h); +UserStorage user_record_storage(UserRecord *h); +const char *user_record_file_system_type(UserRecord *h); +const char *user_record_skeleton_directory(UserRecord *h); +mode_t user_record_access_mode(UserRecord *h); +const char *user_record_home_directory(UserRecord *h); +const char *user_record_image_path(UserRecord *h); +unsigned long user_record_mount_flags(UserRecord *h); +const char *user_record_cifs_user_name(UserRecord *h); +const char *user_record_shell(UserRecord *h); +const char *user_record_real_name(UserRecord *h); +bool user_record_luks_discard(UserRecord *h); +bool user_record_luks_offline_discard(UserRecord *h); +const char *user_record_luks_cipher(UserRecord *h); +const char *user_record_luks_cipher_mode(UserRecord *h); +uint64_t user_record_luks_volume_key_size(UserRecord *h); +const char* user_record_luks_pbkdf_type(UserRecord *h); +usec_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h); +uint64_t user_record_luks_pbkdf_memory_cost(UserRecord *h); +uint64_t user_record_luks_pbkdf_parallel_threads(UserRecord *h); +const char *user_record_luks_pbkdf_hash_algorithm(UserRecord *h); +gid_t user_record_gid(UserRecord *h); +UserDisposition user_record_disposition(UserRecord *h); +int user_record_removable(UserRecord *h); +usec_t user_record_ratelimit_interval_usec(UserRecord *h); +uint64_t user_record_ratelimit_burst(UserRecord *h); +bool user_record_can_authenticate(UserRecord *h); + +int user_record_build_image_path(UserStorage storage, const char *user_name_and_realm, char **ret); + +bool user_record_equal(UserRecord *a, UserRecord *b); +bool user_record_compatible(UserRecord *a, UserRecord *b); +int user_record_compare_last_change(UserRecord *a, UserRecord *b); + +usec_t user_record_ratelimit_next_try(UserRecord *h); + +int user_record_clone(UserRecord *h, UserRecordLoadFlags flags, UserRecord **ret); +int user_record_masked_equal(UserRecord *a, UserRecord *b, UserRecordMask mask); + +int user_record_test_blocked(UserRecord *h); +int user_record_test_password_change_required(UserRecord *h); + +/* The following six are user by group-record.c, that's why we export them here */ +int json_dispatch_realm(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_gecos(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_user_group_list(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_user_disposition(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); + +int per_machine_id_match(JsonVariant *ids, JsonDispatchFlags flags); +int per_machine_hostname_match(JsonVariant *hns, JsonDispatchFlags flags); +int user_group_record_mangle(JsonVariant *v, UserRecordLoadFlags load_flags, JsonVariant **ret_variant, UserRecordMask *ret_mask); + +const char* user_storage_to_string(UserStorage t) _const_; +UserStorage user_storage_from_string(const char *s) _pure_; + +const char* user_disposition_to_string(UserDisposition t) _const_; +UserDisposition user_disposition_from_string(const char *s) _pure_; diff --git a/src/shared/userdb.c b/src/shared/userdb.c new file mode 100644 index 0000000..2d48028 --- /dev/null +++ b/src/shared/userdb.c @@ -0,0 +1,1249 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/auxv.h> + +#include "dirent-util.h" +#include "dlfcn-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "missing_syscall.h" +#include "parse-util.h" +#include "set.h" +#include "socket-util.h" +#include "strv.h" +#include "user-record-nss.h" +#include "user-util.h" +#include "userdb.h" +#include "varlink.h" + +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(link_hash_ops, void, trivial_hash_func, trivial_compare_func, Varlink, varlink_unref); + +typedef enum LookupWhat { + LOOKUP_USER, + LOOKUP_GROUP, + LOOKUP_MEMBERSHIP, + _LOOKUP_WHAT_MAX, +} LookupWhat; + +struct UserDBIterator { + LookupWhat what; + Set *links; + bool nss_covered:1; + bool nss_iterating:1; + bool synthesize_root:1; + bool synthesize_nobody:1; + bool nss_systemd_blocked:1; + int error; + unsigned n_found; + sd_event *event; + UserRecord *found_user; /* when .what == LOOKUP_USER */ + GroupRecord *found_group; /* when .what == LOOKUP_GROUP */ + + char *found_user_name, *found_group_name; /* when .what == LOOKUP_MEMBERSHIP */ + char **members_of_group; + size_t index_members_of_group; + char *filter_user_name; +}; + +UserDBIterator* userdb_iterator_free(UserDBIterator *iterator) { + if (!iterator) + return NULL; + + set_free(iterator->links); + + switch (iterator->what) { + + case LOOKUP_USER: + user_record_unref(iterator->found_user); + + if (iterator->nss_iterating) + endpwent(); + + break; + + case LOOKUP_GROUP: + group_record_unref(iterator->found_group); + + if (iterator->nss_iterating) + endgrent(); + + break; + + case LOOKUP_MEMBERSHIP: + free(iterator->found_user_name); + free(iterator->found_group_name); + strv_free(iterator->members_of_group); + free(iterator->filter_user_name); + + if (iterator->nss_iterating) + endgrent(); + + break; + + default: + assert_not_reached("Unexpected state?"); + } + + sd_event_unref(iterator->event); + + if (iterator->nss_systemd_blocked) + assert_se(userdb_block_nss_systemd(false) >= 0); + + return mfree(iterator); +} + +static UserDBIterator* userdb_iterator_new(LookupWhat what) { + UserDBIterator *i; + + assert(what >= 0); + assert(what < _LOOKUP_WHAT_MAX); + + i = new(UserDBIterator, 1); + if (!i) + return NULL; + + *i = (UserDBIterator) { + .what = what, + }; + + return i; +} + +static int userdb_iterator_block_nss_systemd(UserDBIterator *iterator) { + int r; + + assert(iterator); + + if (iterator->nss_systemd_blocked) + return 0; + + r = userdb_block_nss_systemd(true); + if (r < 0) + return r; + + iterator->nss_systemd_blocked = true; + return 1; +} + +struct user_group_data { + JsonVariant *record; + bool incomplete; +}; + +static void user_group_data_release(struct user_group_data *d) { + json_variant_unref(d->record); +} + +static int userdb_on_query_reply( + Varlink *link, + JsonVariant *parameters, + const char *error_id, + VarlinkReplyFlags flags, + void *userdata) { + + UserDBIterator *iterator = userdata; + int r; + + assert(iterator); + + if (error_id) { + log_debug("Got lookup error: %s", error_id); + + if (STR_IN_SET(error_id, + "io.systemd.UserDatabase.NoRecordFound", + "io.systemd.UserDatabase.ConflictingRecordFound")) + r = -ESRCH; + else if (streq(error_id, "io.systemd.UserDatabase.ServiceNotAvailable")) + r = -EHOSTDOWN; + else if (streq(error_id, "io.systemd.UserDatabase.EnumerationNotSupported")) + r = -EOPNOTSUPP; + else if (streq(error_id, VARLINK_ERROR_TIMEOUT)) + r = -ETIMEDOUT; + else + r = -EIO; + + goto finish; + } + + switch (iterator->what) { + + case LOOKUP_USER: { + _cleanup_(user_group_data_release) struct user_group_data user_data = {}; + + static const JsonDispatch dispatch_table[] = { + { "record", _JSON_VARIANT_TYPE_INVALID, json_dispatch_variant, offsetof(struct user_group_data, record), 0 }, + { "incomplete", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(struct user_group_data, incomplete), 0 }, + {} + }; + _cleanup_(user_record_unrefp) UserRecord *hr = NULL; + + assert_se(!iterator->found_user); + + r = json_dispatch(parameters, dispatch_table, NULL, 0, &user_data); + if (r < 0) + goto finish; + + if (!user_data.record) { + r = log_debug_errno(SYNTHETIC_ERRNO(EIO), "Reply is missing record key"); + goto finish; + } + + hr = user_record_new(); + if (!hr) { + r = -ENOMEM; + goto finish; + } + + r = user_record_load(hr, user_data.record, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_PERMISSIVE); + if (r < 0) + goto finish; + + if (!hr->service) { + r = log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "User record does not carry service information, refusing."); + goto finish; + } + + hr->incomplete = user_data.incomplete; + + /* We match the root user by the name since the name is our primary key. We match the nobody + * use by UID though, since the name might differ on OSes */ + if (streq_ptr(hr->user_name, "root")) + iterator->synthesize_root = false; + if (hr->uid == UID_NOBODY) + iterator->synthesize_nobody = false; + + iterator->found_user = TAKE_PTR(hr); + iterator->n_found++; + + /* More stuff coming? then let's just exit cleanly here */ + if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) + return 0; + + /* Otherwise, let's remove this link and exit cleanly then */ + r = 0; + goto finish; + } + + case LOOKUP_GROUP: { + _cleanup_(user_group_data_release) struct user_group_data group_data = {}; + + static const JsonDispatch dispatch_table[] = { + { "record", _JSON_VARIANT_TYPE_INVALID, json_dispatch_variant, offsetof(struct user_group_data, record), 0 }, + { "incomplete", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(struct user_group_data, incomplete), 0 }, + {} + }; + _cleanup_(group_record_unrefp) GroupRecord *g = NULL; + + assert_se(!iterator->found_group); + + r = json_dispatch(parameters, dispatch_table, NULL, 0, &group_data); + if (r < 0) + goto finish; + + if (!group_data.record) { + r = log_debug_errno(SYNTHETIC_ERRNO(EIO), "Reply is missing record key"); + goto finish; + } + + g = group_record_new(); + if (!g) { + r = -ENOMEM; + goto finish; + } + + r = group_record_load(g, group_data.record, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_PERMISSIVE); + if (r < 0) + goto finish; + + if (!g->service) { + r = log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Group record does not carry service information, refusing."); + goto finish; + } + + g->incomplete = group_data.incomplete; + + if (streq_ptr(g->group_name, "root")) + iterator->synthesize_root = false; + if (g->gid == GID_NOBODY) + iterator->synthesize_nobody = false; + + iterator->found_group = TAKE_PTR(g); + iterator->n_found++; + + if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) + return 0; + + r = 0; + goto finish; + } + + case LOOKUP_MEMBERSHIP: { + struct membership_data { + const char *user_name; + const char *group_name; + } membership_data = {}; + + static const JsonDispatch dispatch_table[] = { + { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(struct membership_data, user_name), JSON_SAFE }, + { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(struct membership_data, group_name), JSON_SAFE }, + {} + }; + + assert(!iterator->found_user_name); + assert(!iterator->found_group_name); + + r = json_dispatch(parameters, dispatch_table, NULL, 0, &membership_data); + if (r < 0) + goto finish; + + iterator->found_user_name = mfree(iterator->found_user_name); + iterator->found_group_name = mfree(iterator->found_group_name); + + iterator->found_user_name = strdup(membership_data.user_name); + if (!iterator->found_user_name) { + r = -ENOMEM; + goto finish; + } + + iterator->found_group_name = strdup(membership_data.group_name); + if (!iterator->found_group_name) { + r = -ENOMEM; + goto finish; + } + + iterator->n_found++; + + if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) + return 0; + + r = 0; + goto finish; + } + + default: + assert_not_reached("unexpected lookup"); + } + +finish: + /* If we got one ESRCH, let that win. This way when we do a wild dump we won't be tripped up by bad + * errors if at least one connection ended cleanly */ + if (r == -ESRCH || iterator->error == 0) + iterator->error = -r; + + assert_se(set_remove(iterator->links, link) == link); + link = varlink_unref(link); + return 0; +} + +static int userdb_connect( + UserDBIterator *iterator, + const char *path, + const char *method, + bool more, + JsonVariant *query) { + + _cleanup_(varlink_unrefp) Varlink *vl = NULL; + int r; + + assert(iterator); + assert(path); + assert(method); + + r = varlink_connect_address(&vl, path); + if (r < 0) + return log_debug_errno(r, "Unable to connect to %s: %m", path); + + varlink_set_userdata(vl, iterator); + + if (!iterator->event) { + r = sd_event_new(&iterator->event); + if (r < 0) + return log_debug_errno(r, "Unable to allocate event loop: %m"); + } + + r = varlink_attach_event(vl, iterator->event, SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return log_debug_errno(r, "Failed to attach varlink connection to event loop: %m"); + + (void) varlink_set_description(vl, path); + + r = varlink_bind_reply(vl, userdb_on_query_reply); + if (r < 0) + return log_debug_errno(r, "Failed to bind reply callback: %m"); + + if (more) + r = varlink_observe(vl, method, query); + else + r = varlink_invoke(vl, method, query); + if (r < 0) + return log_debug_errno(r, "Failed to invoke varlink method: %m"); + + r = set_ensure_consume(&iterator->links, &link_hash_ops, TAKE_PTR(vl)); + if (r < 0) + return log_debug_errno(r, "Failed to add varlink connection to set: %m"); + return r; +} + +static int userdb_start_query( + UserDBIterator *iterator, + const char *method, + bool more, + JsonVariant *query, + UserDBFlags flags) { + + _cleanup_(strv_freep) char **except = NULL, **only = NULL; + _cleanup_(closedirp) DIR *d = NULL; + struct dirent *de; + const char *e; + int r, ret = 0; + + assert(iterator); + assert(method); + + e = getenv("SYSTEMD_BYPASS_USERDB"); + if (e) { + r = parse_boolean(e); + if (r > 0) + return -ENOLINK; + if (r < 0) { + except = strv_split(e, ":"); + if (!except) + return -ENOMEM; + } + } + + e = getenv("SYSTEMD_ONLY_USERDB"); + if (e) { + only = strv_split(e, ":"); + if (!only) + return -ENOMEM; + } + + /* First, let's talk to the multiplexer, if we can */ + if ((flags & (USERDB_AVOID_MULTIPLEXER|USERDB_AVOID_DYNAMIC_USER|USERDB_AVOID_NSS|USERDB_DONT_SYNTHESIZE)) == 0 && + !strv_contains(except, "io.systemd.Multiplexer") && + (!only || strv_contains(only, "io.systemd.Multiplexer"))) { + _cleanup_(json_variant_unrefp) JsonVariant *patched_query = json_variant_ref(query); + + r = json_variant_set_field_string(&patched_query, "service", "io.systemd.Multiplexer"); + if (r < 0) + return log_debug_errno(r, "Unable to set service JSON field: %m"); + + r = userdb_connect(iterator, "/run/systemd/userdb/io.systemd.Multiplexer", method, more, patched_query); + if (r >= 0) { + iterator->nss_covered = true; /* The multiplexer does NSS */ + return 0; + } + } + + d = opendir("/run/systemd/userdb/"); + if (!d) { + if (errno == ENOENT) + return -ESRCH; + + return -errno; + } + + FOREACH_DIRENT(de, d, return -errno) { + _cleanup_(json_variant_unrefp) JsonVariant *patched_query = NULL; + _cleanup_free_ char *p = NULL; + bool is_nss; + + if (streq(de->d_name, "io.systemd.Multiplexer")) /* We already tried this above, don't try this again */ + continue; + + if (FLAGS_SET(flags, USERDB_AVOID_DYNAMIC_USER) && + streq(de->d_name, "io.systemd.DynamicUser")) + continue; + + /* Avoid NSS is this is requested. Note that we also skip NSS when we were asked to skip the + * multiplexer, since in that case it's safer to do NSS in the client side emulation below + * (and when we run as part of systemd-userdbd.service we don't want to talk to ourselves + * anyway). */ + is_nss = streq(de->d_name, "io.systemd.NameServiceSwitch"); + if ((flags & (USERDB_AVOID_NSS|USERDB_AVOID_MULTIPLEXER)) && is_nss) + continue; + + if (strv_contains(except, de->d_name)) + continue; + + if (only && !strv_contains(only, de->d_name)) + continue; + + p = path_join("/run/systemd/userdb/", de->d_name); + if (!p) + return -ENOMEM; + + patched_query = json_variant_ref(query); + r = json_variant_set_field_string(&patched_query, "service", de->d_name); + if (r < 0) + return log_debug_errno(r, "Unable to set service JSON field: %m"); + + r = userdb_connect(iterator, p, method, more, patched_query); + if (is_nss && r >= 0) /* Turn off fallback NSS if we found the NSS service and could connect + * to it */ + iterator->nss_covered = true; + + if (ret == 0 && r < 0) + ret = r; + } + + if (set_isempty(iterator->links)) + return ret; /* propagate last error we saw if we couldn't connect to anything. */ + + /* We connected to some services, in this case, ignore the ones we failed on */ + return 0; +} + +static int userdb_process( + UserDBIterator *iterator, + UserRecord **ret_user_record, + GroupRecord **ret_group_record, + char **ret_user_name, + char **ret_group_name) { + + int r; + + assert(iterator); + + for (;;) { + if (iterator->what == LOOKUP_USER && iterator->found_user) { + if (ret_user_record) + *ret_user_record = TAKE_PTR(iterator->found_user); + else + iterator->found_user = user_record_unref(iterator->found_user); + + if (ret_group_record) + *ret_group_record = NULL; + if (ret_user_name) + *ret_user_name = NULL; + if (ret_group_name) + *ret_group_name = NULL; + + return 0; + } + + if (iterator->what == LOOKUP_GROUP && iterator->found_group) { + if (ret_group_record) + *ret_group_record = TAKE_PTR(iterator->found_group); + else + iterator->found_group = group_record_unref(iterator->found_group); + + if (ret_user_record) + *ret_user_record = NULL; + if (ret_user_name) + *ret_user_name = NULL; + if (ret_group_name) + *ret_group_name = NULL; + + return 0; + } + + if (iterator->what == LOOKUP_MEMBERSHIP && iterator->found_user_name && iterator->found_group_name) { + if (ret_user_name) + *ret_user_name = TAKE_PTR(iterator->found_user_name); + else + iterator->found_user_name = mfree(iterator->found_user_name); + + if (ret_group_name) + *ret_group_name = TAKE_PTR(iterator->found_group_name); + else + iterator->found_group_name = mfree(iterator->found_group_name); + + if (ret_user_record) + *ret_user_record = NULL; + if (ret_group_record) + *ret_group_record = NULL; + + return 0; + } + + if (set_isempty(iterator->links)) { + if (iterator->error == 0) + return -ESRCH; + + return -abs(iterator->error); + } + + if (!iterator->event) + return -ESRCH; + + r = sd_event_run(iterator->event, UINT64_MAX); + if (r < 0) + return r; + } +} + +static int synthetic_root_user_build(UserRecord **ret) { + return user_record_build( + ret, + JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING("root")), + JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(0)), + JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(0)), + JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING("/root")), + JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic")))); +} + +static int synthetic_nobody_user_build(UserRecord **ret) { + return user_record_build( + ret, + JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(NOBODY_USER_NAME)), + JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(UID_NOBODY)), + JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(GID_NOBODY)), + JSON_BUILD_PAIR("shell", JSON_BUILD_STRING(NOLOGIN)), + JSON_BUILD_PAIR("locked", JSON_BUILD_BOOLEAN(true)), + JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic")))); +} + +int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_(json_variant_unrefp) JsonVariant *query = NULL; + int r; + + if (!valid_user_group_name(name, VALID_USER_RELAX)) + return -EINVAL; + + r = json_build(&query, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(name)))); + if (r < 0) + return r; + + iterator = userdb_iterator_new(LOOKUP_USER); + if (!iterator) + return -ENOMEM; + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", false, query, flags); + if (r >= 0) { + r = userdb_process(iterator, ret, NULL, NULL, NULL); + if (r >= 0) + return r; + } + + if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !iterator->nss_covered) { + /* Make sure the NSS lookup doesn't recurse back to us. */ + + r = userdb_iterator_block_nss_systemd(iterator); + if (r >= 0) { + /* Client-side NSS fallback */ + r = nss_user_record_by_name(name, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret); + if (r >= 0) + return r; + } + } + + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (streq(name, "root")) + return synthetic_root_user_build(ret); + + if (streq(name, NOBODY_USER_NAME) && synthesize_nobody()) + return synthetic_nobody_user_build(ret); + } + + return r; +} + +int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_(json_variant_unrefp) JsonVariant *query = NULL; + int r; + + if (!uid_is_valid(uid)) + return -EINVAL; + + r = json_build(&query, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(uid)))); + if (r < 0) + return r; + + iterator = userdb_iterator_new(LOOKUP_USER); + if (!iterator) + return -ENOMEM; + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", false, query, flags); + if (r >= 0) { + r = userdb_process(iterator, ret, NULL, NULL, NULL); + if (r >= 0) + return r; + } + + if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !iterator->nss_covered) { + r = userdb_iterator_block_nss_systemd(iterator); + if (r >= 0) { + /* Client-side NSS fallback */ + r = nss_user_record_by_uid(uid, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret); + if (r >= 0) + return r; + } + } + + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (uid == 0) + return synthetic_root_user_build(ret); + + if (uid == UID_NOBODY && synthesize_nobody()) + return synthetic_nobody_user_build(ret); + } + + return r; +} + +int userdb_all(UserDBFlags flags, UserDBIterator **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + int r; + + assert(ret); + + iterator = userdb_iterator_new(LOOKUP_USER); + if (!iterator) + return -ENOMEM; + + iterator->synthesize_root = iterator->synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE); + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", true, NULL, flags); + + if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && (r < 0 || !iterator->nss_covered)) { + r = userdb_iterator_block_nss_systemd(iterator); + if (r < 0) + return r; + + setpwent(); + iterator->nss_iterating = true; + } else if (r < 0) + return r; + + *ret = TAKE_PTR(iterator); + return 0; +} + +int userdb_iterator_get(UserDBIterator *iterator, UserRecord **ret) { + int r; + + assert(iterator); + assert(iterator->what == LOOKUP_USER); + + if (iterator->nss_iterating) { + struct passwd *pw; + + /* If NSS isn't covered elsewhere, let's iterate through it first, since it probably contains + * the more traditional sources, which are probably good to show first. */ + + pw = getpwent(); + if (pw) { + _cleanup_free_ char *buffer = NULL; + bool incomplete = false; + struct spwd spwd; + + if (streq_ptr(pw->pw_name, "root")) + iterator->synthesize_root = false; + if (pw->pw_uid == UID_NOBODY) + iterator->synthesize_nobody = false; + + r = nss_spwd_for_passwd(pw, &spwd, &buffer); + if (r < 0) { + log_debug_errno(r, "Failed to acquire shadow entry for user %s, ignoring: %m", pw->pw_name); + incomplete = ERRNO_IS_PRIVILEGE(r); + } + + r = nss_passwd_to_user_record(pw, r >= 0 ? &spwd : NULL, ret); + if (r < 0) + return r; + + if (ret) + (*ret)->incomplete = incomplete; + return r; + } + + if (errno != 0) + log_debug_errno(errno, "Failure to iterate NSS user database, ignoring: %m"); + + iterator->nss_iterating = false; + endpwent(); + } + + r = userdb_process(iterator, ret, NULL, NULL, NULL); + + if (r < 0) { + if (iterator->synthesize_root) { + iterator->synthesize_root = false; + iterator->n_found++; + return synthetic_root_user_build(ret); + } + + if (iterator->synthesize_nobody) { + iterator->synthesize_nobody = false; + iterator->n_found++; + return synthetic_nobody_user_build(ret); + } + } + + /* if we found at least one entry, then ignore errors and indicate that we reached the end */ + if (r < 0 && iterator->n_found > 0) + return -ESRCH; + + return r; +} + +static int synthetic_root_group_build(GroupRecord **ret) { + return group_record_build( + ret, + JSON_BUILD_OBJECT(JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING("root")), + JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(0)), + JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic")))); +} + +static int synthetic_nobody_group_build(GroupRecord **ret) { + return group_record_build( + ret, + JSON_BUILD_OBJECT(JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(NOBODY_GROUP_NAME)), + JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(GID_NOBODY)), + JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic")))); +} + +int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_(json_variant_unrefp) JsonVariant *query = NULL; + int r; + + if (!valid_user_group_name(name, VALID_USER_RELAX)) + return -EINVAL; + + r = json_build(&query, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(name)))); + if (r < 0) + return r; + + iterator = userdb_iterator_new(LOOKUP_GROUP); + if (!iterator) + return -ENOMEM; + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", false, query, flags); + if (r >= 0) { + r = userdb_process(iterator, NULL, ret, NULL, NULL); + if (r >= 0) + return r; + } + + if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !(iterator && iterator->nss_covered)) { + r = userdb_iterator_block_nss_systemd(iterator); + if (r >= 0) { + r = nss_group_record_by_name(name, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret); + if (r >= 0) + return r; + } + } + + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (streq(name, "root")) + return synthetic_root_group_build(ret); + + if (streq(name, NOBODY_GROUP_NAME) && synthesize_nobody()) + return synthetic_nobody_group_build(ret); + } + + return r; +} + +int groupdb_by_gid(gid_t gid, UserDBFlags flags, GroupRecord **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_(json_variant_unrefp) JsonVariant *query = NULL; + int r; + + if (!gid_is_valid(gid)) + return -EINVAL; + + r = json_build(&query, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(gid)))); + if (r < 0) + return r; + + iterator = userdb_iterator_new(LOOKUP_GROUP); + if (!iterator) + return -ENOMEM; + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", false, query, flags); + if (r >= 0) { + r = userdb_process(iterator, NULL, ret, NULL, NULL); + if (r >= 0) + return r; + } + + if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !(iterator && iterator->nss_covered)) { + r = userdb_iterator_block_nss_systemd(iterator); + if (r >= 0) { + r = nss_group_record_by_gid(gid, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret); + if (r >= 0) + return r; + } + } + + if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) { + if (gid == 0) + return synthetic_root_group_build(ret); + + if (gid == GID_NOBODY && synthesize_nobody()) + return synthetic_nobody_group_build(ret); + } + + return r; +} + +int groupdb_all(UserDBFlags flags, UserDBIterator **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + int r; + + assert(ret); + + iterator = userdb_iterator_new(LOOKUP_GROUP); + if (!iterator) + return -ENOMEM; + + iterator->synthesize_root = iterator->synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE); + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", true, NULL, flags); + + if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && (r < 0 || !iterator->nss_covered)) { + r = userdb_iterator_block_nss_systemd(iterator); + if (r < 0) + return r; + + setgrent(); + iterator->nss_iterating = true; + } if (r < 0) + return r; + + *ret = TAKE_PTR(iterator); + return 0; +} + +int groupdb_iterator_get(UserDBIterator *iterator, GroupRecord **ret) { + int r; + + assert(iterator); + assert(iterator->what == LOOKUP_GROUP); + + if (iterator->nss_iterating) { + struct group *gr; + + errno = 0; + gr = getgrent(); + if (gr) { + _cleanup_free_ char *buffer = NULL; + bool incomplete = false; + struct sgrp sgrp; + + if (streq_ptr(gr->gr_name, "root")) + iterator->synthesize_root = false; + if (gr->gr_gid == GID_NOBODY) + iterator->synthesize_nobody = false; + + r = nss_sgrp_for_group(gr, &sgrp, &buffer); + if (r < 0) { + log_debug_errno(r, "Failed to acquire shadow entry for group %s, ignoring: %m", gr->gr_name); + incomplete = ERRNO_IS_PRIVILEGE(r); + } + + r = nss_group_to_group_record(gr, r >= 0 ? &sgrp : NULL, ret); + if (r < 0) + return r; + + if (ret) + (*ret)->incomplete = incomplete; + return r; + } + + if (errno != 0) + log_debug_errno(errno, "Failure to iterate NSS group database, ignoring: %m"); + + iterator->nss_iterating = false; + endgrent(); + } + + r = userdb_process(iterator, NULL, ret, NULL, NULL); + if (r < 0) { + if (iterator->synthesize_root) { + iterator->synthesize_root = false; + iterator->n_found++; + return synthetic_root_group_build(ret); + } + + if (iterator->synthesize_nobody) { + iterator->synthesize_nobody = false; + iterator->n_found++; + return synthetic_nobody_group_build(ret); + } + } + + /* if we found at least one entry, then ignore errors and indicate that we reached the end */ + if (r < 0 && iterator->n_found > 0) + return -ESRCH; + + return r; +} + +int membershipdb_by_user(const char *name, UserDBFlags flags, UserDBIterator **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_(json_variant_unrefp) JsonVariant *query = NULL; + int r; + + assert(ret); + + if (!valid_user_group_name(name, VALID_USER_RELAX)) + return -EINVAL; + + r = json_build(&query, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(name)))); + if (r < 0) + return r; + + iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP); + if (!iterator) + return -ENOMEM; + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, query, flags); + if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS)) + goto finish; + + r = userdb_iterator_block_nss_systemd(iterator); + if (r < 0) + return r; + + iterator->filter_user_name = strdup(name); + if (!iterator->filter_user_name) + return -ENOMEM; + + setgrent(); + iterator->nss_iterating = true; + + r = 0; + +finish: + if (r >= 0) + *ret = TAKE_PTR(iterator); + return r; +} + +int membershipdb_by_group(const char *name, UserDBFlags flags, UserDBIterator **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_(json_variant_unrefp) JsonVariant *query = NULL; + _cleanup_(group_record_unrefp) GroupRecord *gr = NULL; + int r; + + assert(ret); + + if (!valid_user_group_name(name, VALID_USER_RELAX)) + return -EINVAL; + + r = json_build(&query, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(name)))); + if (r < 0) + return r; + + iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP); + if (!iterator) + return -ENOMEM; + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, query, flags); + if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS)) + goto finish; + + r = userdb_iterator_block_nss_systemd(iterator); + if (r < 0) + return r; + + /* We ignore all errors here, since the group might be defined by a userdb native service, and we queried them already above. */ + (void) nss_group_record_by_name(name, false, &gr); + if (gr) { + iterator->members_of_group = strv_copy(gr->members); + if (!iterator->members_of_group) + return -ENOMEM; + + iterator->index_members_of_group = 0; + + iterator->found_group_name = strdup(name); + if (!iterator->found_group_name) + return -ENOMEM; + } + + r = 0; + +finish: + if (r >= 0) + *ret = TAKE_PTR(iterator); + + return r; +} + +int membershipdb_all(UserDBFlags flags, UserDBIterator **ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + int r; + + assert(ret); + + iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP); + if (!iterator) + return -ENOMEM; + + r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, NULL, flags); + if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS)) + goto finish; + + r = userdb_iterator_block_nss_systemd(iterator); + if (r < 0) + return r; + + setgrent(); + iterator->nss_iterating = true; + + r = 0; + +finish: + if (r >= 0) + *ret = TAKE_PTR(iterator); + + return r; +} + +int membershipdb_iterator_get( + UserDBIterator *iterator, + char **ret_user, + char **ret_group) { + + int r; + + assert(iterator); + + for (;;) { + /* If we are iteratring through NSS acquire a new group entry if we haven't acquired one yet. */ + if (!iterator->members_of_group) { + struct group *g; + + if (!iterator->nss_iterating) + break; + + assert(!iterator->found_user_name); + do { + errno = 0; + g = getgrent(); + if (!g) { + if (errno != 0) + log_debug_errno(errno, "Failure during NSS group iteration, ignoring: %m"); + break; + } + + } while (iterator->filter_user_name ? !strv_contains(g->gr_mem, iterator->filter_user_name) : + strv_isempty(g->gr_mem)); + + if (g) { + r = free_and_strdup(&iterator->found_group_name, g->gr_name); + if (r < 0) + return r; + + if (iterator->filter_user_name) + iterator->members_of_group = strv_new(iterator->filter_user_name); + else + iterator->members_of_group = strv_copy(g->gr_mem); + if (!iterator->members_of_group) + return -ENOMEM; + + iterator->index_members_of_group = 0; + } else { + iterator->nss_iterating = false; + endgrent(); + break; + } + } + + assert(iterator->found_group_name); + assert(iterator->members_of_group); + assert(!iterator->found_user_name); + + if (iterator->members_of_group[iterator->index_members_of_group]) { + _cleanup_free_ char *cu = NULL, *cg = NULL; + + if (ret_user) { + cu = strdup(iterator->members_of_group[iterator->index_members_of_group]); + if (!cu) + return -ENOMEM; + } + + if (ret_group) { + cg = strdup(iterator->found_group_name); + if (!cg) + return -ENOMEM; + } + + if (ret_user) + *ret_user = TAKE_PTR(cu); + + if (ret_group) + *ret_group = TAKE_PTR(cg); + + iterator->index_members_of_group++; + return 0; + } + + iterator->members_of_group = strv_free(iterator->members_of_group); + iterator->found_group_name = mfree(iterator->found_group_name); + } + + r = userdb_process(iterator, NULL, NULL, ret_user, ret_group); + if (r < 0 && iterator->n_found > 0) + return -ESRCH; + + return r; +} + +int membershipdb_by_group_strv(const char *name, UserDBFlags flags, char ***ret) { + _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL; + _cleanup_strv_free_ char **members = NULL; + int r; + + assert(name); + assert(ret); + + r = membershipdb_by_group(name, flags, &iterator); + if (r < 0) + return r; + + for (;;) { + _cleanup_free_ char *user_name = NULL; + + r = membershipdb_iterator_get(iterator, &user_name, NULL); + if (r == -ESRCH) + break; + if (r < 0) + return r; + + r = strv_consume(&members, TAKE_PTR(user_name)); + if (r < 0) + return r; + } + + strv_sort(members); + strv_uniq(members); + + *ret = TAKE_PTR(members); + return 0; +} + +int userdb_block_nss_systemd(int b) { + _cleanup_(dlclosep) void *dl = NULL; + int (*call)(bool b); + + /* Note that we might be called from libnss_systemd.so.2 itself, but that should be fine, really. */ + + dl = dlopen(ROOTLIBDIR "/libnss_systemd.so.2", RTLD_LAZY|RTLD_NODELETE); + if (!dl) { + /* If the file isn't installed, don't complain loudly */ + log_debug("Failed to dlopen(libnss_systemd.so.2), ignoring: %s", dlerror()); + return 0; + } + + call = (int (*)(bool b)) dlsym(dl, "_nss_systemd_block"); + if (!call) + /* If the file is is installed but lacks the symbol we expect, things are weird, let's complain */ + return log_debug_errno(SYNTHETIC_ERRNO(ELIBBAD), + "Unable to find symbol _nss_systemd_block in libnss_systemd.so.2: %s", dlerror()); + + return call(b); +} diff --git a/src/shared/userdb.h b/src/shared/userdb.h new file mode 100644 index 0000000..ee207b5 --- /dev/null +++ b/src/shared/userdb.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/socket.h> +#include <sys/un.h> + +#include "group-record.h" +#include "user-record.h" + +/* Inquire local services for user/group records */ + +typedef struct UserDBIterator UserDBIterator; + +UserDBIterator *userdb_iterator_free(UserDBIterator *iterator); +DEFINE_TRIVIAL_CLEANUP_FUNC(UserDBIterator*, userdb_iterator_free); + +typedef enum UserDBFlags { + USERDB_AVOID_NSS = 1 << 0, /* don't do client-side nor server-side NSS */ + USERDB_AVOID_SHADOW = 1 << 1, /* don't do client-side shadow calls (server side might happen though) */ + USERDB_AVOID_DYNAMIC_USER = 1 << 2, /* exclude looking up in io.systemd.DynamicUser */ + USERDB_AVOID_MULTIPLEXER = 1 << 3, /* exclude looking up via io.systemd.Multiplexer */ + USERDB_DONT_SYNTHESIZE = 1 << 4, /* don't synthesize root/nobody */ +} UserDBFlags; + +int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret); +int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret); +int userdb_all(UserDBFlags flags, UserDBIterator **ret); +int userdb_iterator_get(UserDBIterator *iterator, UserRecord **ret); + +int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret); +int groupdb_by_gid(gid_t gid, UserDBFlags flags, GroupRecord **ret); +int groupdb_all(UserDBFlags flags, UserDBIterator **ret); +int groupdb_iterator_get(UserDBIterator *iterator, GroupRecord **ret); + +int membershipdb_by_user(const char *name, UserDBFlags flags, UserDBIterator **ret); +int membershipdb_by_group(const char *name, UserDBFlags flags, UserDBIterator **ret); +int membershipdb_all(UserDBFlags flags, UserDBIterator **ret); +int membershipdb_iterator_get(UserDBIterator *iterator, char **user, char **group); +int membershipdb_by_group_strv(const char *name, UserDBFlags flags, char ***ret); + +int userdb_block_nss_systemd(int b); diff --git a/src/shared/utmp-wtmp.c b/src/shared/utmp-wtmp.c new file mode 100644 index 0000000..b36bc20 --- /dev/null +++ b/src/shared/utmp-wtmp.c @@ -0,0 +1,409 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/utsname.h> +#include <unistd.h> +#include <utmpx.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "hostname-util.h" +#include "io-util.h" +#include "macro.h" +#include "memory-util.h" +#include "path-util.h" +#include "string-util.h" +#include "terminal-util.h" +#include "time-util.h" +#include "user-util.h" +#include "utmp-wtmp.h" + +int utmp_get_runlevel(int *runlevel, int *previous) { + _cleanup_(utxent_cleanup) bool utmpx = false; + struct utmpx *found, lookup = { .ut_type = RUN_LVL }; + const char *e; + + assert(runlevel); + + /* If these values are set in the environment this takes + * precedence. Presumably, sysvinit does this to work around a + * race condition that would otherwise exist where we'd always + * go to disk and hence might read runlevel data that might be + * very new and not apply to the current script being executed. */ + + e = getenv("RUNLEVEL"); + if (e && e[0] > 0) { + *runlevel = e[0]; + + if (previous) { + /* $PREVLEVEL seems to be an Upstart thing */ + + e = getenv("PREVLEVEL"); + if (e && e[0] > 0) + *previous = e[0]; + else + *previous = 0; + } + + return 0; + } + + if (utmpxname(_PATH_UTMPX) < 0) + return -errno; + + utmpx = utxent_start(); + + found = getutxid(&lookup); + if (!found) + return -errno; + + *runlevel = found->ut_pid & 0xFF; + if (previous) + *previous = (found->ut_pid >> 8) & 0xFF; + + return 0; +} + +static void init_timestamp(struct utmpx *store, usec_t t) { + assert(store); + + if (t <= 0) + t = now(CLOCK_REALTIME); + + store->ut_tv.tv_sec = t / USEC_PER_SEC; + store->ut_tv.tv_usec = t % USEC_PER_SEC; +} + +static void init_entry(struct utmpx *store, usec_t t) { + struct utsname uts = {}; + + assert(store); + + init_timestamp(store, t); + + if (uname(&uts) >= 0) + strncpy(store->ut_host, uts.release, sizeof(store->ut_host)); + + strncpy(store->ut_line, "~", sizeof(store->ut_line)); /* or ~~ ? */ + strncpy(store->ut_id, "~~", sizeof(store->ut_id)); +} + +static int write_entry_utmp(const struct utmpx *store) { + _cleanup_(utxent_cleanup) bool utmpx = false; + + assert(store); + + /* utmp is similar to wtmp, but there is only one entry for + * each entry type resp. user; i.e. basically a key/value + * table. */ + + if (utmpxname(_PATH_UTMPX) < 0) + return -errno; + + utmpx = utxent_start(); + + if (pututxline(store)) + return 0; + if (errno == ENOENT) { + /* If utmp/wtmp have been disabled, that's a good thing, hence ignore the error. */ + log_debug_errno(errno, "Not writing utmp: %m"); + return 0; + } + return -errno; +} + +static int write_entry_wtmp(const struct utmpx *store) { + assert(store); + + /* wtmp is a simple append-only file where each entry is + * simply appended to the end; i.e. basically a log. */ + + errno = 0; + updwtmpx(_PATH_WTMPX, store); + if (errno == ENOENT) { + /* If utmp/wtmp have been disabled, that's a good thing, hence ignore the error. */ + log_debug_errno(errno, "Not writing wtmp: %m"); + return 0; + } + if (errno == EROFS) { + log_warning_errno(errno, "Failed to write wtmp record, ignoring: %m"); + return 0; + } + return -errno; +} + +static int write_utmp_wtmp(const struct utmpx *store_utmp, const struct utmpx *store_wtmp) { + int r, s; + + r = write_entry_utmp(store_utmp); + s = write_entry_wtmp(store_wtmp); + return r < 0 ? r : s; +} + +static int write_entry_both(const struct utmpx *store) { + return write_utmp_wtmp(store, store); +} + +int utmp_put_shutdown(void) { + struct utmpx store = {}; + + init_entry(&store, 0); + + store.ut_type = RUN_LVL; + strncpy(store.ut_user, "shutdown", sizeof(store.ut_user)); + + return write_entry_both(&store); +} + +int utmp_put_reboot(usec_t t) { + struct utmpx store = {}; + + init_entry(&store, t); + + store.ut_type = BOOT_TIME; + strncpy(store.ut_user, "reboot", sizeof(store.ut_user)); + + return write_entry_both(&store); +} + +static void copy_suffix(char *buf, size_t buf_size, const char *src) { + size_t l; + + l = strlen(src); + if (l < buf_size) + strncpy(buf, src, buf_size); + else + memcpy(buf, src + l - buf_size, buf_size); +} + +int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) { + struct utmpx store = { + .ut_type = INIT_PROCESS, + .ut_pid = pid, + .ut_session = sid, + }; + int r; + + assert(id); + + init_timestamp(&store, 0); + + /* Copy the whole string if it fits, or just the suffix without the terminating NUL. */ + copy_suffix(store.ut_id, sizeof(store.ut_id), id); + + if (line) + strncpy_exact(store.ut_line, line, sizeof(store.ut_line)); + + r = write_entry_both(&store); + if (r < 0) + return r; + + if (IN_SET(ut_type, LOGIN_PROCESS, USER_PROCESS)) { + store.ut_type = LOGIN_PROCESS; + r = write_entry_both(&store); + if (r < 0) + return r; + } + + if (ut_type == USER_PROCESS) { + store.ut_type = USER_PROCESS; + strncpy(store.ut_user, user, sizeof(store.ut_user)-1); + r = write_entry_both(&store); + if (r < 0) + return r; + } + + return 0; +} + +int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) { + struct utmpx lookup = { + .ut_type = INIT_PROCESS /* looks for DEAD_PROCESS, LOGIN_PROCESS, USER_PROCESS, too */ + }, store, store_wtmp, *found; + + assert(id); + + setutxent(); + + /* Copy the whole string if it fits, or just the suffix without the terminating NUL. */ + copy_suffix(store.ut_id, sizeof(store.ut_id), id); + + found = getutxid(&lookup); + if (!found) + return 0; + + if (found->ut_pid != pid) + return 0; + + memcpy(&store, found, sizeof(store)); + store.ut_type = DEAD_PROCESS; + store.ut_exit.e_termination = code; + store.ut_exit.e_exit = status; + + zero(store.ut_user); + zero(store.ut_host); + zero(store.ut_tv); + + memcpy(&store_wtmp, &store, sizeof(store_wtmp)); + /* wtmp wants the current time */ + init_timestamp(&store_wtmp, 0); + + return write_utmp_wtmp(&store, &store_wtmp); +} + +int utmp_put_runlevel(int runlevel, int previous) { + struct utmpx store = {}; + int r; + + assert(runlevel > 0); + + if (previous <= 0) { + /* Find the old runlevel automatically */ + + r = utmp_get_runlevel(&previous, NULL); + if (r < 0) { + if (r != -ESRCH) + return r; + + previous = 0; + } + } + + if (previous == runlevel) + return 0; + + init_entry(&store, 0); + + store.ut_type = RUN_LVL; + store.ut_pid = (runlevel & 0xFF) | ((previous & 0xFF) << 8); + strncpy(store.ut_user, "runlevel", sizeof(store.ut_user)); + + return write_entry_both(&store); +} + +#define TIMEOUT_USEC (50 * USEC_PER_MSEC) + +static int write_to_terminal(const char *tty, const char *message) { + _cleanup_close_ int fd = -1; + const char *p; + size_t left; + usec_t end; + + assert(tty); + assert(message); + + fd = open(tty, O_WRONLY|O_NONBLOCK|O_NOCTTY|O_CLOEXEC); + if (fd < 0 || !isatty(fd)) + return -errno; + + p = message; + left = strlen(message); + + end = now(CLOCK_MONOTONIC) + TIMEOUT_USEC; + + while (left > 0) { + ssize_t n; + usec_t t; + int k; + + t = now(CLOCK_MONOTONIC); + + if (t >= end) + return -ETIME; + + k = fd_wait_for_event(fd, POLLOUT, end - t); + if (k < 0) + return k; + if (k == 0) + return -ETIME; + + n = write(fd, p, left); + if (n < 0) { + if (errno == EAGAIN) + continue; + + return -errno; + } + + assert((size_t) n <= left); + + p += n; + left -= n; + } + + return 0; +} + +int utmp_wall( + const char *message, + const char *username, + const char *origin_tty, + bool (*match_tty)(const char *tty, void *userdata), + void *userdata) { + + _cleanup_free_ char *text = NULL, *hn = NULL, *un = NULL, *stdin_tty = NULL; + char date[FORMAT_TIMESTAMP_MAX]; + struct utmpx *u; + int r; + + hn = gethostname_malloc(); + if (!hn) + return -ENOMEM; + if (!username) { + un = getlogname_malloc(); + if (!un) + return -ENOMEM; + } + + if (!origin_tty) { + getttyname_harder(STDIN_FILENO, &stdin_tty); + origin_tty = stdin_tty; + } + + if (asprintf(&text, + "\a\r\n" + "Broadcast message from %s@%s%s%s (%s):\r\n\r\n" + "%s\r\n\r\n", + un ?: username, hn, + origin_tty ? " on " : "", strempty(origin_tty), + format_timestamp(date, sizeof(date), now(CLOCK_REALTIME)), + message) < 0) + return -ENOMEM; + + setutxent(); + + r = 0; + + while ((u = getutxent())) { + _cleanup_free_ char *buf = NULL; + const char *path; + int q; + + if (u->ut_type != USER_PROCESS || u->ut_user[0] == 0) + continue; + + /* this access is fine, because STRLEN("/dev/") << 32 (UT_LINESIZE) */ + if (path_startswith(u->ut_line, "/dev/")) + path = u->ut_line; + else { + if (asprintf(&buf, "/dev/%.*s", (int) sizeof(u->ut_line), u->ut_line) < 0) + return -ENOMEM; + + path = buf; + } + + if (!match_tty || match_tty(path, userdata)) { + q = write_to_terminal(path, text); + if (q < 0) + r = q; + } + } + + return r; +} diff --git a/src/shared/utmp-wtmp.h b/src/shared/utmp-wtmp.h new file mode 100644 index 0000000..3e71f76 --- /dev/null +++ b/src/shared/utmp-wtmp.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +#include "time-util.h" +#include "util.h" + +#if ENABLE_UTMP +#include <utmpx.h> + +int utmp_get_runlevel(int *runlevel, int *previous); + +int utmp_put_shutdown(void); +int utmp_put_reboot(usec_t timestamp); +int utmp_put_runlevel(int runlevel, int previous); + +int utmp_put_dead_process(const char *id, pid_t pid, int code, int status); +int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user); + +int utmp_wall( + const char *message, + const char *username, + const char *origin_tty, + bool (*match_tty)(const char *tty, void *userdata), + void *userdata); + +static inline bool utxent_start(void) { + setutxent(); + return true; +} +static inline void utxent_cleanup(bool *initialized) { + if (initialized) + endutxent(); +} + +#else /* ENABLE_UTMP */ + +static inline int utmp_get_runlevel(int *runlevel, int *previous) { + return -ESRCH; +} +static inline int utmp_put_shutdown(void) { + return 0; +} +static inline int utmp_put_reboot(usec_t timestamp) { + return 0; +} +static inline int utmp_put_runlevel(int runlevel, int previous) { + return 0; +} +static inline int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) { + return 0; +} +static inline int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) { + return 0; +} +static inline int utmp_wall( + const char *message, + const char *username, + const char *origin_tty, + bool (*match_tty)(const char *tty, void *userdata), + void *userdata) { + return 0; +} + +#endif /* ENABLE_UTMP */ diff --git a/src/shared/varlink.c b/src/shared/varlink.c new file mode 100644 index 0000000..e7be33c --- /dev/null +++ b/src/shared/varlink.c @@ -0,0 +1,2502 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <sys/poll.h> + +#include "alloc-util.h" +#include "errno-util.h" +#include "fd-util.h" +#include "hashmap.h" +#include "io-util.h" +#include "list.h" +#include "process-util.h" +#include "selinux-util.h" +#include "set.h" +#include "socket-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "umask-util.h" +#include "user-util.h" +#include "varlink.h" + +#define VARLINK_DEFAULT_CONNECTIONS_MAX 4096U +#define VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX 1024U + +#define VARLINK_DEFAULT_TIMEOUT_USEC (45U*USEC_PER_SEC) +#define VARLINK_BUFFER_MAX (16U*1024U*1024U) +#define VARLINK_READ_SIZE (64U*1024U) + +typedef enum VarlinkState { + /* Client side states */ + VARLINK_IDLE_CLIENT, + VARLINK_AWAITING_REPLY, + VARLINK_AWAITING_REPLY_MORE, + VARLINK_CALLING, + VARLINK_CALLED, + VARLINK_PROCESSING_REPLY, + + /* Server side states */ + VARLINK_IDLE_SERVER, + VARLINK_PROCESSING_METHOD, + VARLINK_PROCESSING_METHOD_MORE, + VARLINK_PROCESSING_METHOD_ONEWAY, + VARLINK_PROCESSED_METHOD, + VARLINK_PENDING_METHOD, + VARLINK_PENDING_METHOD_MORE, + + /* Common states (only during shutdown) */ + VARLINK_PENDING_DISCONNECT, + VARLINK_PENDING_TIMEOUT, + VARLINK_PROCESSING_DISCONNECT, + VARLINK_PROCESSING_TIMEOUT, + VARLINK_PROCESSING_FAILURE, + VARLINK_DISCONNECTED, + + _VARLINK_STATE_MAX, + _VARLINK_STATE_INVALID = -1 +} VarlinkState; + +/* Tests whether we are not yet disconnected. Note that this is true during all states where the connection + * is still good for something, and false only when it's dead for good. This means: when we are + * asynchronously connecting to a peer and the connect() is still pending, then this will return 'true', as + * the connection is still good, and we are likely to be able to properly operate on it soon. */ +#define VARLINK_STATE_IS_ALIVE(state) \ + IN_SET(state, \ + VARLINK_IDLE_CLIENT, \ + VARLINK_AWAITING_REPLY, \ + VARLINK_AWAITING_REPLY_MORE, \ + VARLINK_CALLING, \ + VARLINK_CALLED, \ + VARLINK_PROCESSING_REPLY, \ + VARLINK_IDLE_SERVER, \ + VARLINK_PROCESSING_METHOD, \ + VARLINK_PROCESSING_METHOD_MORE, \ + VARLINK_PROCESSING_METHOD_ONEWAY, \ + VARLINK_PROCESSED_METHOD, \ + VARLINK_PENDING_METHOD, \ + VARLINK_PENDING_METHOD_MORE) + +struct Varlink { + unsigned n_ref; + + VarlinkServer *server; + + VarlinkState state; + bool connecting; /* This boolean indicates whether the socket fd we are operating on is currently + * processing an asynchronous connect(). In that state we watch the socket for + * EPOLLOUT, but we refrain from calling read() or write() on the socket as that + * will trigger ENOTCONN. Note that this boolean is kept separate from the + * VarlinkState above on purpose: while the connect() is still not complete we + * already want to allow queuing of messages and similar. Thus it's nice to keep + * these two state concepts separate: the VarlinkState encodes what our own view of + * the connection is, i.e. whether we think it's a server, a client, and has + * something queued already, while 'connecting' tells us a detail about the + * transport used below, that should have no effect on how we otherwise accept and + * process operations from the user. + * + * Or to say this differently: VARLINK_STATE_IS_ALIVE(state) tells you whether the + * connection is good to use, even if it might not be fully connected + * yet. connecting=true then informs you that actually we are still connecting, and + * the connection is actually not established yet and thus any requests you enqueue + * now will still work fine but will be queued only, not sent yet, but that + * shouldn't stop you from using the connection, since eventually whatever you queue + * *will* be sent. + * + * Or to say this even differently: 'state' is a high-level ("application layer" + * high, if you so will) state, while 'conecting' is a low-level ("transport layer" + * low, if you so will) state, and while they are not entirely unrelated and + * sometimes propagate effects to each other they are only asynchronously connected + * at most. */ + unsigned n_pending; + + int fd; + + char *input_buffer; /* valid data starts at input_buffer_index, ends at input_buffer_index+input_buffer_size */ + size_t input_buffer_allocated; + size_t input_buffer_index; + size_t input_buffer_size; + size_t input_buffer_unscanned; + + char *output_buffer; /* valid data starts at output_buffer_index, ends at output_buffer_index+output_buffer_size */ + size_t output_buffer_allocated; + size_t output_buffer_index; + size_t output_buffer_size; + + VarlinkReply reply_callback; + + JsonVariant *current; + JsonVariant *reply; + + struct ucred ucred; + bool ucred_acquired:1; + + bool write_disconnected:1; + bool read_disconnected:1; + bool prefer_read_write:1; + bool got_pollhup:1; + + usec_t timestamp; + usec_t timeout; + + void *userdata; + char *description; + + sd_event *event; + sd_event_source *io_event_source; + sd_event_source *time_event_source; + sd_event_source *quit_event_source; + sd_event_source *defer_event_source; +}; + +typedef struct VarlinkServerSocket VarlinkServerSocket; + +struct VarlinkServerSocket { + VarlinkServer *server; + + int fd; + char *address; + + sd_event_source *event_source; + + LIST_FIELDS(VarlinkServerSocket, sockets); +}; + +struct VarlinkServer { + unsigned n_ref; + VarlinkServerFlags flags; + + LIST_HEAD(VarlinkServerSocket, sockets); + + Hashmap *methods; + VarlinkConnect connect_callback; + VarlinkDisconnect disconnect_callback; + + sd_event *event; + int64_t event_priority; + + unsigned n_connections; + Hashmap *by_uid; + + void *userdata; + char *description; + + unsigned connections_max; + unsigned connections_per_uid_max; +}; + +static const char* const varlink_state_table[_VARLINK_STATE_MAX] = { + [VARLINK_IDLE_CLIENT] = "idle-client", + [VARLINK_AWAITING_REPLY] = "awaiting-reply", + [VARLINK_AWAITING_REPLY_MORE] = "awaiting-reply-more", + [VARLINK_CALLING] = "calling", + [VARLINK_CALLED] = "called", + [VARLINK_PROCESSING_REPLY] = "processing-reply", + [VARLINK_IDLE_SERVER] = "idle-server", + [VARLINK_PROCESSING_METHOD] = "processing-method", + [VARLINK_PROCESSING_METHOD_MORE] = "processing-method-more", + [VARLINK_PROCESSING_METHOD_ONEWAY] = "processing-method-oneway", + [VARLINK_PROCESSED_METHOD] = "processed-method", + [VARLINK_PENDING_METHOD] = "pending-method", + [VARLINK_PENDING_METHOD_MORE] = "pending-method-more", + [VARLINK_PENDING_DISCONNECT] = "pending-disconnect", + [VARLINK_PENDING_TIMEOUT] = "pending-timeout", + [VARLINK_PROCESSING_DISCONNECT] = "processing-disconnect", + [VARLINK_PROCESSING_TIMEOUT] = "processing-timeout", + [VARLINK_PROCESSING_FAILURE] = "processing-failure", + [VARLINK_DISCONNECTED] = "disconnected", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(varlink_state, VarlinkState); + +#define varlink_log_errno(v, error, fmt, ...) \ + log_debug_errno(error, "%s: " fmt, varlink_description(v), ##__VA_ARGS__) + +#define varlink_log(v, fmt, ...) \ + log_debug("%s: " fmt, varlink_description(v), ##__VA_ARGS__) + +#define varlink_server_log_errno(s, error, fmt, ...) \ + log_debug_errno(error, "%s: " fmt, varlink_server_description(s), ##__VA_ARGS__) + +#define varlink_server_log(s, fmt, ...) \ + log_debug("%s: " fmt, varlink_server_description(s), ##__VA_ARGS__) + +static inline const char *varlink_description(Varlink *v) { + return strna(v ? v->description : NULL); +} + +static inline const char *varlink_server_description(VarlinkServer *s) { + return strna(s ? s->description : NULL); +} + +static void varlink_set_state(Varlink *v, VarlinkState state) { + assert(v); + assert(state >= 0 && state < _VARLINK_STATE_MAX); + + if (v->state < 0) + varlink_log(v, "varlink: setting state %s", + varlink_state_to_string(state)); + else + varlink_log(v, "varlink: changing state %s → %s", + varlink_state_to_string(v->state), + varlink_state_to_string(state)); + + v->state = state; +} + +static int varlink_new(Varlink **ret) { + Varlink *v; + + assert(ret); + + v = new(Varlink, 1); + if (!v) + return -ENOMEM; + + *v = (Varlink) { + .n_ref = 1, + .fd = -1, + + .state = _VARLINK_STATE_INVALID, + + .ucred.uid = UID_INVALID, + .ucred.gid = GID_INVALID, + + .timestamp = USEC_INFINITY, + .timeout = VARLINK_DEFAULT_TIMEOUT_USEC + }; + + *ret = v; + return 0; +} + +int varlink_connect_address(Varlink **ret, const char *address) { + _cleanup_(varlink_unrefp) Varlink *v = NULL; + union sockaddr_union sockaddr; + socklen_t sockaddr_len; + int r; + + assert_return(ret, -EINVAL); + assert_return(address, -EINVAL); + + r = sockaddr_un_set_path(&sockaddr.un, address); + if (r < 0) + return r; + sockaddr_len = r; + + r = varlink_new(&v); + if (r < 0) + return r; + + v->fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (v->fd < 0) + return -errno; + + v->fd = fd_move_above_stdio(v->fd); + + if (connect(v->fd, &sockaddr.sa, sockaddr_len) < 0) { + if (!IN_SET(errno, EAGAIN, EINPROGRESS)) + return -errno; + + v->connecting = true; /* We are asynchronously connecting, i.e. the connect() is being + * processed in the background. As long as that's the case the socket + * is in a special state: it's there, we can poll it for EPOLLOUT, but + * if we attempt to write() to it before we see EPOLLOUT we'll get + * ENOTCONN (and not EAGAIN, like we would for a normal connected + * socket that isn't writable at the moment). Since ENOTCONN on write() + * hence can mean two different things (i.e. connection not complete + * yet vs. already disconnected again), we store as a boolean whether + * we are still in connect(). */ + } + + varlink_set_state(v, VARLINK_IDLE_CLIENT); + + *ret = TAKE_PTR(v); + return r; +} + +int varlink_connect_fd(Varlink **ret, int fd) { + Varlink *v; + int r; + + assert_return(ret, -EINVAL); + assert_return(fd >= 0, -EBADF); + + r = fd_nonblock(fd, true); + if (r < 0) + return r; + + r = varlink_new(&v); + if (r < 0) + return r; + + v->fd = fd; + varlink_set_state(v, VARLINK_IDLE_CLIENT); + + /* Note that if this function is called we assume the passed socket (if it is one) is already + * properly connected, i.e. any asynchronous connect() done on it already completed. Because of that + * we'll not set the 'connecting' boolean here, i.e. we don't need to avoid write()ing to the socket + * until the connection is fully set up. Behaviour here is hence a bit different from + * varlink_connect_address() above, as there we do handle asynchronous connections ourselves and + * avoid doing write() on it before we saw EPOLLOUT for the first time. */ + + *ret = v; + return 0; +} + +static void varlink_detach_event_sources(Varlink *v) { + assert(v); + + v->io_event_source = sd_event_source_disable_unref(v->io_event_source); + v->time_event_source = sd_event_source_disable_unref(v->time_event_source); + v->quit_event_source = sd_event_source_disable_unref(v->quit_event_source); + v->defer_event_source = sd_event_source_disable_unref(v->defer_event_source); +} + +static void varlink_clear(Varlink *v) { + assert(v); + + varlink_detach_event_sources(v); + + v->fd = safe_close(v->fd); + + v->input_buffer = mfree(v->input_buffer); + v->output_buffer = mfree(v->output_buffer); + + v->current = json_variant_unref(v->current); + v->reply = json_variant_unref(v->reply); + + v->event = sd_event_unref(v->event); +} + +static Varlink* varlink_destroy(Varlink *v) { + if (!v) + return NULL; + + /* If this is called the server object must already been unreffed here. Why that? because when we + * linked up the varlink connection with the server object we took one ref in each direction */ + assert(!v->server); + + varlink_clear(v); + + free(v->description); + return mfree(v); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(Varlink, varlink, varlink_destroy); + +static int varlink_test_disconnect(Varlink *v) { + assert(v); + + /* Tests whether we the connection has been terminated. We are careful to not stop processing it + * prematurely, since we want to handle half-open connections as well as possible and want to flush + * out and read data before we close down if we can. */ + + /* Already disconnected? */ + if (!VARLINK_STATE_IS_ALIVE(v->state)) + return 0; + + /* Wait until connection setup is complete, i.e. until asynchronous connect() completes */ + if (v->connecting) + return 0; + + /* Still something to write and we can write? Stay around */ + if (v->output_buffer_size > 0 && !v->write_disconnected) + return 0; + + /* Both sides gone already? Then there's no need to stick around */ + if (v->read_disconnected && v->write_disconnected) + goto disconnect; + + /* If we are waiting for incoming data but the read side is shut down, disconnect. */ + if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER) && v->read_disconnected) + goto disconnect; + + /* Similar, if are a client that hasn't written anything yet but the write side is dead, also + * disconnect. We also explicitly check for POLLHUP here since we likely won't notice the write side + * being down if we never wrote anything. */ + if (IN_SET(v->state, VARLINK_IDLE_CLIENT) && (v->write_disconnected || v->got_pollhup)) + goto disconnect; + + /* The server is still expecting to write more, but its write end is disconnected and it got a POLLHUP + * (i.e. from a disconnected client), so disconnect. */ + if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE) && v->write_disconnected && v->got_pollhup) + goto disconnect; + + return 0; + +disconnect: + varlink_set_state(v, VARLINK_PENDING_DISCONNECT); + return 1; +} + +static int varlink_write(Varlink *v) { + ssize_t n; + + assert(v); + + if (!VARLINK_STATE_IS_ALIVE(v->state)) + return 0; + if (v->connecting) /* Writing while we are still wait for a non-blocking connect() to complete will + * result in ENOTCONN, hence exit early here */ + return 0; + if (v->output_buffer_size == 0) + return 0; + if (v->write_disconnected) + return 0; + + assert(v->fd >= 0); + + /* We generally prefer recv()/send() (mostly because of MSG_NOSIGNAL) but also want to be compatible + * with non-socket IO, hence fall back automatically */ + if (!v->prefer_read_write) { + n = send(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size, MSG_DONTWAIT|MSG_NOSIGNAL); + if (n < 0 && errno == ENOTSOCK) + v->prefer_read_write = true; + } + if (v->prefer_read_write) + n = write(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size); + if (n < 0) { + if (errno == EAGAIN) + return 0; + + if (ERRNO_IS_DISCONNECT(errno)) { + /* If we get informed about a disconnect on write, then let's remember that, but not + * act on it just yet. Let's wait for read() to report the issue first. */ + v->write_disconnected = true; + return 1; + } + + return -errno; + } + + v->output_buffer_size -= n; + + if (v->output_buffer_size == 0) + v->output_buffer_index = 0; + else + v->output_buffer_index += n; + + v->timestamp = now(CLOCK_MONOTONIC); + return 1; +} + +static int varlink_read(Varlink *v) { + size_t rs; + ssize_t n; + + assert(v); + + if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER)) + return 0; + if (v->connecting) /* read() on a socket while we are in connect() will fail with EINVAL, hence exit early here */ + return 0; + if (v->current) + return 0; + if (v->input_buffer_unscanned > 0) + return 0; + if (v->read_disconnected) + return 0; + + if (v->input_buffer_size >= VARLINK_BUFFER_MAX) + return -ENOBUFS; + + assert(v->fd >= 0); + + if (v->input_buffer_allocated <= v->input_buffer_index + v->input_buffer_size) { + size_t add; + + add = MIN(VARLINK_BUFFER_MAX - v->input_buffer_size, VARLINK_READ_SIZE); + + if (v->input_buffer_index == 0) { + + if (!GREEDY_REALLOC(v->input_buffer, v->input_buffer_allocated, v->input_buffer_size + add)) + return -ENOMEM; + + } else { + char *b; + + b = new(char, v->input_buffer_size + add); + if (!b) + return -ENOMEM; + + memcpy(b, v->input_buffer + v->input_buffer_index, v->input_buffer_size); + + free_and_replace(v->input_buffer, b); + + v->input_buffer_allocated = v->input_buffer_size + add; + v->input_buffer_index = 0; + } + } + + rs = v->input_buffer_allocated - (v->input_buffer_index + v->input_buffer_size); + + if (!v->prefer_read_write) { + n = recv(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs, MSG_DONTWAIT); + if (n < 0 && errno == ENOTSOCK) + v->prefer_read_write = true; + } + if (v->prefer_read_write) + n = read(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs); + if (n < 0) { + if (errno == EAGAIN) + return 0; + + if (ERRNO_IS_DISCONNECT(errno)) { + v->read_disconnected = true; + return 1; + } + + return -errno; + } + if (n == 0) { /* EOF */ + v->read_disconnected = true; + return 1; + } + + v->input_buffer_size += n; + v->input_buffer_unscanned += n; + + return 1; +} + +static int varlink_parse_message(Varlink *v) { + const char *e, *begin; + size_t sz; + int r; + + assert(v); + + if (v->current) + return 0; + if (v->input_buffer_unscanned <= 0) + return 0; + + assert(v->input_buffer_unscanned <= v->input_buffer_size); + assert(v->input_buffer_index + v->input_buffer_size <= v->input_buffer_allocated); + + begin = v->input_buffer + v->input_buffer_index; + + e = memchr(begin + v->input_buffer_size - v->input_buffer_unscanned, 0, v->input_buffer_unscanned); + if (!e) { + v->input_buffer_unscanned = 0; + return 0; + } + + sz = e - begin + 1; + + varlink_log(v, "New incoming message: %s", begin); /* FIXME: should we output the whole message here before validation? + * This may produce a non-printable journal entry if the message + * is invalid. We may also expose privileged information. */ + + r = json_parse(begin, 0, &v->current, NULL, NULL); + if (r < 0) { + /* If we encounter a parse failure flush all data. We cannot possibly recover from this, + * hence drop all buffered data now. */ + v->input_buffer_index = v->input_buffer_size = v->input_buffer_unscanned = 0; + return varlink_log_errno(v, r, "Failed to parse JSON: %m"); + } + + v->input_buffer_size -= sz; + + if (v->input_buffer_size == 0) + v->input_buffer_index = 0; + else + v->input_buffer_index += sz; + + v->input_buffer_unscanned = v->input_buffer_size; + return 1; +} + +static int varlink_test_timeout(Varlink *v) { + assert(v); + + if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING)) + return 0; + if (v->timeout == USEC_INFINITY) + return 0; + + if (now(CLOCK_MONOTONIC) < usec_add(v->timestamp, v->timeout)) + return 0; + + varlink_set_state(v, VARLINK_PENDING_TIMEOUT); + + return 1; +} + +static int varlink_dispatch_local_error(Varlink *v, const char *error) { + int r; + + assert(v); + assert(error); + + if (!v->reply_callback) + return 0; + + r = v->reply_callback(v, NULL, error, VARLINK_REPLY_ERROR|VARLINK_REPLY_LOCAL, v->userdata); + if (r < 0) + log_debug_errno(r, "Reply callback returned error, ignoring: %m"); + + return 1; +} + +static int varlink_dispatch_timeout(Varlink *v) { + assert(v); + + if (v->state != VARLINK_PENDING_TIMEOUT) + return 0; + + varlink_set_state(v, VARLINK_PROCESSING_TIMEOUT); + varlink_dispatch_local_error(v, VARLINK_ERROR_TIMEOUT); + varlink_close(v); + + return 1; +} + +static int varlink_dispatch_disconnect(Varlink *v) { + assert(v); + + if (v->state != VARLINK_PENDING_DISCONNECT) + return 0; + + varlink_set_state(v, VARLINK_PROCESSING_DISCONNECT); + varlink_dispatch_local_error(v, VARLINK_ERROR_DISCONNECTED); + varlink_close(v); + + return 1; +} + +static int varlink_sanitize_parameters(JsonVariant **v) { + assert(v); + + /* Varlink always wants a parameters list, hence make one if the caller doesn't want any */ + if (!*v) + return json_variant_new_object(v, NULL, 0); + else if (!json_variant_is_object(*v)) + return -EINVAL; + + return 0; +} + +static int varlink_dispatch_reply(Varlink *v) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + VarlinkReplyFlags flags = 0; + const char *error = NULL; + JsonVariant *e; + const char *k; + int r; + + assert(v); + + if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING)) + return 0; + if (!v->current) + return 0; + + assert(v->n_pending > 0); + + if (!json_variant_is_object(v->current)) + goto invalid; + + JSON_VARIANT_OBJECT_FOREACH(k, e, v->current) { + + if (streq(k, "error")) { + if (error) + goto invalid; + if (!json_variant_is_string(e)) + goto invalid; + + error = json_variant_string(e); + flags |= VARLINK_REPLY_ERROR; + + } else if (streq(k, "parameters")) { + if (parameters) + goto invalid; + if (!json_variant_is_object(e)) + goto invalid; + + parameters = json_variant_ref(e); + + } else if (streq(k, "continues")) { + if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) + goto invalid; + + if (!json_variant_is_boolean(e)) + goto invalid; + + if (json_variant_boolean(e)) + flags |= VARLINK_REPLY_CONTINUES; + } else + goto invalid; + } + + /* Replies with 'continue' set are only OK if we set 'more' when the method call was initiated */ + if (v->state != VARLINK_AWAITING_REPLY_MORE && FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) + goto invalid; + + /* An error is final */ + if (error && FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) + goto invalid; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + goto invalid; + + if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE)) { + varlink_set_state(v, VARLINK_PROCESSING_REPLY); + + if (v->reply_callback) { + r = v->reply_callback(v, parameters, error, flags, v->userdata); + if (r < 0) + log_debug_errno(r, "Reply callback returned error, ignoring: %m"); + } + + v->current = json_variant_unref(v->current); + + if (v->state == VARLINK_PROCESSING_REPLY) { + + assert(v->n_pending > 0); + + if (!FLAGS_SET(flags, VARLINK_REPLY_CONTINUES)) + v->n_pending--; + + varlink_set_state(v, + FLAGS_SET(flags, VARLINK_REPLY_CONTINUES) ? VARLINK_AWAITING_REPLY_MORE : + v->n_pending == 0 ? VARLINK_IDLE_CLIENT : VARLINK_AWAITING_REPLY); + } + } else { + assert(v->state == VARLINK_CALLING); + varlink_set_state(v, VARLINK_CALLED); + } + + return 1; + +invalid: + varlink_set_state(v, VARLINK_PROCESSING_FAILURE); + varlink_dispatch_local_error(v, VARLINK_ERROR_PROTOCOL); + varlink_close(v); + + return 1; +} + +static int varlink_dispatch_method(Varlink *v) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + VarlinkMethodFlags flags = 0; + const char *method = NULL, *error; + JsonVariant *e; + VarlinkMethod callback; + const char *k; + int r; + + assert(v); + + if (v->state != VARLINK_IDLE_SERVER) + return 0; + if (!v->current) + return 0; + + if (!json_variant_is_object(v->current)) + goto invalid; + + JSON_VARIANT_OBJECT_FOREACH(k, e, v->current) { + + if (streq(k, "method")) { + if (method) + goto invalid; + if (!json_variant_is_string(e)) + goto invalid; + + method = json_variant_string(e); + + } else if (streq(k, "parameters")) { + if (parameters) + goto invalid; + if (!json_variant_is_object(e)) + goto invalid; + + parameters = json_variant_ref(e); + + } else if (streq(k, "oneway")) { + + if ((flags & (VARLINK_METHOD_ONEWAY|VARLINK_METHOD_MORE)) != 0) + goto invalid; + + if (!json_variant_is_boolean(e)) + goto invalid; + + if (json_variant_boolean(e)) + flags |= VARLINK_METHOD_ONEWAY; + + } else if (streq(k, "more")) { + + if ((flags & (VARLINK_METHOD_ONEWAY|VARLINK_METHOD_MORE)) != 0) + goto invalid; + + if (!json_variant_is_boolean(e)) + goto invalid; + + if (json_variant_boolean(e)) + flags |= VARLINK_METHOD_MORE; + + } else + goto invalid; + } + + if (!method) + goto invalid; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + goto fail; + + varlink_set_state(v, (flags & VARLINK_METHOD_MORE) ? VARLINK_PROCESSING_METHOD_MORE : + (flags & VARLINK_METHOD_ONEWAY) ? VARLINK_PROCESSING_METHOD_ONEWAY : + VARLINK_PROCESSING_METHOD); + + assert(v->server); + + if (STR_IN_SET(method, "org.varlink.service.GetInfo", "org.varlink.service.GetInterface")) { + /* For now, we don't implement a single of varlink's own methods */ + callback = NULL; + error = VARLINK_ERROR_METHOD_NOT_IMPLEMENTED; + } else if (startswith(method, "org.varlink.service.")) { + callback = NULL; + error = VARLINK_ERROR_METHOD_NOT_FOUND; + } else { + callback = hashmap_get(v->server->methods, method); + error = VARLINK_ERROR_METHOD_NOT_FOUND; + } + + if (callback) { + r = callback(v, parameters, flags, v->userdata); + if (r < 0) { + log_debug_errno(r, "Callback for %s returned error: %m", method); + + /* We got an error back from the callback. Propagate it to the client if the method call remains unanswered. */ + if (!FLAGS_SET(flags, VARLINK_METHOD_ONEWAY)) { + r = varlink_error_errno(v, r); + if (r < 0) + return r; + } + } + } else if (!FLAGS_SET(flags, VARLINK_METHOD_ONEWAY)) { + assert(error); + + r = varlink_errorb(v, error, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)))); + if (r < 0) + return r; + } + + switch (v->state) { + + case VARLINK_PROCESSED_METHOD: /* Method call is fully processed */ + case VARLINK_PROCESSING_METHOD_ONEWAY: /* ditto */ + v->current = json_variant_unref(v->current); + varlink_set_state(v, VARLINK_IDLE_SERVER); + break; + + case VARLINK_PROCESSING_METHOD: /* Method call wasn't replied to, will be replied to later */ + varlink_set_state(v, VARLINK_PENDING_METHOD); + break; + + case VARLINK_PROCESSING_METHOD_MORE: /* No reply for a "more" message was sent, more to come */ + varlink_set_state(v, VARLINK_PENDING_METHOD_MORE); + break; + + default: + assert_not_reached("Unexpected state"); + + } + + return r; + +invalid: + r = -EINVAL; + +fail: + varlink_set_state(v, VARLINK_PROCESSING_FAILURE); + varlink_dispatch_local_error(v, VARLINK_ERROR_PROTOCOL); + varlink_close(v); + + return r; +} + +int varlink_process(Varlink *v) { + int r; + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + + varlink_ref(v); + + r = varlink_write(v); + if (r != 0) + goto finish; + + r = varlink_dispatch_reply(v); + if (r != 0) + goto finish; + + r = varlink_dispatch_method(v); + if (r != 0) + goto finish; + + r = varlink_parse_message(v); + if (r != 0) + goto finish; + + r = varlink_read(v); + if (r != 0) + goto finish; + + r = varlink_test_disconnect(v); + if (r != 0) + goto finish; + + r = varlink_dispatch_disconnect(v); + if (r != 0) + goto finish; + + r = varlink_test_timeout(v); + if (r != 0) + goto finish; + + r = varlink_dispatch_timeout(v); + if (r != 0) + goto finish; + +finish: + if (r >= 0 && v->defer_event_source) { + int q; + + /* If we did some processing, make sure we are called again soon */ + q = sd_event_source_set_enabled(v->defer_event_source, r > 0 ? SD_EVENT_ON : SD_EVENT_OFF); + if (q < 0) + r = q; + } + + if (r < 0) { + if (VARLINK_STATE_IS_ALIVE(v->state)) + /* Initiate disconnection */ + varlink_set_state(v, VARLINK_PENDING_DISCONNECT); + else + /* We failed while disconnecting, in that case close right away */ + varlink_close(v); + } + + varlink_unref(v); + return r; +} + +static void handle_revents(Varlink *v, int revents) { + assert(v); + + if (v->connecting) { + /* If we have seen POLLOUT or POLLHUP on a socket we are asynchronously waiting a connect() + * to complete on, we know we are ready. We don't read the connection error here though, + * we'll get the error on the next read() or write(). */ + if ((revents & (POLLOUT|POLLHUP)) == 0) + return; + + varlink_log(v, "Anynchronous connection completed."); + v->connecting = false; + } else { + /* Note that we don't care much about POLLIN/POLLOUT here, we'll just try reading and writing + * what we can. However, we do care about POLLHUP to detect connection termination even if we + * momentarily don't want to read nor write anything. */ + + if (!FLAGS_SET(revents, POLLHUP)) + return; + + varlink_log(v, "Got POLLHUP from socket."); + v->got_pollhup = true; + } +} + +int varlink_wait(Varlink *v, usec_t timeout) { + int r, fd, events; + usec_t t; + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + + r = varlink_get_timeout(v, &t); + if (r < 0) + return r; + if (t != USEC_INFINITY) { + usec_t n; + + n = now(CLOCK_MONOTONIC); + if (t < n) + t = 0; + else + t = usec_sub_unsigned(t, n); + } + + if (timeout != USEC_INFINITY && + (t == USEC_INFINITY || timeout < t)) + t = timeout; + + fd = varlink_get_fd(v); + if (fd < 0) + return fd; + + events = varlink_get_events(v); + if (events < 0) + return events; + + r = fd_wait_for_event(fd, events, t); + if (r <= 0) + return r; + + handle_revents(v, r); + return 1; +} + +int varlink_get_fd(Varlink *v) { + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + if (v->fd < 0) + return -EBADF; + + return v->fd; +} + +int varlink_get_events(Varlink *v) { + int ret = 0; + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + + if (v->connecting) /* When processing an asynchronous connect(), we only wait for EPOLLOUT, which + * tells us that the connection is now complete. Before that we should neither + * write() or read() from the fd. */ + return EPOLLOUT; + + if (!v->read_disconnected && + IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER) && + !v->current && + v->input_buffer_unscanned <= 0) + ret |= EPOLLIN; + + if (!v->write_disconnected && + v->output_buffer_size > 0) + ret |= EPOLLOUT; + + return ret; +} + +int varlink_get_timeout(Varlink *v, usec_t *ret) { + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + + if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING) && + v->timeout != USEC_INFINITY) { + if (ret) + *ret = usec_add(v->timestamp, v->timeout); + return 1; + } else { + if (ret) + *ret = USEC_INFINITY; + return 0; + } +} + +int varlink_flush(Varlink *v) { + int ret = 0, r; + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + + for (;;) { + if (v->output_buffer_size == 0) + break; + if (v->write_disconnected) + return -ECONNRESET; + + r = varlink_write(v); + if (r < 0) + return r; + if (r > 0) { + ret = 1; + continue; + } + + r = fd_wait_for_event(v->fd, POLLOUT, USEC_INFINITY); + if (r < 0) + return r; + + assert(r != 0); + + handle_revents(v, r); + } + + return ret; +} + +static void varlink_detach_server(Varlink *v) { + VarlinkServer *saved_server; + assert(v); + + if (!v->server) + return; + + if (v->server->by_uid && + v->ucred_acquired && + uid_is_valid(v->ucred.uid)) { + unsigned c; + + c = PTR_TO_UINT(hashmap_get(v->server->by_uid, UID_TO_PTR(v->ucred.uid))); + assert(c > 0); + + if (c == 1) + (void) hashmap_remove(v->server->by_uid, UID_TO_PTR(v->ucred.uid)); + else + (void) hashmap_replace(v->server->by_uid, UID_TO_PTR(v->ucred.uid), UINT_TO_PTR(c - 1)); + } + + assert(v->server->n_connections > 0); + v->server->n_connections--; + + /* If this is a connection associated to a server, then let's disconnect the server and the + * connection from each other. This drops the dangling reference that connect_callback() set up. But + * before we release the references, let's call the disconnection callback if it is defined. */ + + saved_server = TAKE_PTR(v->server); + + if (saved_server->disconnect_callback) + saved_server->disconnect_callback(saved_server, v, saved_server->userdata); + + varlink_server_unref(saved_server); + varlink_unref(v); +} + +int varlink_close(Varlink *v) { + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return 0; + + varlink_set_state(v, VARLINK_DISCONNECTED); + + /* Let's take a reference first, since varlink_detach_server() might drop the final (dangling) ref + * which would destroy us before we can call varlink_clear() */ + varlink_ref(v); + varlink_detach_server(v); + varlink_clear(v); + varlink_unref(v); + + return 1; +} + +Varlink* varlink_close_unref(Varlink *v) { + + if (!v) + return NULL; + + (void) varlink_close(v); + return varlink_unref(v); +} + +Varlink* varlink_flush_close_unref(Varlink *v) { + + if (!v) + return NULL; + + (void) varlink_flush(v); + (void) varlink_close(v); + return varlink_unref(v); +} + +static int varlink_enqueue_json(Varlink *v, JsonVariant *m) { + _cleanup_free_ char *text = NULL; + int r; + + assert(v); + assert(m); + + r = json_variant_format(m, 0, &text); + if (r < 0) + return r; + assert(text[r] == '\0'); + + if (v->output_buffer_size + r + 1 > VARLINK_BUFFER_MAX) + return -ENOBUFS; + + varlink_log(v, "Sending message: %s", text); + + if (v->output_buffer_size == 0) { + + free_and_replace(v->output_buffer, text); + + v->output_buffer_size = v->output_buffer_allocated = r + 1; + v->output_buffer_index = 0; + + } else if (v->output_buffer_index == 0) { + + if (!GREEDY_REALLOC(v->output_buffer, v->output_buffer_allocated, v->output_buffer_size + r + 1)) + return -ENOMEM; + + memcpy(v->output_buffer + v->output_buffer_size, text, r + 1); + v->output_buffer_size += r + 1; + + } else { + char *n; + const size_t new_size = v->output_buffer_size + r + 1; + + n = new(char, new_size); + if (!n) + return -ENOMEM; + + memcpy(mempcpy(n, v->output_buffer + v->output_buffer_index, v->output_buffer_size), text, r + 1); + + free_and_replace(v->output_buffer, n); + v->output_buffer_allocated = v->output_buffer_size = new_size; + v->output_buffer_index = 0; + } + + return 0; +} + +int varlink_send(Varlink *v, const char *method, JsonVariant *parameters) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + assert_return(v, -EINVAL); + assert_return(method, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + + /* We allow enqueuing multiple method calls at once! */ + if (!IN_SET(v->state, VARLINK_IDLE_CLIENT, VARLINK_AWAITING_REPLY)) + return -EBUSY; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + return r; + + r = json_build(&m, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)), + JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)), + JSON_BUILD_PAIR("oneway", JSON_BUILD_BOOLEAN(true)))); + if (r < 0) + return r; + + r = varlink_enqueue_json(v, m); + if (r < 0) + return r; + + /* No state change here, this is one-way only after all */ + v->timestamp = now(CLOCK_MONOTONIC); + return 0; +} + +int varlink_sendb(Varlink *v, const char *method, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + va_list ap; + int r; + + assert_return(v, -EINVAL); + + va_start(ap, method); + r = json_buildv(¶meters, ap); + va_end(ap); + + if (r < 0) + return r; + + return varlink_send(v, method, parameters); +} + +int varlink_invoke(Varlink *v, const char *method, JsonVariant *parameters) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + assert_return(v, -EINVAL); + assert_return(method, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + + /* We allow enqueuing multiple method calls at once! */ + if (!IN_SET(v->state, VARLINK_IDLE_CLIENT, VARLINK_AWAITING_REPLY)) + return -EBUSY; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + return r; + + r = json_build(&m, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)), + JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)))); + if (r < 0) + return r; + + r = varlink_enqueue_json(v, m); + if (r < 0) + return r; + + varlink_set_state(v, VARLINK_AWAITING_REPLY); + v->n_pending++; + v->timestamp = now(CLOCK_MONOTONIC); + + return 0; +} + +int varlink_invokeb(Varlink *v, const char *method, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + va_list ap; + int r; + + assert_return(v, -EINVAL); + + va_start(ap, method); + r = json_buildv(¶meters, ap); + va_end(ap); + + if (r < 0) + return r; + + return varlink_invoke(v, method, parameters); +} + +int varlink_observe(Varlink *v, const char *method, JsonVariant *parameters) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + assert_return(v, -EINVAL); + assert_return(method, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + /* Note that we don't allow enqueuing multiple method calls when we are in more/continues mode! We + * thus insist on an idle client here. */ + if (v->state != VARLINK_IDLE_CLIENT) + return -EBUSY; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + return r; + + r = json_build(&m, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)), + JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)), + JSON_BUILD_PAIR("more", JSON_BUILD_BOOLEAN(true)))); + if (r < 0) + return r; + + r = varlink_enqueue_json(v, m); + if (r < 0) + return r; + + + varlink_set_state(v, VARLINK_AWAITING_REPLY_MORE); + v->n_pending++; + v->timestamp = now(CLOCK_MONOTONIC); + + return 0; +} + +int varlink_observeb(Varlink *v, const char *method, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + va_list ap; + int r; + + assert_return(v, -EINVAL); + + va_start(ap, method); + r = json_buildv(¶meters, ap); + va_end(ap); + + if (r < 0) + return r; + + return varlink_observe(v, method, parameters); +} + +int varlink_call( + Varlink *v, + const char *method, + JsonVariant *parameters, + JsonVariant **ret_parameters, + const char **ret_error_id, + VarlinkReplyFlags *ret_flags) { + + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + assert_return(v, -EINVAL); + assert_return(method, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + if (!IN_SET(v->state, VARLINK_IDLE_CLIENT)) + return -EBUSY; + + assert(v->n_pending == 0); /* n_pending can't be > 0 if we are in VARLINK_IDLE_CLIENT state */ + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + return r; + + r = json_build(&m, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)), + JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)))); + if (r < 0) + return r; + + r = varlink_enqueue_json(v, m); + if (r < 0) + return r; + + varlink_set_state(v, VARLINK_CALLING); + v->n_pending++; + v->timestamp = now(CLOCK_MONOTONIC); + + while (v->state == VARLINK_CALLING) { + + r = varlink_process(v); + if (r < 0) + return r; + if (r > 0) + continue; + + r = varlink_wait(v, USEC_INFINITY); + if (r < 0) + return r; + } + + switch (v->state) { + + case VARLINK_CALLED: + assert(v->current); + + json_variant_unref(v->reply); + v->reply = TAKE_PTR(v->current); + + varlink_set_state(v, VARLINK_IDLE_CLIENT); + assert(v->n_pending == 1); + v->n_pending--; + + if (ret_parameters) + *ret_parameters = json_variant_by_key(v->reply, "parameters"); + if (ret_error_id) + *ret_error_id = json_variant_string(json_variant_by_key(v->reply, "error")); + if (ret_flags) + *ret_flags = 0; + + return 1; + + case VARLINK_PENDING_DISCONNECT: + case VARLINK_DISCONNECTED: + return -ECONNRESET; + + case VARLINK_PENDING_TIMEOUT: + return -ETIME; + + default: + assert_not_reached("Unexpected state after method call."); + } +} + +int varlink_callb( + Varlink *v, + const char *method, + JsonVariant **ret_parameters, + const char **ret_error_id, + VarlinkReplyFlags *ret_flags, ...) { + + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + va_list ap; + int r; + + assert_return(v, -EINVAL); + + va_start(ap, ret_flags); + r = json_buildv(¶meters, ap); + va_end(ap); + + if (r < 0) + return r; + + return varlink_call(v, method, parameters, ret_parameters, ret_error_id, ret_flags); +} + +int varlink_reply(Varlink *v, JsonVariant *parameters) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + if (!IN_SET(v->state, + VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE, + VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) + return -EBUSY; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + return r; + + r = json_build(&m, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)))); + if (r < 0) + return r; + + r = varlink_enqueue_json(v, m); + if (r < 0) + return r; + + if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) { + /* We just replied to a method call that was let hanging for a while (i.e. we were outside of + * the varlink_dispatch_method() stack frame), which means with this reply we are ready to + * process further messages. */ + v->current = json_variant_unref(v->current); + varlink_set_state(v, VARLINK_IDLE_SERVER); + } else + /* We replied to a method call from within the varlink_dispatch_method() stack frame), which + * means we should it handle the rest of the state engine. */ + varlink_set_state(v, VARLINK_PROCESSED_METHOD); + + return 1; +} + +int varlink_replyb(Varlink *v, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + va_list ap; + int r; + + assert_return(v, -EINVAL); + + va_start(ap, v); + r = json_buildv(¶meters, ap); + va_end(ap); + + if (r < 0) + return r; + + return varlink_reply(v, parameters); +} + +int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + assert_return(v, -EINVAL); + assert_return(error_id, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + if (!IN_SET(v->state, + VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE, + VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) + return -EBUSY; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + return r; + + r = json_build(&m, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("error", JSON_BUILD_STRING(error_id)), + JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)))); + if (r < 0) + return r; + + r = varlink_enqueue_json(v, m); + if (r < 0) + return r; + + if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) { + v->current = json_variant_unref(v->current); + varlink_set_state(v, VARLINK_IDLE_SERVER); + } else + varlink_set_state(v, VARLINK_PROCESSED_METHOD); + + return 1; +} + +int varlink_errorb(Varlink *v, const char *error_id, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + va_list ap; + int r; + + assert_return(v, -EINVAL); + assert_return(error_id, -EINVAL); + + va_start(ap, error_id); + r = json_buildv(¶meters, ap); + va_end(ap); + + if (r < 0) + return r; + + return varlink_error(v, error_id, parameters); +} + +int varlink_error_invalid_parameter(Varlink *v, JsonVariant *parameters) { + + assert_return(v, -EINVAL); + assert_return(parameters, -EINVAL); + + /* We expect to be called in one of two ways: the 'parameters' argument is a string variant in which + * case it is the parameter key name that is invalid. Or the 'parameters' argument is an object + * variant in which case we'll pull out the first key. The latter mode is useful in functions that + * don't expect any arguments. */ + + if (json_variant_is_string(parameters)) + return varlink_error(v, VARLINK_ERROR_INVALID_PARAMETER, parameters); + + if (json_variant_is_object(parameters) && + json_variant_elements(parameters) > 0) + return varlink_error(v, VARLINK_ERROR_INVALID_PARAMETER, + json_variant_by_index(parameters, 0)); + + return -EINVAL; +} + +int varlink_error_errno(Varlink *v, int error) { + return varlink_errorb( + v, + VARLINK_ERROR_SYSTEM, + JSON_BUILD_OBJECT(JSON_BUILD_PAIR("errno", JSON_BUILD_INTEGER(abs(error))))); +} + +int varlink_notify(Varlink *v, JsonVariant *parameters) { + _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; + int r; + + assert_return(v, -EINVAL); + + if (v->state == VARLINK_DISCONNECTED) + return -ENOTCONN; + if (!IN_SET(v->state, VARLINK_PROCESSING_METHOD_MORE, VARLINK_PENDING_METHOD_MORE)) + return -EBUSY; + + r = varlink_sanitize_parameters(¶meters); + if (r < 0) + return r; + + r = json_build(&m, JSON_BUILD_OBJECT( + JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)), + JSON_BUILD_PAIR("continues", JSON_BUILD_BOOLEAN(true)))); + if (r < 0) + return r; + + r = varlink_enqueue_json(v, m); + if (r < 0) + return r; + + /* No state change, as more is coming */ + return 1; +} + +int varlink_notifyb(Varlink *v, ...) { + _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL; + va_list ap; + int r; + + assert_return(v, -EINVAL); + + va_start(ap, v); + r = json_buildv(¶meters, ap); + va_end(ap); + + if (r < 0) + return r; + + return varlink_notify(v, parameters); +} + +int varlink_bind_reply(Varlink *v, VarlinkReply callback) { + assert_return(v, -EINVAL); + + if (callback && v->reply_callback && callback != v->reply_callback) + return -EBUSY; + + v->reply_callback = callback; + + return 0; +} + +void* varlink_set_userdata(Varlink *v, void *userdata) { + void *old; + + assert_return(v, NULL); + + old = v->userdata; + v->userdata = userdata; + + return old; +} + +void* varlink_get_userdata(Varlink *v) { + assert_return(v, NULL); + + return v->userdata; +} + +static int varlink_acquire_ucred(Varlink *v) { + int r; + + assert(v); + + if (v->ucred_acquired) + return 0; + + r = getpeercred(v->fd, &v->ucred); + if (r < 0) + return r; + + v->ucred_acquired = true; + return 0; +} + +int varlink_get_peer_uid(Varlink *v, uid_t *ret) { + int r; + + assert_return(v, -EINVAL); + assert_return(ret, -EINVAL); + + r = varlink_acquire_ucred(v); + if (r < 0) + return r; + + if (!uid_is_valid(v->ucred.uid)) + return -ENODATA; + + *ret = v->ucred.uid; + return 0; +} + +int varlink_get_peer_pid(Varlink *v, pid_t *ret) { + int r; + + assert_return(v, -EINVAL); + assert_return(ret, -EINVAL); + + r = varlink_acquire_ucred(v); + if (r < 0) + return r; + + if (!pid_is_valid(v->ucred.pid)) + return -ENODATA; + + *ret = v->ucred.pid; + return 0; +} + +int varlink_set_relative_timeout(Varlink *v, usec_t timeout) { + assert_return(v, -EINVAL); + assert_return(timeout > 0, -EINVAL); + + v->timeout = timeout; + return 0; +} + +VarlinkServer *varlink_get_server(Varlink *v) { + assert_return(v, NULL); + + return v->server; +} + +int varlink_set_description(Varlink *v, const char *description) { + assert_return(v, -EINVAL); + + return free_and_strdup(&v->description, description); +} + +static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + Varlink *v = userdata; + + assert(s); + assert(v); + + handle_revents(v, revents); + (void) varlink_process(v); + + return 1; +} + +static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) { + Varlink *v = userdata; + + assert(s); + assert(v); + + (void) varlink_process(v); + return 1; +} + +static int defer_callback(sd_event_source *s, void *userdata) { + Varlink *v = userdata; + + assert(s); + assert(v); + + (void) varlink_process(v); + return 1; +} + +static int prepare_callback(sd_event_source *s, void *userdata) { + Varlink *v = userdata; + int r, e; + usec_t until; + bool have_timeout; + + assert(s); + assert(v); + + e = varlink_get_events(v); + if (e < 0) + return e; + + r = sd_event_source_set_io_events(v->io_event_source, e); + if (r < 0) + return r; + + r = varlink_get_timeout(v, &until); + if (r < 0) + return r; + have_timeout = r > 0; + + if (have_timeout) { + r = sd_event_source_set_time(v->time_event_source, until); + if (r < 0) + return r; + } + + r = sd_event_source_set_enabled(v->time_event_source, have_timeout ? SD_EVENT_ON : SD_EVENT_OFF); + if (r < 0) + return r; + + return 1; +} + +static int quit_callback(sd_event_source *event, void *userdata) { + Varlink *v = userdata; + + assert(event); + assert(v); + + varlink_flush(v); + varlink_close(v); + + return 1; +} + +int varlink_attach_event(Varlink *v, sd_event *e, int64_t priority) { + int r; + + assert_return(v, -EINVAL); + assert_return(!v->event, -EBUSY); + + if (e) + v->event = sd_event_ref(e); + else { + r = sd_event_default(&v->event); + if (r < 0) + return r; + } + + r = sd_event_add_time(v->event, &v->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, v); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(v->time_event_source, priority); + if (r < 0) + goto fail; + + (void) sd_event_source_set_description(v->time_event_source, "varlink-time"); + + r = sd_event_add_exit(v->event, &v->quit_event_source, quit_callback, v); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(v->quit_event_source, priority); + if (r < 0) + goto fail; + + (void) sd_event_source_set_description(v->quit_event_source, "varlink-quit"); + + r = sd_event_add_io(v->event, &v->io_event_source, v->fd, 0, io_callback, v); + if (r < 0) + goto fail; + + r = sd_event_source_set_prepare(v->io_event_source, prepare_callback); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(v->io_event_source, priority); + if (r < 0) + goto fail; + + (void) sd_event_source_set_description(v->io_event_source, "varlink-io"); + + r = sd_event_add_defer(v->event, &v->defer_event_source, defer_callback, v); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(v->defer_event_source, priority); + if (r < 0) + goto fail; + + (void) sd_event_source_set_description(v->defer_event_source, "varlink-defer"); + + return 0; + +fail: + varlink_detach_event(v); + return r; +} + +void varlink_detach_event(Varlink *v) { + if (!v) + return; + + varlink_detach_event_sources(v); + + v->event = sd_event_unref(v->event); +} + +sd_event *varlink_get_event(Varlink *v) { + assert_return(v, NULL); + + return v->event; +} + +int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags) { + VarlinkServer *s; + + assert_return(ret, -EINVAL); + assert_return((flags & ~_VARLINK_SERVER_FLAGS_ALL) == 0, -EINVAL); + + s = new(VarlinkServer, 1); + if (!s) + return -ENOMEM; + + *s = (VarlinkServer) { + .n_ref = 1, + .flags = flags, + .connections_max = varlink_server_connections_max(NULL), + .connections_per_uid_max = varlink_server_connections_per_uid_max(NULL), + }; + + *ret = s; + return 0; +} + +static VarlinkServer* varlink_server_destroy(VarlinkServer *s) { + char *m; + + if (!s) + return NULL; + + varlink_server_shutdown(s); + + while ((m = hashmap_steal_first_key(s->methods))) + free(m); + + hashmap_free(s->methods); + hashmap_free(s->by_uid); + + sd_event_unref(s->event); + + free(s->description); + + return mfree(s); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(VarlinkServer, varlink_server, varlink_server_destroy); + +static int validate_connection(VarlinkServer *server, const struct ucred *ucred) { + int allowed = -1; + + assert(server); + assert(ucred); + + if (FLAGS_SET(server->flags, VARLINK_SERVER_ROOT_ONLY)) + allowed = ucred->uid == 0; + + if (FLAGS_SET(server->flags, VARLINK_SERVER_MYSELF_ONLY)) + allowed = allowed > 0 || ucred->uid == getuid(); + + if (allowed == 0) { /* Allow access when it is explicitly allowed or when neither + * VARLINK_SERVER_ROOT_ONLY nor VARLINK_SERVER_MYSELF_ONLY are specified. */ + varlink_server_log(server, "Unprivileged client attempted connection, refusing."); + return 0; + } + + if (server->n_connections >= server->connections_max) { + varlink_server_log(server, "Connection limit of %u reached, refusing.", server->connections_max); + return 0; + } + + if (FLAGS_SET(server->flags, VARLINK_SERVER_ACCOUNT_UID)) { + unsigned c; + + if (!uid_is_valid(ucred->uid)) { + varlink_server_log(server, "Client with invalid UID attempted connection, refusing."); + return 0; + } + + c = PTR_TO_UINT(hashmap_get(server->by_uid, UID_TO_PTR(ucred->uid))); + if (c >= server->connections_per_uid_max) { + varlink_server_log(server, "Per-UID connection limit of %u reached, refusing.", + server->connections_per_uid_max); + return 0; + } + } + + return 1; +} + +static int count_connection(VarlinkServer *server, struct ucred *ucred) { + unsigned c; + int r; + + assert(server); + assert(ucred); + + server->n_connections++; + + if (FLAGS_SET(server->flags, VARLINK_SERVER_ACCOUNT_UID)) { + r = hashmap_ensure_allocated(&server->by_uid, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to allocate UID hash table: %m"); + + c = PTR_TO_UINT(hashmap_get(server->by_uid, UID_TO_PTR(ucred->uid))); + + varlink_server_log(server, "Connections of user " UID_FMT ": %u (of %u max)", + ucred->uid, c, server->connections_per_uid_max); + + r = hashmap_replace(server->by_uid, UID_TO_PTR(ucred->uid), UINT_TO_PTR(c + 1)); + if (r < 0) + return log_debug_errno(r, "Failed to increment counter in UID hash table: %m"); + } + + return 0; +} + +int varlink_server_add_connection(VarlinkServer *server, int fd, Varlink **ret) { + _cleanup_(varlink_unrefp) Varlink *v = NULL; + bool ucred_acquired; + struct ucred ucred; + int r; + + assert_return(server, -EINVAL); + assert_return(fd >= 0, -EBADF); + + if ((server->flags & (VARLINK_SERVER_ROOT_ONLY|VARLINK_SERVER_ACCOUNT_UID)) != 0) { + r = getpeercred(fd, &ucred); + if (r < 0) + return varlink_server_log_errno(server, r, "Failed to acquire peer credentials of incoming socket, refusing: %m"); + + ucred_acquired = true; + + r = validate_connection(server, &ucred); + if (r < 0) + return r; + if (r == 0) + return -EPERM; + } else + ucred_acquired = false; + + r = varlink_new(&v); + if (r < 0) + return varlink_server_log_errno(server, r, "Failed to allocate connection object: %m"); + + r = count_connection(server, &ucred); + if (r < 0) + return r; + + v->fd = fd; + v->userdata = server->userdata; + if (ucred_acquired) { + v->ucred = ucred; + v->ucred_acquired = true; + } + + (void) asprintf(&v->description, "%s-%i", server->description ?: "varlink", v->fd); + + /* Link up the server and the connection, and take reference in both directions. Note that the + * reference on the connection is left dangling. It will be dropped when the connection is closed, + * which happens in varlink_close(), including in the event loop quit callback. */ + v->server = varlink_server_ref(server); + varlink_ref(v); + + varlink_set_state(v, VARLINK_IDLE_SERVER); + + if (server->event) { + r = varlink_attach_event(v, server->event, server->event_priority); + if (r < 0) { + varlink_log_errno(v, r, "Failed to attach new connection: %m"); + v->fd = -1; /* take the fd out of the connection again */ + varlink_close(v); + return r; + } + } + + if (ret) + *ret = v; + + return 0; +} + +static int connect_callback(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + VarlinkServerSocket *ss = userdata; + _cleanup_close_ int cfd = -1; + Varlink *v = NULL; + int r; + + assert(source); + assert(ss); + + varlink_server_log(ss->server, "New incoming connection."); + + cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); + if (cfd < 0) { + if (ERRNO_IS_ACCEPT_AGAIN(errno)) + return 0; + + return varlink_server_log_errno(ss->server, errno, "Failed to accept incoming socket: %m"); + } + + r = varlink_server_add_connection(ss->server, cfd, &v); + if (r < 0) + return 0; + + TAKE_FD(cfd); + + if (ss->server->connect_callback) { + r = ss->server->connect_callback(ss->server, v, ss->server->userdata); + if (r < 0) { + varlink_log_errno(v, r, "Connection callback returned error, disconnecting client: %m"); + varlink_close(v); + return 0; + } + } + + return 0; +} + +int varlink_server_listen_fd(VarlinkServer *s, int fd) { + _cleanup_free_ VarlinkServerSocket *ss = NULL; + int r; + + assert_return(s, -EINVAL); + assert_return(fd >= 0, -EBADF); + + r = fd_nonblock(fd, true); + if (r < 0) + return r; + + ss = new(VarlinkServerSocket, 1); + if (!ss) + return -ENOMEM; + + *ss = (VarlinkServerSocket) { + .server = s, + .fd = fd, + }; + + if (s->event) { + r = sd_event_add_io(s->event, &ss->event_source, fd, EPOLLIN, connect_callback, ss); + if (r < 0) + return r; + + r = sd_event_source_set_priority(ss->event_source, s->event_priority); + if (r < 0) + return r; + } + + LIST_PREPEND(sockets, s->sockets, TAKE_PTR(ss)); + return 0; +} + +int varlink_server_listen_address(VarlinkServer *s, const char *address, mode_t m) { + union sockaddr_union sockaddr; + socklen_t sockaddr_len; + _cleanup_close_ int fd = -1; + int r; + + assert_return(s, -EINVAL); + assert_return(address, -EINVAL); + assert_return((m & ~0777) == 0, -EINVAL); + + r = sockaddr_un_set_path(&sockaddr.un, address); + if (r < 0) + return r; + sockaddr_len = r; + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return -errno; + + fd = fd_move_above_stdio(fd); + + (void) sockaddr_un_unlink(&sockaddr.un); + + RUN_WITH_UMASK(~m & 0777) { + r = mac_selinux_bind(fd, &sockaddr.sa, sockaddr_len); + if (r < 0) + return r; + } + + if (listen(fd, SOMAXCONN) < 0) + return -errno; + + r = varlink_server_listen_fd(s, fd); + if (r < 0) + return r; + + TAKE_FD(fd); + return 0; +} + +void* varlink_server_set_userdata(VarlinkServer *s, void *userdata) { + void *ret; + + assert_return(s, NULL); + + ret = s->userdata; + s->userdata = userdata; + + return ret; +} + +void* varlink_server_get_userdata(VarlinkServer *s) { + assert_return(s, NULL); + + return s->userdata; +} + +static VarlinkServerSocket* varlink_server_socket_destroy(VarlinkServerSocket *ss) { + if (!ss) + return NULL; + + if (ss->server) + LIST_REMOVE(sockets, ss->server->sockets, ss); + + sd_event_source_disable_unref(ss->event_source); + + free(ss->address); + safe_close(ss->fd); + + return mfree(ss); +} + +int varlink_server_shutdown(VarlinkServer *s) { + assert_return(s, -EINVAL); + + while (s->sockets) + varlink_server_socket_destroy(s->sockets); + + return 0; +} + +int varlink_server_attach_event(VarlinkServer *s, sd_event *e, int64_t priority) { + VarlinkServerSocket *ss; + int r; + + assert_return(s, -EINVAL); + assert_return(!s->event, -EBUSY); + + if (e) + s->event = sd_event_ref(e); + else { + r = sd_event_default(&s->event); + if (r < 0) + return r; + } + + LIST_FOREACH(sockets, ss, s->sockets) { + assert(!ss->event_source); + + r = sd_event_add_io(s->event, &ss->event_source, ss->fd, EPOLLIN, connect_callback, ss); + if (r < 0) + goto fail; + + r = sd_event_source_set_priority(ss->event_source, priority); + if (r < 0) + goto fail; + } + + s->event_priority = priority; + return 0; + +fail: + varlink_server_detach_event(s); + return r; +} + +int varlink_server_detach_event(VarlinkServer *s) { + VarlinkServerSocket *ss; + + assert_return(s, -EINVAL); + + LIST_FOREACH(sockets, ss, s->sockets) { + + if (!ss->event_source) + continue; + + (void) sd_event_source_set_enabled(ss->event_source, SD_EVENT_OFF); + ss->event_source = sd_event_source_unref(ss->event_source); + } + + sd_event_unref(s->event); + return 0; +} + +sd_event *varlink_server_get_event(VarlinkServer *s) { + assert_return(s, NULL); + + return s->event; +} + +int varlink_server_bind_method(VarlinkServer *s, const char *method, VarlinkMethod callback) { + char *m; + int r; + + assert_return(s, -EINVAL); + assert_return(method, -EINVAL); + assert_return(callback, -EINVAL); + + if (startswith(method, "org.varlink.service.")) + return -EEXIST; + + r = hashmap_ensure_allocated(&s->methods, &string_hash_ops); + if (r < 0) + return r; + + m = strdup(method); + if (!m) + return -ENOMEM; + + r = hashmap_put(s->methods, m, callback); + if (r < 0) { + free(m); + return r; + } + + return 0; +} + +int varlink_server_bind_method_many_internal(VarlinkServer *s, ...) { + va_list ap; + int r = 0; + + assert_return(s, -EINVAL); + + va_start(ap, s); + for (;;) { + VarlinkMethod callback; + const char *method; + + method = va_arg(ap, const char *); + if (!method) + break; + + callback = va_arg(ap, VarlinkMethod); + + r = varlink_server_bind_method(s, method, callback); + if (r < 0) + break; + } + va_end(ap); + + return r; +} + +int varlink_server_bind_connect(VarlinkServer *s, VarlinkConnect callback) { + assert_return(s, -EINVAL); + + if (callback && s->connect_callback && callback != s->connect_callback) + return -EBUSY; + + s->connect_callback = callback; + return 0; +} + +int varlink_server_bind_disconnect(VarlinkServer *s, VarlinkDisconnect callback) { + assert_return(s, -EINVAL); + + if (callback && s->disconnect_callback && callback != s->disconnect_callback) + return -EBUSY; + + s->disconnect_callback = callback; + return 0; +} + +unsigned varlink_server_connections_max(VarlinkServer *s) { + int dts; + + /* If a server is specified, return the setting for that server, otherwise the default value */ + if (s) + return s->connections_max; + + dts = getdtablesize(); + assert_se(dts > 0); + + /* Make sure we never use up more than ¾th of RLIMIT_NOFILE for IPC */ + if (VARLINK_DEFAULT_CONNECTIONS_MAX > (unsigned) dts / 4 * 3) + return dts / 4 * 3; + + return VARLINK_DEFAULT_CONNECTIONS_MAX; +} + +unsigned varlink_server_connections_per_uid_max(VarlinkServer *s) { + unsigned m; + + if (s) + return s->connections_per_uid_max; + + /* Make sure to never use up more than ¾th of available connections for a single user */ + m = varlink_server_connections_max(NULL); + if (VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX > m) + return m / 4 * 3; + + return VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX; +} + +int varlink_server_set_connections_per_uid_max(VarlinkServer *s, unsigned m) { + assert_return(s, -EINVAL); + assert_return(m > 0, -EINVAL); + + s->connections_per_uid_max = m; + return 0; +} + +int varlink_server_set_connections_max(VarlinkServer *s, unsigned m) { + assert_return(s, -EINVAL); + assert_return(m > 0, -EINVAL); + + s->connections_max = m; + return 0; +} + +unsigned varlink_server_current_connections(VarlinkServer *s) { + assert_return(s, UINT_MAX); + + return s->n_connections; +} + +int varlink_server_set_description(VarlinkServer *s, const char *description) { + assert_return(s, -EINVAL); + + return free_and_strdup(&s->description, description); +} diff --git a/src/shared/varlink.h b/src/shared/varlink.h new file mode 100644 index 0000000..7ea1f91 --- /dev/null +++ b/src/shared/varlink.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "sd-event.h" + +#include "json.h" +#include "time-util.h" + +/* A minimal Varlink implementation. We only implement the minimal, obvious bits here though. No validation, + * no introspection, no name service, just the stuff actually needed. + * + * You might wonder why we aren't using libvarlink here? Varlink is a very simple protocol, which allows us + * to write our own implementation relatively easily. However, the main reasons are these: + * + * • We want to use our own JSON subsystem, with all the benefits that brings (i.e. accurate unsigned+signed + * 64bit integers, full fuzzing, logging during parsing and so on). If we'd want to use that with + * libvarlink we'd have to serialize and deserialize all the time from its own representation which is + * inefficient and nasty. + * + * • We want integration into sd-event, but also synchronous event-loop-less operation + * + * • We need proper per-UID accounting and access control, since we want to allow communication between + * unprivileged clients and privileged servers. + * + * • And of course, we don't want the name service and introspection stuff for now (though that might + * change). + */ + +typedef struct Varlink Varlink; +typedef struct VarlinkServer VarlinkServer; + +typedef enum VarlinkReplyFlags { + VARLINK_REPLY_ERROR = 1 << 0, + VARLINK_REPLY_CONTINUES = 1 << 1, + VARLINK_REPLY_LOCAL = 1 << 2, +} VarlinkReplyFlags; + +typedef enum VarlinkMethodFlags { + VARLINK_METHOD_ONEWAY = 1 << 0, + VARLINK_METHOD_MORE = 2 << 1, +} VarlinkMethodFlags; + +typedef enum VarlinkServerFlags { + VARLINK_SERVER_ROOT_ONLY = 1 << 0, /* Only accessible by root */ + VARLINK_SERVER_MYSELF_ONLY = 1 << 1, /* Only accessible by our own UID */ + VARLINK_SERVER_ACCOUNT_UID = 1 << 2, /* Do per user accounting */ + + _VARLINK_SERVER_FLAGS_ALL = (1 << 3) - 1, +} VarlinkServerFlags; + +typedef int (*VarlinkMethod)(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata); +typedef int (*VarlinkReply)(Varlink *link, JsonVariant *parameters, const char *error_id, VarlinkReplyFlags flags, void *userdata); +typedef int (*VarlinkConnect)(VarlinkServer *server, Varlink *link, void *userdata); +typedef void (*VarlinkDisconnect)(VarlinkServer *server, Varlink *link, void *userdata); + +int varlink_connect_address(Varlink **ret, const char *address); +int varlink_connect_fd(Varlink **ret, int fd); + +Varlink* varlink_ref(Varlink *link); +Varlink* varlink_unref(Varlink *v); + +int varlink_get_fd(Varlink *v); +int varlink_get_events(Varlink *v); +int varlink_get_timeout(Varlink *v, usec_t *ret); + +int varlink_attach_event(Varlink *v, sd_event *e, int64_t priority); +void varlink_detach_event(Varlink *v); +sd_event *varlink_get_event(Varlink *v); + +int varlink_process(Varlink *v); +int varlink_wait(Varlink *v, usec_t timeout); + +int varlink_flush(Varlink *v); +int varlink_close(Varlink *v); + +Varlink* varlink_flush_close_unref(Varlink *v); +Varlink* varlink_close_unref(Varlink *v); + +/* Enqueue method call, not expecting a reply */ +int varlink_send(Varlink *v, const char *method, JsonVariant *parameters); +int varlink_sendb(Varlink *v, const char *method, ...); + +/* Send method call and wait for reply */ +int varlink_call(Varlink *v, const char *method, JsonVariant *parameters, JsonVariant **ret_parameters, const char **ret_error_id, VarlinkReplyFlags *ret_flags); +int varlink_callb(Varlink *v, const char *method, JsonVariant **ret_parameters, const char **ret_error_id, VarlinkReplyFlags *ret_flags, ...); + +/* Enqueue method call, expect a reply, which is eventually delivered to the reply callback */ +int varlink_invoke(Varlink *v, const char *method, JsonVariant *parameters); +int varlink_invokeb(Varlink *v, const char *method, ...); + +/* Enqueue method call, expect a reply now, and possibly more later, which are all delivered to the reply callback */ +int varlink_observe(Varlink *v, const char *method, JsonVariant *parameters); +int varlink_observeb(Varlink *v, const char *method, ...); + +/* Enqueue a final reply */ +int varlink_reply(Varlink *v, JsonVariant *parameters); +int varlink_replyb(Varlink *v, ...); + +/* Enqueue a (final) error */ +int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters); +int varlink_errorb(Varlink *v, const char *error_id, ...); +int varlink_error_invalid_parameter(Varlink *v, JsonVariant *parameters); +int varlink_error_errno(Varlink *v, int error); + +/* Enqueue a "more" reply */ +int varlink_notify(Varlink *v, JsonVariant *parameters); +int varlink_notifyb(Varlink *v, ...); + +/* Bind a disconnect, reply or timeout callback */ +int varlink_bind_reply(Varlink *v, VarlinkReply reply); + +void* varlink_set_userdata(Varlink *v, void *userdata); +void* varlink_get_userdata(Varlink *v); + +int varlink_get_peer_uid(Varlink *v, uid_t *ret); +int varlink_get_peer_pid(Varlink *v, pid_t *ret); + +int varlink_set_relative_timeout(Varlink *v, usec_t usec); + +VarlinkServer* varlink_get_server(Varlink *v); + +int varlink_set_description(Varlink *v, const char *d); + +/* Create a varlink server */ +int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags); +VarlinkServer *varlink_server_ref(VarlinkServer *s); +VarlinkServer *varlink_server_unref(VarlinkServer *s); + +/* Add addresses or fds to listen on */ +int varlink_server_listen_address(VarlinkServer *s, const char *address, mode_t mode); +int varlink_server_listen_fd(VarlinkServer *s, int fd); +int varlink_server_add_connection(VarlinkServer *s, int fd, Varlink **ret); + +/* Bind callbacks */ +int varlink_server_bind_method(VarlinkServer *s, const char *method, VarlinkMethod callback); +int varlink_server_bind_method_many_internal(VarlinkServer *s, ...); +#define varlink_server_bind_method_many(s, ...) varlink_server_bind_method_many_internal(s, __VA_ARGS__, NULL) +int varlink_server_bind_connect(VarlinkServer *s, VarlinkConnect connect); +int varlink_server_bind_disconnect(VarlinkServer *s, VarlinkDisconnect disconnect); + +void* varlink_server_set_userdata(VarlinkServer *s, void *userdata); +void* varlink_server_get_userdata(VarlinkServer *s); + +int varlink_server_attach_event(VarlinkServer *v, sd_event *e, int64_t priority); +int varlink_server_detach_event(VarlinkServer *v); +sd_event *varlink_server_get_event(VarlinkServer *v); + +int varlink_server_shutdown(VarlinkServer *server); + +unsigned varlink_server_connections_max(VarlinkServer *s); +unsigned varlink_server_connections_per_uid_max(VarlinkServer *s); + +int varlink_server_set_connections_per_uid_max(VarlinkServer *s, unsigned m); +int varlink_server_set_connections_max(VarlinkServer *s, unsigned m); + +unsigned varlink_server_current_connections(VarlinkServer *s); + +int varlink_server_set_description(VarlinkServer *s, const char *description); + +DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_unref); +DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_close_unref); +DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_flush_close_unref); +DEFINE_TRIVIAL_CLEANUP_FUNC(VarlinkServer *, varlink_server_unref); + +#define VARLINK_ERROR_DISCONNECTED "io.systemd.Disconnected" +#define VARLINK_ERROR_TIMEOUT "io.systemd.TimedOut" +#define VARLINK_ERROR_PROTOCOL "io.systemd.Protocol" +#define VARLINK_ERROR_SYSTEM "io.systemd.System" + +#define VARLINK_ERROR_INTERFACE_NOT_FOUND "org.varlink.service.InterfaceNotFound" +#define VARLINK_ERROR_METHOD_NOT_FOUND "org.varlink.service.MethodNotFound" +#define VARLINK_ERROR_METHOD_NOT_IMPLEMENTED "org.varlink.service.MethodNotImplemented" +#define VARLINK_ERROR_INVALID_PARAMETER "org.varlink.service.InvalidParameter" +#define VARLINK_ERROR_SUBSCRIPTION_TAKEN "org.varlink.service.SubscriptionTaken" diff --git a/src/shared/verbs.c b/src/shared/verbs.c new file mode 100644 index 0000000..2d19172 --- /dev/null +++ b/src/shared/verbs.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <getopt.h> +#include <stdbool.h> +#include <stddef.h> + +#include "env-util.h" +#include "log.h" +#include "macro.h" +#include "process-util.h" +#include "string-util.h" +#include "verbs.h" +#include "virt.h" + +/* Wraps running_in_chroot() which is used in various places, but also adds an environment variable check so external + * processes can reliably force this on. + */ +bool running_in_chroot_or_offline(void) { + int r; + + /* Added to support use cases like rpm-ostree, where from %post scripts we only want to execute "preset", but + * not "start"/"restart" for example. + * + * See docs/ENVIRONMENT.md for docs. + */ + r = getenv_bool("SYSTEMD_OFFLINE"); + if (r < 0 && r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_OFFLINE: %m"); + else if (r >= 0) + return r > 0; + + /* We've had this condition check for a long time which basically checks for legacy chroot case like Fedora's + * "mock", which is used for package builds. We don't want to try to start systemd services there, since + * without --new-chroot we don't even have systemd running, and even if we did, adding a concept of background + * daemons to builds would be an enormous change, requiring considering things like how the journal output is + * handled, etc. And there's really not a use case today for a build talking to a service. + * + * Note this call itself also looks for a different variable SYSTEMD_IGNORE_CHROOT=1. + */ + r = running_in_chroot(); + if (r < 0) + log_debug_errno(r, "running_in_chroot(): %m"); + + return r > 0; +} + +const Verb* verbs_find_verb(const char *name, const Verb verbs[]) { + for (size_t i = 0; verbs[i].dispatch; i++) + if (streq_ptr(name, verbs[i].verb) || + (!name && FLAGS_SET(verbs[i].flags, VERB_DEFAULT))) + return &verbs[i]; + + /* At the end of the list? */ + return NULL; +} + +int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata) { + const Verb *verb; + const char *name; + int left; + + assert(verbs); + assert(verbs[0].dispatch); + assert(argc >= 0); + assert(argv); + assert(argc >= optind); + + left = argc - optind; + argv += optind; + optind = 0; + name = argv[0]; + + verb = verbs_find_verb(name, verbs); + if (!verb) { + if (name) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown command verb %s.", name); + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Command verb required."); + } + + if (!name) + left = 1; + + if (verb->min_args != VERB_ANY && + (unsigned) left < verb->min_args) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too few arguments."); + + if (verb->max_args != VERB_ANY && + (unsigned) left > verb->max_args) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too many arguments."); + + if ((verb->flags & VERB_ONLINE_ONLY) && running_in_chroot_or_offline()) { + log_info("Running in chroot, ignoring command '%s'", name ?: verb->verb); + return 0; + } + + if (name) + return verb->dispatch(left, argv, userdata); + else { + char* fake[2] = { + (char*) verb->verb, + NULL + }; + + return verb->dispatch(1, fake, userdata); + } +} diff --git a/src/shared/verbs.h b/src/shared/verbs.h new file mode 100644 index 0000000..245bb37 --- /dev/null +++ b/src/shared/verbs.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#define VERB_ANY ((unsigned) -1) + +typedef enum VerbFlags { + VERB_DEFAULT = 1 << 0, /* The verb to run if no verb is specified */ + VERB_ONLINE_ONLY = 1 << 1, /* Just do nothing when running in chroot or offline */ +} VerbFlags; + +typedef struct { + const char *verb; + unsigned min_args, max_args; + VerbFlags flags; + int (* const dispatch)(int argc, char *argv[], void *userdata); +} Verb; + +bool running_in_chroot_or_offline(void); + +const Verb* verbs_find_verb(const char *name, const Verb verbs[]); +int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata); diff --git a/src/shared/vlan-util.c b/src/shared/vlan-util.c new file mode 100644 index 0000000..cb43d50 --- /dev/null +++ b/src/shared/vlan-util.c @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "conf-parser.h" +#include "parse-util.h" +#include "string-util.h" +#include "vlan-util.h" + +int parse_vlanid(const char *p, uint16_t *ret) { + uint16_t id; + int r; + + assert(p); + assert(ret); + + r = safe_atou16(p, &id); + if (r < 0) + return r; + if (!vlanid_is_valid(id)) + return -ERANGE; + + *ret = id; + return 0; +} + +int parse_vid_range(const char *p, uint16_t *vid, uint16_t *vid_end) { + unsigned lower, upper; + int r; + + r = parse_range(p, &lower, &upper); + if (r < 0) + return r; + + if (lower > VLANID_MAX || upper > VLANID_MAX || lower > upper) + return -EINVAL; + + *vid = lower; + *vid_end = upper; + return 0; +} + +int config_parse_default_port_vlanid( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + uint16_t *id = data; + + assert(lvalue); + assert(rvalue); + assert(data); + + if (streq(rvalue, "none")) { + *id = 0; + return 0; + } + + return config_parse_vlanid(unit, filename, line, section, section_line, + lvalue, ltype, rvalue, data, userdata); +} + +int config_parse_vlanid( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint16_t *id = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_vlanid(rvalue, id); + if (r == -ERANGE) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "VLAN identifier outside of valid range 0…4094, ignoring: %s", rvalue); + return 0; + } + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse VLAN identifier value, ignoring: %s", rvalue); + return 0; + } + + return 0; +} diff --git a/src/shared/vlan-util.h b/src/shared/vlan-util.h new file mode 100644 index 0000000..0336908 --- /dev/null +++ b/src/shared/vlan-util.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <inttypes.h> + +#include "conf-parser.h" + +#define VLANID_MAX 4094 +#define VLANID_INVALID UINT16_MAX + +/* Note that we permit VLAN Id 0 here, as that is apparently OK by the Linux kernel */ +static inline bool vlanid_is_valid(uint16_t id) { + return id <= VLANID_MAX; +} + +int parse_vlanid(const char *p, uint16_t *ret); +int parse_vid_range(const char *p, uint16_t *vid, uint16_t *vid_end); + +CONFIG_PARSER_PROTOTYPE(config_parse_default_port_vlanid); +CONFIG_PARSER_PROTOTYPE(config_parse_vlanid); diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c new file mode 100644 index 0000000..3323897 --- /dev/null +++ b/src/shared/volatile-util.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> + +#include "alloc-util.h" +#include "macro.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "string-table.h" +#include "string-util.h" +#include "volatile-util.h" + +int query_volatile_mode(VolatileMode *ret) { + _cleanup_free_ char *mode = NULL; + int r; + + r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode); + if (r < 0) + return r; + if (r == 0) { + *ret = VOLATILE_NO; + return 0; + } + + if (mode) { + VolatileMode m; + + m = volatile_mode_from_string(mode); + if (m < 0) + return -EINVAL; + + *ret = m; + } else + *ret = VOLATILE_YES; + + return 1; +} + +static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = { + [VOLATILE_NO] = "no", + [VOLATILE_YES] = "yes", + [VOLATILE_STATE] = "state", + [VOLATILE_OVERLAY] = "overlay", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES); diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h new file mode 100644 index 0000000..9a1bb38 --- /dev/null +++ b/src/shared/volatile-util.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +typedef enum VolatileMode { + VOLATILE_NO, + VOLATILE_YES, + VOLATILE_STATE, + VOLATILE_OVERLAY, + _VOLATILE_MODE_MAX, + _VOLATILE_MODE_INVALID = -1 +} VolatileMode; + +VolatileMode volatile_mode_from_string(const char *s); +const char* volatile_mode_to_string(VolatileMode m); + +int query_volatile_mode(VolatileMode *ret); diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c new file mode 100644 index 0000000..d33acaf --- /dev/null +++ b/src/shared/watchdog.c @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <syslog.h> +#include <unistd.h> +#include <linux/watchdog.h> + +#include "errno-util.h" +#include "fd-util.h" +#include "log.h" +#include "string-util.h" +#include "time-util.h" +#include "watchdog.h" + +static int watchdog_fd = -1; +static char *watchdog_device = NULL; +static usec_t watchdog_timeout = USEC_INFINITY; +static usec_t watchdog_last_ping = USEC_INFINITY; + +static int update_timeout(void) { + if (watchdog_fd < 0) + return 0; + if (watchdog_timeout == USEC_INFINITY) + return 0; + + if (watchdog_timeout == 0) { + int flags; + + flags = WDIOS_DISABLECARD; + if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) + return log_warning_errno(errno, "Failed to disable hardware watchdog: %m"); + } else { + char buf[FORMAT_TIMESPAN_MAX]; + int sec, flags; + usec_t t; + + t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC); + sec = (int) t >= INT_MAX ? INT_MAX : t; /* Saturate */ + if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0) + return log_warning_errno(errno, "Failed to set timeout to %is: %m", sec); + + watchdog_timeout = (usec_t) sec * USEC_PER_SEC; + log_info("Set hardware watchdog to %s.", format_timespan(buf, sizeof(buf), watchdog_timeout, 0)); + + flags = WDIOS_ENABLECARD; + if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) { + /* ENOTTY means the watchdog is always enabled so we're fine */ + log_full(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, + "Failed to enable hardware watchdog: %m"); + if (!ERRNO_IS_NOT_SUPPORTED(errno)) + return -errno; + } + + if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0) + return log_warning_errno(errno, "Failed to ping hardware watchdog: %m"); + + watchdog_last_ping = now(clock_boottime_or_monotonic()); + } + + return 0; +} + +static int open_watchdog(void) { + struct watchdog_info ident; + const char *fn; + + if (watchdog_fd >= 0) + return 0; + + fn = watchdog_device ?: "/dev/watchdog"; + watchdog_fd = open(fn, O_WRONLY|O_CLOEXEC); + if (watchdog_fd < 0) + return log_debug_errno(errno, "Failed to open watchdog device %s: %m", fn); + + if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident) < 0) + log_debug_errno(errno, "Hardware watchdog %s does not support WDIOC_GETSUPPORT ioctl: %m", fn); + else + log_info("Using hardware watchdog '%s', version %x, device %s", + ident.identity, + ident.firmware_version, + fn); + + return update_timeout(); +} + +int watchdog_set_device(char *path) { + int r; + + r = free_and_strdup(&watchdog_device, path); + if (r < 0) + return r; + + if (r > 0) /* watchdog_device changed */ + watchdog_fd = safe_close(watchdog_fd); + + return r; +} + +int watchdog_set_timeout(usec_t *usec) { + int r; + + watchdog_timeout = *usec; + + /* If we didn't open the watchdog yet and didn't get any explicit timeout value set, don't do + * anything */ + if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY) + return 0; + + if (watchdog_fd < 0) + r = open_watchdog(); + else + r = update_timeout(); + + *usec = watchdog_timeout; + return r; +} + +usec_t watchdog_runtime_wait(void) { + usec_t rtwait, ntime; + + if (!timestamp_is_set(watchdog_timeout)) + return USEC_INFINITY; + + /* Sleep half the watchdog timeout since the last successful ping at most */ + if (timestamp_is_set(watchdog_last_ping)) { + ntime = now(clock_boottime_or_monotonic()); + assert(ntime >= watchdog_last_ping); + rtwait = usec_sub_unsigned(watchdog_last_ping + (watchdog_timeout / 2), ntime); + } else + rtwait = watchdog_timeout / 2; + + return rtwait; +} + +int watchdog_ping(void) { + usec_t ntime; + int r; + + ntime = now(clock_boottime_or_monotonic()); + + /* Never ping earlier than watchdog_timeout/4 and try to ping + * by watchdog_timeout/2 plus scheduling latencies the latest */ + if (timestamp_is_set(watchdog_last_ping)) { + assert(ntime >= watchdog_last_ping); + if ((ntime - watchdog_last_ping) < (watchdog_timeout / 4)) + return 0; + } + + if (watchdog_fd < 0) { + r = open_watchdog(); + if (r < 0) + return r; + } + + if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0) + return log_warning_errno(errno, "Failed to ping hardware watchdog: %m"); + + watchdog_last_ping = ntime; + return 0; +} + +void watchdog_close(bool disarm) { + if (watchdog_fd < 0) + return; + + if (disarm) { + int flags; + + /* Explicitly disarm it */ + flags = WDIOS_DISABLECARD; + if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) + log_warning_errno(errno, "Failed to disable hardware watchdog: %m"); + + /* To be sure, use magic close logic, too */ + for (;;) { + static const char v = 'V'; + + if (write(watchdog_fd, &v, 1) > 0) + break; + + if (errno != EINTR) { + log_error_errno(errno, "Failed to disarm watchdog timer: %m"); + break; + } + } + } + + watchdog_fd = safe_close(watchdog_fd); +} diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h new file mode 100644 index 0000000..b7587db --- /dev/null +++ b/src/shared/watchdog.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "time-util.h" +#include "util.h" + +int watchdog_set_device(char *path); +int watchdog_set_timeout(usec_t *usec); +int watchdog_ping(void); +void watchdog_close(bool disarm); +usec_t watchdog_runtime_wait(void); + +static inline void watchdog_free_device(void) { + (void) watchdog_set_device(NULL); +} diff --git a/src/shared/web-util.c b/src/shared/web-util.c new file mode 100644 index 0000000..82cd5fb --- /dev/null +++ b/src/shared/web-util.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdbool.h> + +#include "string-util.h" +#include "strv.h" +#include "utf8.h" +#include "web-util.h" + +bool http_etag_is_valid(const char *etag) { + if (isempty(etag)) + return false; + + if (!endswith(etag, "\"")) + return false; + + if (!STARTSWITH_SET(etag, "\"", "W/\"")) + return false; + + return true; +} + +bool http_url_is_valid(const char *url) { + const char *p; + + if (isempty(url)) + return false; + + p = STARTSWITH_SET(url, "http://", "https://"); + if (!p) + return false; + + if (isempty(p)) + return false; + + return ascii_is_valid(p); +} + +bool documentation_url_is_valid(const char *url) { + const char *p; + + if (isempty(url)) + return false; + + if (http_url_is_valid(url)) + return true; + + p = STARTSWITH_SET(url, "file:/", "info:", "man:"); + if (isempty(p)) + return false; + + return ascii_is_valid(p); +} diff --git a/src/shared/web-util.h b/src/shared/web-util.h new file mode 100644 index 0000000..ec54669 --- /dev/null +++ b/src/shared/web-util.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +bool http_url_is_valid(const char *url) _pure_; + +bool documentation_url_is_valid(const char *url) _pure_; + +bool http_etag_is_valid(const char *etag); diff --git a/src/shared/wifi-util.c b/src/shared/wifi-util.c new file mode 100644 index 0000000..2ac8846 --- /dev/null +++ b/src/shared/wifi-util.c @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "log.h" +#include "wifi-util.h" + +int wifi_get_interface(sd_netlink *genl, int ifindex, enum nl80211_iftype *iftype, char **ssid) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL; + sd_genl_family family; + int r; + + assert(genl); + assert(ifindex > 0); + + r = sd_genl_message_new(genl, SD_GENL_NL80211, NL80211_CMD_GET_INTERFACE, &m); + if (r < 0) + return log_debug_errno(r, "Failed to create generic netlink message: %m"); + + r = sd_netlink_message_append_u32(m, NL80211_ATTR_IFINDEX, ifindex); + if (r < 0) + return log_debug_errno(r, "Could not append NL80211_ATTR_IFINDEX attribute: %m"); + + r = sd_netlink_call(genl, m, 0, &reply); + if (r == -ENODEV) { + /* For obsolete WEXT driver. */ + log_debug_errno(r, "Failed to request information about wifi interface %d. " + "The device doesn't seem to have nl80211 interface. Ignoring.", + ifindex); + goto nodata; + } + if (r < 0) + return log_debug_errno(r, "Failed to request information about wifi interface %d: %m", ifindex); + if (!reply) { + log_debug_errno(r, "No reply received to request for information about wifi interface %d, ignoring.", ifindex); + goto nodata; + } + + r = sd_netlink_message_get_errno(reply); + if (r < 0) + return log_debug_errno(r, "Failed to get information about wifi interface %d: %m", ifindex); + + r = sd_genl_message_get_family(genl, reply, &family); + if (r < 0) + return log_debug_errno(r, "Failed to determine genl family: %m"); + if (family != SD_GENL_NL80211) { + log_debug("Received message of unexpected genl family %u, ignoring.", family); + goto nodata; + } + + if (iftype) { + uint32_t t; + + r = sd_netlink_message_read_u32(reply, NL80211_ATTR_IFTYPE, &t); + if (r < 0) + return log_debug_errno(r, "Failed to get NL80211_ATTR_IFTYPE attribute: %m"); + *iftype = t; + } + + if (ssid) { + r = sd_netlink_message_read_string_strdup(reply, NL80211_ATTR_SSID, ssid); + if (r == -ENODATA) + *ssid = NULL; + else if (r < 0) + return log_debug_errno(r, "Failed to get NL80211_ATTR_SSID attribute: %m"); + } + + return 1; + +nodata: + if (iftype) + *iftype = 0; + if (ssid) + *ssid = NULL; + return 0; +} + +int wifi_get_station(sd_netlink *genl, int ifindex, struct ether_addr *bssid) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL; + sd_genl_family family; + int r; + + assert(genl); + assert(ifindex > 0); + assert(bssid); + + r = sd_genl_message_new(genl, SD_GENL_NL80211, NL80211_CMD_GET_STATION, &m); + if (r < 0) + return log_debug_errno(r, "Failed to create generic netlink message: %m"); + + r = sd_netlink_message_set_flags(m, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); + if (r < 0) + return log_debug_errno(r, "Failed to set dump flag: %m"); + + r = sd_netlink_message_append_u32(m, NL80211_ATTR_IFINDEX, ifindex); + if (r < 0) + return log_debug_errno(r, "Could not append NL80211_ATTR_IFINDEX attribute: %m"); + + r = sd_netlink_call(genl, m, 0, &reply); + if (r < 0) + return log_debug_errno(r, "Failed to request information about wifi station: %m"); + if (!reply) { + log_debug_errno(r, "No reply received to request for information about wifi station, ignoring."); + goto nodata; + } + + r = sd_netlink_message_get_errno(reply); + if (r < 0) + return log_debug_errno(r, "Failed to get information about wifi station: %m"); + + r = sd_genl_message_get_family(genl, reply, &family); + if (r < 0) + return log_debug_errno(r, "Failed to determine genl family: %m"); + if (family != SD_GENL_NL80211) { + log_debug("Received message of unexpected genl family %u, ignoring.", family); + goto nodata; + } + + r = sd_netlink_message_read_ether_addr(reply, NL80211_ATTR_MAC, bssid); + if (r == -ENODATA) + goto nodata; + if (r < 0) + return log_debug_errno(r, "Failed to get NL80211_ATTR_MAC attribute: %m"); + + return 1; + +nodata: + *bssid = (struct ether_addr) {}; + return 0; +} diff --git a/src/shared/wifi-util.h b/src/shared/wifi-util.h new file mode 100644 index 0000000..0ce4137 --- /dev/null +++ b/src/shared/wifi-util.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#include <linux/nl80211.h> +#include <net/ethernet.h> + +#include "sd-netlink.h" + +int wifi_get_interface(sd_netlink *genl, int ifindex, enum nl80211_iftype *iftype, char **ssid); +int wifi_get_station(sd_netlink *genl, int ifindex, struct ether_addr *bssid); diff --git a/src/shared/xml.c b/src/shared/xml.c new file mode 100644 index 0000000..8ff3fea --- /dev/null +++ b/src/shared/xml.c @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <stddef.h> + +#include "macro.h" +#include "string-util.h" +#include "xml.h" + +enum { + STATE_NULL, + STATE_TEXT, + STATE_TAG, + STATE_ATTRIBUTE, +}; + +static void inc_lines(unsigned *line, const char *s, size_t n) { + const char *p = s; + + if (!line) + return; + + for (;;) { + const char *f; + + f = memchr(p, '\n', n); + if (!f) + return; + + n -= (f - p) + 1; + p = f + 1; + (*line)++; + } +} + +/* We don't actually do real XML here. We only read a simplistic + * subset, that is a bit less strict that XML and lacks all the more + * complex features, like entities, or namespaces. However, we do + * support some HTML5-like simplifications */ + +int xml_tokenize(const char **p, char **name, void **state, unsigned *line) { + const char *c, *e, *b; + char *ret; + int t; + + assert(p); + assert(*p); + assert(name); + assert(state); + + t = PTR_TO_INT(*state); + c = *p; + + if (t == STATE_NULL) { + if (line) + *line = 1; + t = STATE_TEXT; + } + + for (;;) { + if (*c == 0) + return XML_END; + + switch (t) { + + case STATE_TEXT: { + int x; + + e = strchrnul(c, '<'); + if (e > c) { + /* More text... */ + ret = strndup(c, e - c); + if (!ret) + return -ENOMEM; + + inc_lines(line, c, e - c); + + *name = ret; + *p = e; + *state = INT_TO_PTR(STATE_TEXT); + + return XML_TEXT; + } + + assert(*e == '<'); + b = c + 1; + + if (startswith(b, "!--")) { + /* A comment */ + e = strstr(b + 3, "-->"); + if (!e) + return -EINVAL; + + inc_lines(line, b, e + 3 - b); + + c = e + 3; + continue; + } + + if (*b == '?') { + /* Processing instruction */ + + e = strstr(b + 1, "?>"); + if (!e) + return -EINVAL; + + inc_lines(line, b, e + 2 - b); + + c = e + 2; + continue; + } + + if (*b == '!') { + /* DTD */ + + e = strchr(b + 1, '>'); + if (!e) + return -EINVAL; + + inc_lines(line, b, e + 1 - b); + + c = e + 1; + continue; + } + + if (*b == '/') { + /* A closing tag */ + x = XML_TAG_CLOSE; + b++; + } else + x = XML_TAG_OPEN; + + e = strpbrk(b, WHITESPACE "/>"); + if (!e) + return -EINVAL; + + ret = strndup(b, e - b); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = e; + *state = INT_TO_PTR(STATE_TAG); + + return x; + } + + case STATE_TAG: + + b = c + strspn(c, WHITESPACE); + if (*b == 0) + return -EINVAL; + + inc_lines(line, c, b - c); + + e = b + strcspn(b, WHITESPACE "=/>"); + if (e > b) { + /* An attribute */ + + ret = strndup(b, e - b); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = e; + *state = INT_TO_PTR(STATE_ATTRIBUTE); + + return XML_ATTRIBUTE_NAME; + } + + if (startswith(b, "/>")) { + /* An empty tag */ + + *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */ + *p = b + 2; + *state = INT_TO_PTR(STATE_TEXT); + + return XML_TAG_CLOSE_EMPTY; + } + + if (*b != '>') + return -EINVAL; + + c = b + 1; + t = STATE_TEXT; + continue; + + case STATE_ATTRIBUTE: + + if (*c == '=') { + c++; + + if (IN_SET(*c, '\'', '"')) { + /* Tag with a quoted value */ + + e = strchr(c+1, *c); + if (!e) + return -EINVAL; + + inc_lines(line, c, e - c); + + ret = strndup(c+1, e - c - 1); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = e + 1; + *state = INT_TO_PTR(STATE_TAG); + + return XML_ATTRIBUTE_VALUE; + + } + + /* Tag with a value without quotes */ + + b = strpbrk(c, WHITESPACE ">"); + if (!b) + b = c; + + ret = strndup(c, b - c); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = b; + *state = INT_TO_PTR(STATE_TAG); + return XML_ATTRIBUTE_VALUE; + } + + t = STATE_TAG; + continue; + } + + } + + assert_not_reached("Bad state"); +} diff --git a/src/shared/xml.h b/src/shared/xml.h new file mode 100644 index 0000000..217b3b0 --- /dev/null +++ b/src/shared/xml.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +enum { + XML_END, + XML_TEXT, + XML_TAG_OPEN, + XML_TAG_CLOSE, + XML_TAG_CLOSE_EMPTY, + XML_ATTRIBUTE_NAME, + XML_ATTRIBUTE_VALUE, +}; + +int xml_tokenize(const char **p, char **name, void **state, unsigned *line); |