summary | refs | log | tree | commit | diff | stats
path: root/src/shared
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/shared/acl-util.c406
-rw-r--r--src/shared/acl-util.h30
-rw-r--r--src/shared/acpi-fpdt.c147
-rw-r--r--src/shared/acpi-fpdt.h6
-rw-r--r--src/shared/apparmor-util.c22
-rw-r--r--src/shared/apparmor-util.h6
-rw-r--r--src/shared/ask-password-api.c818
-rw-r--r--src/shared/ask-password-api.h20
-rw-r--r--src/shared/barrier.c394
-rw-r--r--src/shared/barrier.h74
-rw-r--r--src/shared/base-filesystem.c117
-rw-r--r--src/shared/base-filesystem.h6
-rw-r--r--src/shared/bitmap.c218
-rw-r--r--src/shared/bitmap.h31
-rw-r--r--src/shared/blkid-util.h10
-rw-r--r--src/shared/boot-timestamps.c46
-rw-r--r--src/shared/boot-timestamps.h6
-rw-r--r--src/shared/bootspec.c665
-rw-r--r--src/shared/bootspec.h51
-rw-r--r--src/shared/bpf-program.c237
-rw-r--r--src/shared/bpf-program.h42
-rw-r--r--src/shared/bus-unit-util.c2547
-rw-r--r--src/shared/bus-unit-util.h42
-rw-r--r--src/shared/bus-util.c1753
-rw-r--r--src/shared/bus-util.h179
-rw-r--r--src/shared/calendarspec.c1370
-rw-r--r--src/shared/calendarspec.h46
-rw-r--r--src/shared/cgroup-show.c354
-rw-r--r--src/shared/cgroup-show.h24
-rw-r--r--src/shared/clean-ipc.c453
-rw-r--r--src/shared/clean-ipc.h17
-rw-r--r--src/shared/clock-util.c157
-rw-r--r--src/shared/clock-util.h11
-rw-r--r--src/shared/condition.c733
-rw-r--r--src/shared/condition.h96
-rw-r--r--src/shared/conf-parser.c1113
-rw-r--r--src/shared/conf-parser.h289
-rw-r--r--src/shared/cpu-set-util.c99
-rw-r--r--src/shared/cpu-set-util.h35
-rw-r--r--src/shared/crypt-util.c28
-rw-r--r--src/shared/crypt-util.h17
-rw-r--r--src/shared/daemon-util.h22
-rw-r--r--src/shared/dev-setup.c115
-rw-r--r--src/shared/dev-setup.h8
-rw-r--r--src/shared/dissect-image.c1507
-rw-r--r--src/shared/dissect-image.h96
-rw-r--r--src/shared/dns-domain.c1375
-rw-r--r--src/shared/dns-domain.h112
-rw-r--r--src/shared/dropin.c255
-rw-r--r--src/shared/dropin.h39
-rw-r--r--src/shared/efivars.c914
-rw-r--r--src/shared/efivars.h141
-rw-r--r--src/shared/enable-mempool.c5
-rw-r--r--src/shared/env-file-label.c21
-rw-r--r--src/shared/env-file-label.h8
-rw-r--r--src/shared/exec-util.c353
-rw-r--r--src/shared/exec-util.h25
-rw-r--r--src/shared/exit-status.c284
-rw-r--r--src/shared/exit-status.h99
-rw-r--r--src/shared/fdset.c255
-rw-r--r--src/shared/fdset.h40
-rw-r--r--src/shared/fileio-label.c37
-rw-r--r--src/shared/fileio-label.h15
-rw-r--r--src/shared/firewall-util.c350
-rw-r--r--src/shared/firewall-util.h65
-rw-r--r--src/shared/format-table.c1625
-rw-r--r--src/shared/format-table.h78
-rw-r--r--src/shared/fstab-util.c268
-rw-r--r--src/shared/fstab-util.h35
-rwxr-xr-xsrc/shared/generate-ip-protocol-list.sh6
-rw-r--r--src/shared/generator.c504
-rw-r--r--src/shared/generator.h67
-rw-r--r--src/shared/gpt.h64
-rw-r--r--src/shared/id128-print.c65
-rw-r--r--src/shared/id128-print.h10
-rw-r--r--src/shared/ima-util.c15
-rw-r--r--src/shared/ima-util.h6
-rw-r--r--src/shared/import-util.c166
-rw-r--r--src/shared/import-util.h25
-rw-r--r--src/shared/initreq.h73
-rw-r--r--src/shared/install-printf.c148
-rw-r--r--src/shared/install-printf.h6
-rw-r--r--src/shared/install.c3383
-rw-r--r--src/shared/install.h230
-rw-r--r--src/shared/ip-protocol-list.c67
-rw-r--r--src/shared/ip-protocol-list.h6
-rw-r--r--src/shared/ip-protocol-to-name.awk9
-rw-r--r--src/shared/journal-importer.c504
-rw-r--r--src/shared/journal-importer.h64
-rw-r--r--src/shared/journal-util.c174
-rw-r--r--src/shared/journal-util.h11
-rw-r--r--src/shared/json-internal.h63
-rw-r--r--src/shared/json.c3480
-rw-r--r--src/shared/json.h285
-rw-r--r--src/shared/libshared.sym3
-rw-r--r--src/shared/linux-3.13/dm-ioctl.h355
-rw-r--r--src/shared/linux/auto_dev-ioctl.h229
-rw-r--r--src/shared/linux/bpf.h1109
-rw-r--r--src/shared/linux/bpf_common.h55
-rw-r--r--src/shared/linux/libbpf.h207
-rw-r--r--src/shared/lockfile-util.c137
-rw-r--r--src/shared/lockfile-util.h14
-rw-r--r--src/shared/logs-show.c1462
-rw-r--r--src/shared/logs-show.h63
-rw-r--r--src/shared/loop-util.c146
-rw-r--r--src/shared/loop-util.h23
-rw-r--r--src/shared/machine-image.c1249
-rw-r--r--src/shared/machine-image.h112
-rw-r--r--src/shared/machine-pool.c46
-rw-r--r--src/shared/machine-pool.h8
-rw-r--r--src/shared/main-func.h34
-rw-r--r--src/shared/meson.build277
-rw-r--r--src/shared/module-util.c72
-rw-r--r--src/shared/module-util.h12
-rw-r--r--src/shared/mount-util.c570
-rw-r--r--src/shared/mount-util.h34
-rw-r--r--src/shared/nscd-flush.c151
-rw-r--r--src/shared/nscd-flush.h4
-rw-r--r--src/shared/nsflags.c77
-rw-r--r--src/shared/nsflags.h29
-rw-r--r--src/shared/os-util.c117
-rw-r--r--src/shared/os-util.h12
-rw-r--r--src/shared/output-mode.c42
-rw-r--r--src/shared/output-mode.h50
-rw-r--r--src/shared/pager.c292
-rw-r--r--src/shared/pager.h17
-rw-r--r--src/shared/path-lookup.c903
-rw-r--r--src/shared/path-lookup.h74
-rw-r--r--src/shared/pretty-print.c247
-rw-r--r--src/shared/pretty-print.h17
-rw-r--r--src/shared/ptyfwd.c631
-rw-r--r--src/shared/ptyfwd.h42
-rw-r--r--src/shared/reboot-util.c83
-rw-r--r--src/shared/reboot-util.h12
-rw-r--r--src/shared/resolve-util.c29
-rw-r--r--src/shared/resolve-util.h60
-rw-r--r--src/shared/seccomp-util.c1764
-rw-r--r--src/shared/seccomp-util.h98
-rw-r--r--src/shared/securebits-util.c66
-rw-r--r--src/shared/securebits-util.h18
-rw-r--r--src/shared/serialize.c214
-rw-r--r--src/shared/serialize.h25
-rw-r--r--src/shared/sleep-config.c436
-rw-r--r--src/shared/sleep-config.h13
-rw-r--r--src/shared/spawn-ask-password-agent.c48
-rw-r--r--src/shared/spawn-ask-password-agent.h5
-rw-r--r--src/shared/spawn-polkit-agent.c85
-rw-r--r--src/shared/spawn-polkit-agent.h22
-rw-r--r--src/shared/specifier.c299
-rw-r--r--src/shared/specifier.h37
-rw-r--r--src/shared/switch-root.c130
-rw-r--r--src/shared/switch-root.h6
-rw-r--r--src/shared/sysctl-util.c71
-rw-r--r--src/shared/sysctl-util.h7
-rw-r--r--src/shared/test-tables.h44
-rw-r--r--src/shared/tests.c151
-rw-r--r--src/shared/tests.h14
-rw-r--r--src/shared/tmpfile-util-label.c26
-rw-r--r--src/shared/tmpfile-util-label.h10
-rw-r--r--src/shared/tomoyo-util.c15
-rw-r--r--src/shared/tomoyo-util.h6
-rw-r--r--src/shared/udev-util.c171
-rw-r--r--src/shared/udev-util.h29
-rw-r--r--src/shared/uid-range.c186
-rw-r--r--src/shared/uid-range.h15
-rw-r--r--src/shared/utmp-wtmp.c427
-rw-r--r--src/shared/utmp-wtmp.h56
-rw-r--r--src/shared/verbs.c128
-rw-r--r--src/shared/verbs.h23
-rw-r--r--src/shared/vlan-util.c82
-rw-r--r--src/shared/vlan-util.h20
-rw-r--r--src/shared/volatile-util.c44
-rw-r--r--src/shared/volatile-util.h15
-rw-r--r--src/shared/watchdog.c154
-rw-r--r--src/shared/watchdog.h16
-rw-r--r--src/shared/web-util.c53
-rw-r--r--src/shared/web-util.h12
-rw-r--r--src/shared/wireguard-netlink.h179
-rw-r--r--src/shared/xml.c238
-rw-r--r--src/shared/xml.h14
180 files changed, 44871 insertions, 0 deletions
diff --git a/src/shared/acl-util.c b/src/shared/acl-util.c
new file mode 100644
index 0000000..9633514
--- /dev/null
+++ b/src/shared/acl-util.c
@@ -0,0 +1,406 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+/* Looks through the entries of 'acl' for an ACL_USER entry whose qualifier equals 'uid'.
+ *
+ * Returns 1 and stores the matching entry in *entry if one is found, 0 if there is none, and
+ * -errno if any of the libacl calls fails. */
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry) {
+        acl_entry_t cur;
+        int which = ACL_FIRST_ENTRY, r;
+
+        assert(acl);
+        assert(entry);
+
+        while ((r = acl_get_entry(acl, which, &cur)) > 0) {
+                acl_tag_t tag;
+
+                /* Every lookup after the first one continues the iteration */
+                which = ACL_NEXT_ENTRY;
+
+                if (acl_get_tag_type(cur, &tag) < 0)
+                        return -errno;
+
+                if (tag == ACL_USER) {
+                        uid_t *qualifier;
+                        bool match;
+
+                        qualifier = acl_get_qualifier(cur);
+                        if (!qualifier)
+                                return -errno;
+
+                        /* Compare first, then release the qualifier copy handed out by libacl */
+                        match = *qualifier == uid;
+                        acl_free(qualifier);
+
+                        if (match) {
+                                *entry = cur;
+                                return 1;
+                        }
+                }
+        }
+
+        return r < 0 ? -errno : 0;
+}
+
+/* Calls acl_calc_mask() on *acl_p, but only if the ACL contains at least one ACL_USER or
+ * ACL_GROUP entry and no ACL_MASK entry was encountered while scanning.
+ *
+ * Returns > 0 if the mask was calculated, 0 if nothing had to be done, -errno on failure. */
+int calc_acl_mask_if_needed(acl_t *acl_p) {
+        bool named_entry_seen = false;
+        acl_entry_t cur;
+        int which = ACL_FIRST_ENTRY, r;
+
+        assert(acl_p);
+
+        while ((r = acl_get_entry(*acl_p, which, &cur)) > 0) {
+                acl_tag_t tag;
+
+                which = ACL_NEXT_ENTRY;
+
+                if (acl_get_tag_type(cur, &tag) < 0)
+                        return -errno;
+
+                /* A mask is already there, leave the ACL alone */
+                if (tag == ACL_MASK)
+                        return 0;
+
+                if (tag == ACL_USER || tag == ACL_GROUP)
+                        named_entry_seen = true;
+        }
+        if (r < 0)
+                return -errno;
+
+        if (!named_entry_seen)
+                return false;
+
+        if (acl_calc_mask(acl_p) < 0)
+                return -errno;
+
+        return true;
+}
+
+/* Makes sure *acl_p carries the three base entries (ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_OTHER).
+ * Whichever of them is missing is copied over from an ACL synthesized with acl_from_mode()
+ * out of the st_mode that stat() reports for 'path'.
+ *
+ * Returns 0 on success (including when nothing was missing), -errno on failure. */
+int add_base_acls_if_needed(acl_t *acl_p, const char *path) {
+        _cleanup_(acl_freep) acl_t basic = NULL;
+        bool have_user_obj = false, have_group_obj = false, have_other = false;
+        acl_entry_t cur;
+        struct stat st;
+        int which, r;
+
+        assert(acl_p);
+
+        /* First pass: figure out which of the base entries are already present */
+        for (which = ACL_FIRST_ENTRY;
+             (r = acl_get_entry(*acl_p, which, &cur)) > 0;
+             which = ACL_NEXT_ENTRY) {
+                acl_tag_t tag;
+
+                if (acl_get_tag_type(cur, &tag) < 0)
+                        return -errno;
+
+                switch (tag) {
+                case ACL_USER_OBJ:
+                        have_user_obj = true;
+                        break;
+                case ACL_GROUP_OBJ:
+                        have_group_obj = true;
+                        break;
+                case ACL_OTHER:
+                        have_other = true;
+                        break;
+                default:
+                        break;
+                }
+
+                /* All three are there, nothing to add */
+                if (have_user_obj && have_group_obj && have_other)
+                        return 0;
+        }
+        if (r < 0)
+                return -errno;
+
+        if (stat(path, &st) < 0)
+                return -errno;
+
+        basic = acl_from_mode(st.st_mode);
+        if (!basic)
+                return -errno;
+
+        /* Second pass: copy the entries we lack from the mode-derived ACL */
+        for (which = ACL_FIRST_ENTRY;
+             (r = acl_get_entry(basic, which, &cur)) > 0;
+             which = ACL_NEXT_ENTRY) {
+                acl_entry_t dst;
+                acl_tag_t tag;
+                bool present;
+
+                if (acl_get_tag_type(cur, &tag) < 0)
+                        return -errno;
+
+                present = (tag == ACL_USER_OBJ && have_user_obj) ||
+                          (tag == ACL_GROUP_OBJ && have_group_obj) ||
+                          (tag == ACL_OTHER && have_other);
+                if (present)
+                        continue;
+
+                if (acl_create_entry(acl_p, &dst) < 0)
+                        return -errno;
+
+                if (acl_copy_entry(dst, cur) < 0)
+                        return -errno;
+        }
+
+        return r < 0 ? -errno : 0;
+}
+
+/* Inspects the ACL_GROUP entries of the default ACL (ACL_TYPE_DEFAULT) of 'path'.
+ *
+ * Returns > 0 if in_gid() reports membership in at least one of the listed groups, 0 if not,
+ * and a negative errno-style value on failure. If 'ret_groups' is NULL the function returns as
+ * soon as the first membership is detected; otherwise the names of all groups referenced by
+ * ACL_GROUP entries are collected and handed to the caller in *ret_groups. */
+int acl_search_groups(const char *path, char ***ret_groups) {
+        _cleanup_strv_free_ char **names = NULL;
+        _cleanup_(acl_freep) acl_t acl = NULL;
+        bool member = false;
+        acl_entry_t entry;
+        int r;
+
+        assert(path);
+
+        acl = acl_get_file(path, ACL_TYPE_DEFAULT);
+        if (!acl)
+                return -errno;
+
+        for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry);
+             r != 0;
+             r = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry)) {
+
+                _cleanup_(acl_free_gid_tpp) gid_t *gid = NULL;
+                acl_tag_t tag;
+
+                if (r < 0)
+                        return -errno;
+
+                if (acl_get_tag_type(entry, &tag) < 0)
+                        return -errno;
+
+                /* Only named group entries are of interest */
+                if (tag != ACL_GROUP)
+                        continue;
+
+                gid = acl_get_qualifier(entry);
+                if (!gid)
+                        return -errno;
+
+                if (in_gid(*gid) > 0) {
+                        /* Caller only wants the yes/no answer: we are done */
+                        if (!ret_groups)
+                                return true;
+
+                        member = true;
+                }
+
+                if (ret_groups) {
+                        char *name;
+                        int k;
+
+                        name = gid_to_name(*gid);
+                        if (!name)
+                                return -ENOMEM;
+
+                        k = strv_consume(&names, name);
+                        if (k < 0)
+                                return k;
+                }
+        }
+
+        if (ret_groups)
+                *ret_groups = TAKE_PTR(names);
+
+        return member;
+}
+
+/* Joins 'entries' with "," and turns the result into an ACL via acl_from_text(), optionally
+ * running calc_acl_mask_if_needed() on it. On success the new ACL is stored in *ret. */
+static int parse_acl_entries(char **entries, bool want_mask, acl_t *ret) {
+        _cleanup_(acl_freep) acl_t acl = NULL;
+        _cleanup_free_ char *joined = NULL;
+        int r;
+
+        joined = strv_join(entries, ",");
+        if (!joined)
+                return -ENOMEM;
+
+        acl = acl_from_text(joined);
+        if (!acl)
+                return -errno;
+
+        if (want_mask) {
+                r = calc_acl_mask_if_needed(&acl);
+                if (r < 0)
+                        return r;
+        }
+
+        *ret = TAKE_PTR(acl);
+        return 0;
+}
+
+/* Parses a comma-separated ACL specification. Entries prefixed with "default:" or "d:" (prefix
+ * stripped) make up the default ACL, all others the access ACL. Each non-empty group is
+ * converted with acl_from_text(); if 'want_mask' is set calc_acl_mask_if_needed() is applied.
+ *
+ * On success returns 0 and stores the results in *acl_access and *acl_default; a group without
+ * entries yields NULL. On failure a negative errno-style value is returned and the output
+ * parameters are left untouched. */
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask) {
+        /* These two arrays only borrow pointers into 'split', hence only the arrays are freed */
+        _cleanup_free_ char **access_entries = NULL, **default_entries = NULL;
+        _cleanup_(acl_freep) acl_t a_acl = NULL, d_acl = NULL;
+        _cleanup_strv_free_ char **split = NULL;
+        char **entry;
+        int r;
+
+        split = strv_split(text, ",");
+        if (!split)
+                return -ENOMEM;
+
+        STRV_FOREACH(entry, split) {
+                char *stripped;
+
+                stripped = STARTSWITH_SET(*entry, "default:", "d:");
+                r = stripped ? strv_push(&default_entries, stripped)
+                             : strv_push(&access_entries, *entry);
+                if (r < 0)
+                        return r;
+        }
+
+        if (!strv_isempty(access_entries)) {
+                r = parse_acl_entries(access_entries, want_mask, &a_acl);
+                if (r < 0)
+                        return r;
+        }
+
+        if (!strv_isempty(default_entries)) {
+                r = parse_acl_entries(default_entries, want_mask, &d_acl);
+                if (r < 0)
+                        return r;
+        }
+
+        *acl_access = TAKE_PTR(a_acl);
+        *acl_default = TAKE_PTR(d_acl);
+
+        return 0;
+}
+
+/* Decides whether two ACL entries address the same subject: the tag types must match and, for
+ * ACL_USER/ACL_GROUP entries, so must the uid/gid qualifier. Permissions are not compared.
+ *
+ * Returns > 0 if they match, 0 if not, -errno if libacl fails. */
+static int acl_entry_equal(acl_entry_t a, acl_entry_t b) {
+        acl_tag_t tag_a, tag_b;
+
+        if (acl_get_tag_type(a, &tag_a) < 0)
+                return -errno;
+
+        if (acl_get_tag_type(b, &tag_b) < 0)
+                return -errno;
+
+        if (tag_a != tag_b)
+                return false;
+
+        if (tag_a == ACL_USER) {
+                _cleanup_(acl_free_uid_tpp) uid_t *uid_a = NULL, *uid_b = NULL;
+
+                uid_a = acl_get_qualifier(a);
+                if (!uid_a)
+                        return -errno;
+
+                uid_b = acl_get_qualifier(b);
+                if (!uid_b)
+                        return -errno;
+
+                return *uid_a == *uid_b;
+        }
+
+        if (tag_a == ACL_GROUP) {
+                _cleanup_(acl_free_gid_tpp) gid_t *gid_a = NULL, *gid_b = NULL;
+
+                gid_a = acl_get_qualifier(a);
+                if (!gid_a)
+                        return -errno;
+
+                gid_b = acl_get_qualifier(b);
+                if (!gid_b)
+                        return -errno;
+
+                return *gid_a == *gid_b;
+        }
+
+        /* can have only one of those */
+        if (tag_a == ACL_USER_OBJ || tag_a == ACL_GROUP_OBJ || tag_a == ACL_MASK || tag_a == ACL_OTHER)
+                return true;
+
+        assert_not_reached("Unknown acl tag type");
+}
+
+/* Searches 'acl' for an entry that acl_entry_equal() considers equal to 'entry'.
+ *
+ * Returns 1 and stores the match in *out, 0 if there is no match, negative on error. */
+static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) {
+        acl_entry_t cur;
+        int which = ACL_FIRST_ENTRY, r;
+
+        while ((r = acl_get_entry(acl, which, &cur)) > 0) {
+                int eq;
+
+                which = ACL_NEXT_ENTRY;
+
+                eq = acl_entry_equal(cur, entry);
+                if (eq < 0)
+                        return eq;
+                if (eq > 0) {
+                        *out = cur;
+                        return 1;
+                }
+        }
+
+        return r < 0 ? -errno : 0;
+}
+
+/* Loads the ACL of the given 'type' from 'path' and merges the entries of 'new' into it: an
+ * entry that already has a counterpart (per find_acl_entry()) is overwritten, any other entry
+ * is appended. 'new' itself is only read.
+ *
+ * Returns 0 and hands the merged ACL to the caller in *acl, or a negative errno-style value. */
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) {
+        _cleanup_(acl_freep) acl_t merged = NULL;
+        acl_entry_t src;
+        int which = ACL_FIRST_ENTRY, r;
+
+        merged = acl_get_file(path, type);
+        if (!merged)
+                return -errno;
+
+        while ((r = acl_get_entry(new, which, &src)) > 0) {
+                acl_entry_t dst;
+                int found;
+
+                which = ACL_NEXT_ENTRY;
+
+                found = find_acl_entry(merged, src, &dst);
+                if (found < 0)
+                        return found;
+
+                /* No counterpart yet, so make room for a fresh entry */
+                if (found == 0 && acl_create_entry(&merged, &dst) < 0)
+                        return -errno;
+
+                if (acl_copy_entry(dst, src) < 0)
+                        return -errno;
+        }
+        if (r < 0)
+                return -errno;
+
+        *acl = TAKE_PTR(merged);
+
+        return 0;
+}
+
+/* Grants 'uid' read access (ACL_READ) on the file referred to by 'fd': the file's ACL is
+ * fetched, an ACL_USER entry for 'uid' is looked up (or created if acl_find_uid() does not
+ * report one), ACL_READ is added to its permission set and the ACL is written back.
+ *
+ * Returns 0 on success, a negative errno-style value on failure. */
+int add_acls_for_user(int fd, uid_t uid) {
+        _cleanup_(acl_freep) acl_t acl = NULL;
+        acl_entry_t entry;
+        acl_permset_t permset;
+        int r;
+
+        acl = acl_get_fd(fd);
+        if (!acl)
+                return -errno;
+
+        /* Note that a lookup failure (r < 0) is handled like "not found" (r == 0) here */
+        r = acl_find_uid(acl, uid, &entry);
+        if (r <= 0) {
+                if (acl_create_entry(&acl, &entry) < 0 ||
+                    acl_set_tag_type(entry, ACL_USER) < 0 ||
+                    acl_set_qualifier(entry, &uid) < 0)
+                        return -errno;
+        }
+
+        if (acl_get_permset(entry, &permset) < 0 ||
+            acl_add_perm(permset, ACL_READ) < 0)
+                return -errno;
+
+        /* We do not recalculate the mask unconditionally here, only if the ACL has none yet,
+         * so that a mask that is already in place stays intact. */
+        r = calc_acl_mask_if_needed(&acl);
+        if (r < 0)
+                return r;
+
+        /* acl_set_fd() reports failure as -1 with errno set; translate that into our usual
+         * negative-errno convention instead of leaking the raw -1 to the caller. */
+        if (acl_set_fd(fd, acl) < 0)
+                return -errno;
+
+        return 0;
+}
diff --git a/src/shared/acl-util.h b/src/shared/acl-util.h
new file mode 100644
index 0000000..10b2a3d
--- /dev/null
+++ b/src/shared/acl-util.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_ACL
+
+#include <acl/libacl.h>
+#include <stdbool.h>
+#include <sys/acl.h>
+
+#include "macro.h"
+
+/* Finds the ACL_USER entry for uid in acl. Returns 1 and sets *entry if found, 0 if not found,
+ * negative errno-style value on failure. */
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry);
+/* Runs acl_calc_mask() on *acl_p if it has ACL_USER/ACL_GROUP entries but no ACL_MASK entry.
+ * Returns > 0 if the mask was calculated, 0 if not, negative on failure. */
+int calc_acl_mask_if_needed(acl_t *acl_p);
+/* Adds whichever of ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_OTHER is missing from *acl_p, taking the
+ * entries from an ACL derived from the file mode of path. Returns 0 or negative on failure. */
+int add_base_acls_if_needed(acl_t *acl_p, const char *path);
+/* Checks the ACL_GROUP entries of the default ACL of path. Returns > 0 if in_gid() reports
+ * membership in one of those groups, 0 if not, negative on failure. If ret_groups is non-NULL
+ * it receives the names of all groups referenced by ACL_GROUP entries. */
+int acl_search_groups(const char* path, char ***ret_groups);
+/* Parses a comma-separated ACL string; entries prefixed with "default:" or "d:" go into
+ * *acl_default, the others into *acl_access. With want_mask set, calc_acl_mask_if_needed() is
+ * applied to each resulting ACL. Returns 0 or negative on failure. */
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask);
+/* Reads the ACL of the given type from path, merges the entries of new into it and returns the
+ * result in *acl (presumably to be released by the caller with acl_free() — TODO confirm).
+ * Returns 0 or negative on failure. */
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl);
+/* Adds ACL_READ for uid to the ACL of the file referred to by fd and writes the ACL back. */
+int add_acls_for_user(int fd, uid_t uid);
+
+/* acl_free takes multiple argument types.
+ * Multiple cleanup functions are necessary. */
+/* The definitions below are what the .c file refers to as acl_freep, acl_free_uid_tpp and
+ * acl_free_gid_tpp in its _cleanup_() annotations; acl_free_charpp is the char* variant. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(acl_t, acl_free);
+#define acl_free_charp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, acl_free_charp);
+#define acl_free_uid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(uid_t*, acl_free_uid_tp);
+#define acl_free_gid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(gid_t*, acl_free_gid_tp);
+
+#endif
diff --git a/src/shared/acpi-fpdt.c b/src/shared/acpi-fpdt.c
new file mode 100644
index 0000000..d565ebd
--- /dev/null
+++ b/src/shared/acpi-fpdt.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "acpi-fpdt.h"
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "time-util.h"
+
+/* Header found at the very beginning of /sys/firmware/acpi/tables/FPDT. acpi_get_boot_usec()
+ * requires 'signature' to be "FPDT" and 'length' to equal the size of the file read. */
+struct acpi_table_header {
+ char signature[4];
+ uint32_t length;
+ uint8_t revision;
+ uint8_t checksum;
+ char oem_id[6];
+ char oem_table_id[8];
+ uint32_t oem_revision;
+ char asl_compiler_id[4];
+ uint32_t asl_compiler_revision;
+};
+
+/* Values of acpi_fpdt_header.type; only ACPI_FPDT_TYPE_BOOT is looked for below. */
+enum {
+ ACPI_FPDT_TYPE_BOOT = 0,
+ ACPI_FPDT_TYPE_S3PERF = 1,
+};
+
+/* Record following the table header inside the FPDT. 'length' is the size of the record in
+ * bytes and is used to step to the next record; 'ptr' is used as the offset into /dev/mem at
+ * which the boot performance data is read. */
+struct acpi_fpdt_header {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t ptr;
+};
+
+/* Header of the data read from /dev/mem at acpi_fpdt_header.ptr; 'signature' must be "FBPT". */
+struct acpi_fpdt_boot_header {
+ char signature[4];
+ uint32_t length;
+};
+
+/* Values of acpi_fpdt_boot.type; acpi_get_boot_usec() only accepts ACPI_FPDT_BOOT_REC. */
+enum {
+ ACPI_FPDT_S3PERF_RESUME_REC = 0,
+ ACPI_FPDT_S3PERF_SUSPEND_REC = 1,
+ ACPI_FPDT_BOOT_REC = 2,
+};
+
+/* Record read right after acpi_fpdt_boot_header. The timestamps are presumably in nanoseconds:
+ * they are compared against NSEC_PER_HOUR and divided by 1000 to obtain usec_t values. */
+struct acpi_fpdt_boot {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t reset_end;
+ uint64_t load_start;
+ uint64_t startup_start;
+ uint64_t exit_services_entry;
+ uint64_t exit_services_exit;
+};
+
+/* Determines firmware boot timing from the ACPI FPDT table.
+ *
+ * The table is read from /sys/firmware/acpi/tables/FPDT and searched for a record of type
+ * ACPI_FPDT_TYPE_BOOT. The address stored in that record is then used as offset into /dev/mem,
+ * where an "FBPT" header followed by a struct acpi_fpdt_boot record is expected.
+ *
+ * On success returns 0 and stores startup_start / 1000 in *loader_start and
+ * exit_services_exit / 1000 in *loader_exit (either pointer may be NULL). Returns -ENODATA if
+ * no boot record is present or exit_services_exit is zero, -EINVAL if the data is malformed or
+ * implausible, and other negative errno-style values if reading fails. */
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit) {
+        _cleanup_free_ char *buf = NULL;
+        _cleanup_close_ int fd = -1;
+        struct acpi_fpdt_boot_header hbrec;
+        struct acpi_fpdt_boot brec;
+        struct acpi_table_header *tbl;
+        const char *p, *end;
+        uint64_t ptr = 0;
+        size_t l = 0;
+        ssize_t n;
+        int r;
+
+        r = read_full_file("/sys/firmware/acpi/tables/FPDT", &buf, &l);
+        if (r < 0)
+                return r;
+
+        if (l < sizeof(struct acpi_table_header) + sizeof(struct acpi_fpdt_header))
+                return -EINVAL;
+
+        tbl = (struct acpi_table_header *)buf;
+        if (l != tbl->length)
+                return -EINVAL;
+
+        if (memcmp(tbl->signature, "FPDT", 4) != 0)
+                return -EINVAL;
+
+        /* find Firmware Basic Boot Performance Pointer Record
+         *
+         * The records are not trusted: before looking at a record we make sure its type/length
+         * fields lie within the buffer, and before using or skipping a record we make sure all of
+         * it does. The fields are fetched with memcpy() since the records are not naturally
+         * aligned within the buffer. */
+        end = buf + l;
+        p = buf + sizeof(struct acpi_table_header);
+        while ((size_t) (end - p) >= offsetof(struct acpi_fpdt_header, reserved)) {
+                uint16_t type;
+                uint8_t length;
+
+                memcpy(&type, p + offsetof(struct acpi_fpdt_header, type), sizeof(type));
+                memcpy(&length, p + offsetof(struct acpi_fpdt_header, length), sizeof(length));
+
+                /* A zero length would make us loop forever, an oversized one run off the table */
+                if (length == 0 || length > (size_t) (end - p))
+                        break;
+
+                if (type == ACPI_FPDT_TYPE_BOOT && length == sizeof(struct acpi_fpdt_header)) {
+                        memcpy(&ptr, p + offsetof(struct acpi_fpdt_header, ptr), sizeof(ptr));
+                        break;
+                }
+
+                p += length;
+        }
+
+        if (ptr == 0)
+                return -ENODATA;
+
+        /* read Firmware Basic Boot Performance Data Record */
+        fd = open("/dev/mem", O_CLOEXEC|O_RDONLY);
+        if (fd < 0)
+                return -errno;
+
+        n = pread(fd, &hbrec, sizeof(struct acpi_fpdt_boot_header), ptr);
+        if (n < 0)
+                return -errno;
+        if ((size_t) n != sizeof(struct acpi_fpdt_boot_header))
+                return -EINVAL;
+
+        if (memcmp(hbrec.signature, "FBPT", 4) != 0)
+                return -EINVAL;
+
+        if (hbrec.length < sizeof(struct acpi_fpdt_boot_header) + sizeof(struct acpi_fpdt_boot))
+                return -EINVAL;
+
+        n = pread(fd, &brec, sizeof(struct acpi_fpdt_boot), ptr + sizeof(struct acpi_fpdt_boot_header));
+        if (n < 0)
+                return -errno;
+        if ((size_t) n != sizeof(struct acpi_fpdt_boot))
+                return -EINVAL;
+
+        if (brec.length != sizeof(struct acpi_fpdt_boot))
+                return -EINVAL;
+
+        if (brec.type != ACPI_FPDT_BOOT_REC)
+                return -EINVAL;
+
+        if (brec.exit_services_exit == 0)
+                /* Non-UEFI compatible boot. */
+                return -ENODATA;
+
+        if (brec.startup_start == 0 || brec.exit_services_exit < brec.startup_start)
+                return -EINVAL;
+        /* Anything beyond an hour is not a believable firmware boot time */
+        if (brec.exit_services_exit > NSEC_PER_HOUR)
+                return -EINVAL;
+
+        if (loader_start)
+                *loader_start = brec.startup_start / 1000;
+        if (loader_exit)
+                *loader_exit = brec.exit_services_exit / 1000;
+
+        return 0;
+}
diff --git a/src/shared/acpi-fpdt.h b/src/shared/acpi-fpdt.h
new file mode 100644
index 0000000..8d28893
--- /dev/null
+++ b/src/shared/acpi-fpdt.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <time-util.h>
+
+/* Reads firmware boot timestamps from the ACPI FPDT table. Returns 0 on success and fills in
+ * *loader_start and *loader_exit (each may be NULL), negative errno-style value otherwise. */
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit);
diff --git a/src/shared/apparmor-util.c b/src/shared/apparmor-util.c
new file mode 100644
index 0000000..c4a4b04
--- /dev/null
+++ b/src/shared/apparmor-util.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+
+/* Reports whether AppArmor is enabled, judged by /sys/module/apparmor/parameters/enabled being
+ * readable and parsing as a true boolean. The answer is computed once and then served from a
+ * static cache on every later call. */
+bool mac_apparmor_use(void) {
+        static int cached_use = -1;
+        _cleanup_free_ char *value = NULL;
+
+        if (cached_use >= 0)
+                return cached_use;
+
+        if (read_one_line_file("/sys/module/apparmor/parameters/enabled", &value) < 0)
+                cached_use = false;
+        else
+                cached_use = parse_boolean(value) > 0;
+
+        return cached_use;
+}
diff --git a/src/shared/apparmor-util.h b/src/shared/apparmor-util.h
new file mode 100644
index 0000000..7fbaf90
--- /dev/null
+++ b/src/shared/apparmor-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+/* Returns true if /sys/module/apparmor/parameters/enabled says AppArmor is on; result is cached. */
+bool mac_apparmor_use(void);
diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c
new file mode 100644
index 0000000..072bf72
--- /dev/null
+++ b/src/shared/ask-password-api.c
@@ -0,0 +1,818 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define KEYRING_TIMEOUT_USEC ((5 * USEC_PER_MINUTE) / 2)
+
+/* Resolves the "user" type key called 'keyname' via request_key().
+ *
+ * Returns 0 and stores the serial in *ret on success, a negative errno-style value otherwise. */
+static int lookup_key(const char *keyname, key_serial_t *ret) {
+        key_serial_t found;
+
+        assert(keyname);
+        assert(ret);
+
+        found = request_key("user", keyname, NULL, 0);
+        if (found != -1) {
+                *ret = found;
+                return 0;
+        }
+
+        return negative_errno();
+}
+
+/* Reads the payload of the key 'serial' with KEYCTL_READ and splits it, as a NUL-separated
+ * list, into a string vector.
+ *
+ * The read buffer starts at 100 bytes and is doubled until the payload fits. As the payload is
+ * key material, every buffer is wiped before it is released.
+ *
+ * Returns 0 and stores the newly allocated vector in *ret, or a negative errno-style value. */
+static int retrieve_key(key_serial_t serial, char ***ret) {
+        _cleanup_free_ char *p = NULL;
+        long m = 100, n;
+        char **l;
+
+        assert(ret);
+
+        for (;;) {
+                p = new(char, m);
+                if (!p)
+                        return -ENOMEM;
+
+                n = keyctl(KEYCTL_READ, (unsigned long) serial, (unsigned long) p, (unsigned long) m, 0);
+                if (n < 0)
+                        return -errno;
+
+                if (n < m)
+                        break;
+
+                /* Buffer too small: n is the size that would have been needed, which may exceed
+                 * what we allocated. Hence wipe the m bytes we own, never n bytes. */
+                explicit_bzero_safe(p, m);
+                free(p);
+                p = NULL; /* don't leave a dangling pointer around for the cleanup handler */
+
+                if (m > LONG_MAX / 2) /* overflow check */
+                        return -ENOMEM;
+                m *= 2;
+        }
+
+        l = strv_parse_nulstr(p, n);
+
+        /* Wipe the raw payload regardless of whether splitting it worked */
+        explicit_bzero_safe(p, n);
+
+        if (!l)
+                return -ENOMEM;
+
+        *ret = l;
+        return 0;
+}
+
+/* Stores 'passwords' in the user keyring under 'keyname', merged with whatever the key already
+ * holds. Does nothing unless ASK_PASSWORD_PUSH_CACHE is set in 'flags'.
+ *
+ * Returns 1 if the key was written, 0 if there was nothing to do (flag not set, or
+ * strv_extend_strv() reported 0), and a negative errno-style value on failure. */
+static int add_to_keyring(const char *keyname, AskPasswordFlags flags, char **passwords) {
+        _cleanup_strv_free_erase_ char **merged = NULL;
+        _cleanup_free_ char *payload = NULL;
+        key_serial_t serial;
+        size_t size;
+        int r;
+
+        assert(keyname);
+        assert(passwords);
+
+        if ((flags & ASK_PASSWORD_PUSH_CACHE) == 0)
+                return 0;
+
+        /* Pick up what is cached already; a missing key simply means we start from scratch */
+        r = lookup_key(keyname, &serial);
+        if (r < 0 && r != -ENOKEY)
+                return r;
+        if (r >= 0) {
+                r = retrieve_key(serial, &merged);
+                if (r < 0)
+                        return r;
+        }
+
+        r = strv_extend_strv(&merged, passwords, true);
+        if (r <= 0)
+                return r;
+
+        r = strv_make_nulstr(merged, &payload, &size);
+        if (r < 0)
+                return r;
+
+        serial = add_key("user", keyname, payload, size, KEY_SPEC_USER_KEYRING);
+        /* The serialized form is of no further use, wipe it before doing anything else */
+        explicit_bzero_safe(payload, size);
+        if (serial == -1)
+                return -errno;
+
+        r = keyctl(KEYCTL_SET_TIMEOUT,
+                   (unsigned long) serial,
+                   (unsigned long) DIV_ROUND_UP(KEYRING_TIMEOUT_USEC, USEC_PER_SEC), 0, 0);
+        if (r < 0)
+                log_debug_errno(errno, "Failed to adjust timeout: %m");
+
+        /* Tell everyone to check the keyring */
+        (void) touch("/run/systemd/ask-password");
+
+        log_debug("Added key to keyring as %" PRIi32 ".", serial);
+
+        return 1;
+}
+
+/* Wrapper around add_to_keyring() that logs a failure at debug level. Returns the value of
+ * log_debug_errno() on failure and 0 otherwise, whether or not a key was actually written. */
+static int add_to_keyring_and_log(const char *keyname, AskPasswordFlags flags, char **passwords) {
+        int r;
+
+        assert(keyname);
+        assert(passwords);
+
+        r = add_to_keyring(keyname, flags, passwords);
+        if (r >= 0)
+                return 0;
+
+        return log_debug_errno(r, "Failed to add password to keyring: %m");
+}
+
+/* Fetches the passwords cached in the keyring under 'keyname'.
+ *
+ * Returns -EUNATCH if ASK_PASSWORD_ACCEPT_CACHED is not set in 'flags', -ENOKEY if the lookup
+ * failed with -ENOKEY or -ENOSYS, another negative errno-style value on other failures, and
+ * otherwise whatever retrieve_key() returns (0 on success, with the passwords in *ret). */
+static int ask_password_keyring(const char *keyname, AskPasswordFlags flags, char ***ret) {
+        key_serial_t serial;
+        int r;
+
+        assert(keyname);
+        assert(ret);
+
+        if ((flags & ASK_PASSWORD_ACCEPT_CACHED) == 0)
+                return -EUNATCH;
+
+        r = lookup_key(keyname, &serial);
+        if (r >= 0)
+                return retrieve_key(serial, ret);
+
+        /* when retrieving the distinction doesn't matter */
+        return r == -ENOSYS ? -ENOKEY : r;
+}
+
+/* Writes the sequence "\b \b" to 'ttyfd' 'p' times, i.e. rubs out 'p' characters. Does nothing
+ * if 'ttyfd' is negative; write errors are ignored. */
+static void backspace_chars(int ttyfd, size_t p) {
+        size_t i;
+
+        if (ttyfd < 0)
+                return;
+
+        for (i = 0; i < p; i++)
+                (void) loop_write(ttyfd, "\b \b", 3, false);
+}
+
+/* Backspaces through enough characters to entirely undo printing of the specified string.
+ * Does nothing if 'ttyfd' is negative. */
+static void backspace_string(int ttyfd, const char *str) {
+        size_t count;
+
+        assert(str);
+
+        if (ttyfd < 0)
+                return;
+
+        count = utf8_n_codepoints(str);
+
+        /* Not a valid UTF-8 string? If so, let's backspace the number of bytes output. Most likely
+         * this happened because we are not in an UTF-8 locale, and in that case that is the
+         * correct thing to do. And even if it's not, terminals tend to stop backspacing at the
+         * leftmost column, hence backspacing too much should be mostly OK. */
+        if (count == (size_t) -1)
+                count = strlen(str);
+
+        backspace_chars(ttyfd, count);
+}
+
+/* Queries a password interactively on a terminal.
+ *
+ * ttyfd:     terminal to use; if negative, /dev/tty is opened, and if that fails too input is
+ *            read from STDIN_FILENO without any terminal handling.
+ * message:   prompt to show, "Password:" if NULL.
+ * keyname:   if non-NULL, name of the keyring entry to consult (with ASK_PASSWORD_ACCEPT_CACHED)
+ *            and to push the entered password to (via add_to_keyring_and_log()).
+ * until:     CLOCK_MONOTONIC deadline in usec, 0 for none.
+ * flags:     ASK_PASSWORD_* flags.
+ * flag_file: if non-NULL, the query is aborted as soon as this file is not accessible anymore.
+ * ret:       on success receives a string vector with the password(s).
+ *
+ * Returns 0 on success, -EUNATCH if ASK_PASSWORD_NO_TTY is set, -ETIME if the deadline passed,
+ * or another negative errno-style value. */
+int ask_password_tty(
+                int ttyfd,
+                const char *message,
+                const char *keyname,
+                usec_t until,
+                AskPasswordFlags flags,
+                const char *flag_file,
+                char ***ret) {
+
+        enum {
+                POLL_TTY,
+                POLL_INOTIFY,
+                _POLL_MAX,
+        };
+
+        bool reset_tty = false, dirty = false, use_color = false;
+        _cleanup_close_ int cttyfd = -1, notify = -1;
+        struct termios old_termios, new_termios;
+        char passphrase[LINE_MAX + 1] = {}, *x;
+        _cleanup_strv_free_erase_ char **l = NULL;
+        struct pollfd pollfd[_POLL_MAX];
+        size_t p = 0, codepoint = 0; /* bytes entered so far; offset of the first incomplete codepoint */
+        int r;
+
+        assert(ret);
+
+        if (flags & ASK_PASSWORD_NO_TTY)
+                return -EUNATCH;
+
+        if (!message)
+                message = "Password:";
+
+        if (flag_file || ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname)) {
+                notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK);
+                if (notify < 0)
+                        return -errno;
+        }
+        if (flag_file) {
+                if (inotify_add_watch(notify, flag_file, IN_ATTRIB /* for the link count */) < 0)
+                        return -errno;
+        }
+        if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+                /* Maybe the password is cached already, in which case we don't need to ask at all */
+                r = ask_password_keyring(keyname, flags, ret);
+                if (r >= 0)
+                        return 0;
+                else if (r != -ENOKEY)
+                        return r;
+
+                if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0)
+                        return -errno;
+        }
+
+        /* If the caller didn't specify a TTY, then use the controlling tty, if we can. */
+        if (ttyfd < 0)
+                ttyfd = cttyfd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC);
+
+        if (ttyfd >= 0) {
+                if (tcgetattr(ttyfd, &old_termios) < 0)
+                        return -errno;
+
+                if (flags & ASK_PASSWORD_CONSOLE_COLOR)
+                        use_color = dev_console_colors_enabled();
+                else
+                        use_color = colors_enabled();
+
+                if (use_color)
+                        (void) loop_write(ttyfd, ANSI_HIGHLIGHT, STRLEN(ANSI_HIGHLIGHT), false);
+
+                (void) loop_write(ttyfd, message, strlen(message), false);
+                (void) loop_write(ttyfd, " ", 1, false);
+
+                if (use_color)
+                        (void) loop_write(ttyfd, ANSI_NORMAL, STRLEN(ANSI_NORMAL), false);
+
+                /* Turn off line buffering and echo, we process every byte ourselves */
+                new_termios = old_termios;
+                new_termios.c_lflag &= ~(ICANON|ECHO);
+                new_termios.c_cc[VMIN] = 1;
+                new_termios.c_cc[VTIME] = 0;
+
+                if (tcsetattr(ttyfd, TCSADRAIN, &new_termios) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
+
+                reset_tty = true;
+        }
+
+        pollfd[POLL_TTY] = (struct pollfd) {
+                .fd = ttyfd >= 0 ? ttyfd : STDIN_FILENO,
+                .events = POLLIN,
+        };
+        pollfd[POLL_INOTIFY] = (struct pollfd) {
+                .fd = notify,
+                .events = POLLIN,
+        };
+
+        for (;;) {
+                int sleep_for = -1, k;
+                ssize_t n;
+                char c;
+
+                if (until > 0) {
+                        usec_t y;
+
+                        y = now(CLOCK_MONOTONIC);
+
+                        if (y > until) {
+                                r = -ETIME;
+                                goto finish;
+                        }
+
+                        sleep_for = (int) DIV_ROUND_UP(until - y, USEC_PER_MSEC);
+                }
+
+                if (flag_file)
+                        if (access(flag_file, F_OK) < 0) {
+                                r = -errno;
+                                goto finish;
+                        }
+
+                k = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for);
+                if (k < 0) {
+                        if (errno == EINTR)
+                                continue;
+
+                        r = -errno;
+                        goto finish;
+                } else if (k == 0) {
+                        r = -ETIME;
+                        goto finish;
+                }
+
+                if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0 && keyname) {
+                        (void) flush_fd(notify);
+
+                        /* Something changed, maybe somebody else cached the password meanwhile */
+                        r = ask_password_keyring(keyname, flags, ret);
+                        if (r >= 0) {
+                                r = 0;
+                                goto finish;
+                        } else if (r != -ENOKEY)
+                                goto finish;
+                }
+
+                if (pollfd[POLL_TTY].revents == 0)
+                        continue;
+
+                n = read(ttyfd >= 0 ? ttyfd : STDIN_FILENO, &c, 1);
+                if (n < 0) {
+                        if (IN_SET(errno, EINTR, EAGAIN))
+                                continue;
+
+                        r = -errno;
+                        goto finish;
+
+                }
+
+                /* We treat EOF, newline and NUL byte all as valid end markers */
+                if (n == 0 || c == '\n' || c == 0)
+                        break;
+
+                if (c == 21) { /* C-u */
+
+                        if (!(flags & ASK_PASSWORD_SILENT))
+                                backspace_string(ttyfd, passphrase);
+
+                        explicit_bzero_safe(passphrase, sizeof(passphrase));
+                        p = codepoint = 0;
+
+                } else if (IN_SET(c, '\b', 127)) {
+
+                        if (p > 0) {
+                                size_t q;
+
+                                if (!(flags & ASK_PASSWORD_SILENT))
+                                        backspace_chars(ttyfd, 1);
+
+                                /* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one
+                                 * begins */
+                                q = 0;
+                                for (;;) {
+                                        int z;
+
+                                        /* Keep the result signed: failure is signalled with a
+                                         * negative value (see the check further down), which must
+                                         * not be mistaken for a huge length. */
+                                        z = utf8_encoded_valid_unichar(passphrase + q);
+                                        if (z <= 0) {
+                                                q = (size_t) -1; /* Invalid UTF8! */
+                                                break;
+                                        }
+
+                                        if (q + (size_t) z >= p) /* This one brings us over the edge */
+                                                break;
+
+                                        q += (size_t) z;
+                                }
+
+                                /* For invalid UTF-8 fall back to dropping a single byte */
+                                p = codepoint = q == (size_t) -1 ? p - 1 : q;
+                                explicit_bzero_safe(passphrase + p, sizeof(passphrase) - p);
+
+                        } else if (!dirty && !(flags & ASK_PASSWORD_SILENT)) {
+
+                                flags |= ASK_PASSWORD_SILENT;
+
+                                /* There are two ways to enter silent mode. Either by pressing backspace as first key
+                                 * (and only as first key), or ... */
+
+                                if (ttyfd >= 0)
+                                        (void) loop_write(ttyfd, "(no echo) ", 10, false);
+
+                        } else if (ttyfd >= 0)
+                                (void) loop_write(ttyfd, "\a", 1, false);
+
+                } else if (c == '\t' && !(flags & ASK_PASSWORD_SILENT)) {
+
+                        backspace_string(ttyfd, passphrase);
+                        flags |= ASK_PASSWORD_SILENT;
+
+                        /* ... or by pressing TAB at any time. */
+
+                        if (ttyfd >= 0)
+                                (void) loop_write(ttyfd, "(no echo) ", 10, false);
+
+                } else if (p >= sizeof(passphrase)-1) {
+
+                        /* Reached the size limit */
+                        if (ttyfd >= 0)
+                                (void) loop_write(ttyfd, "\a", 1, false);
+
+                } else {
+                        passphrase[p++] = c;
+
+                        if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
+                                /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
+                                n = utf8_encoded_valid_unichar(passphrase + codepoint);
+                                if (n >= 0) {
+                                        codepoint = p;
+                                        (void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false);
+                                }
+                        }
+
+                        dirty = true;
+                }
+
+                /* Let's forget this char, just to not keep needlessly copies of key material around */
+                c = 'x';
+        }
+
+        x = strndup(passphrase, p);
+        explicit_bzero_safe(passphrase, sizeof(passphrase));
+        if (!x) {
+                r = -ENOMEM;
+                goto finish;
+        }
+
+        r = strv_consume(&l, x);
+        if (r < 0)
+                goto finish;
+
+        if (keyname)
+                (void) add_to_keyring_and_log(keyname, flags, l);
+
+        *ret = TAKE_PTR(l);
+        r = 0;
+
+finish:
+        /* Put the terminal back into the state we found it in */
+        if (ttyfd >= 0 && reset_tty) {
+                (void) loop_write(ttyfd, "\n", 1, false);
+                (void) tcsetattr(ttyfd, TCSADRAIN, &old_termios);
+        }
+
+        return r;
+}
+
+/* Creates a non-blocking AF_UNIX datagram socket bound to a randomly named path below
+ * /run/systemd/ask-password/ (bound under umask 0177) and enables SO_PASSCRED on it.
+ *
+ * Returns the socket fd and stores the newly allocated path in *ret; on failure a negative
+ * errno-style value is returned. */
+static int create_socket(char **ret) {
+        _cleanup_free_ char *sock_path = NULL;
+        _cleanup_close_ int sock = -1;
+        union sockaddr_union addr = {};
+        int addr_len, r;
+
+        assert(ret);
+
+        sock = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+        if (sock < 0)
+                return -errno;
+
+        r = asprintf(&sock_path, "/run/systemd/ask-password/sck.%" PRIx64, random_u64());
+        if (r < 0)
+                return -ENOMEM;
+
+        addr_len = sockaddr_un_set_path(&addr.un, sock_path);
+        if (addr_len < 0)
+                return addr_len;
+
+        /* Bind with a restrictive umask in effect */
+        RUN_WITH_UMASK(0177) {
+                if (bind(sock, &addr.sa, addr_len) < 0)
+                        return -errno;
+        }
+
+        r = setsockopt_int(sock, SOL_SOCKET, SO_PASSCRED, true);
+        if (r < 0)
+                return r;
+
+        /* Ownership of both the path and the fd passes to the caller */
+        *ret = TAKE_PTR(sock_path);
+        return TAKE_FD(sock);
+}
+
+ /* Queries a password through the ask-password agent protocol.
+ *
+ * A request file is written below /run/systemd/ask-password/ (first as "tmp.XXXXXX", then renamed
+ * to "ask.XXXXXX") that names a freshly created datagram socket. The function then waits on that
+ * socket for a reply, while SIGINT/SIGTERM are blocked and picked up through a signalfd.
+ *
+ * If ASK_PASSWORD_ACCEPT_CACHED is set and a keyname is given, the keyring is consulted first, and
+ * again whenever the inotify watch on the directory fires.
+ *
+ * @until is compared against CLOCK_MONOTONIC; 0 means "no timeout".
+ *
+ * Returns 0 on success, with the password list stored in *ret. Errors returned here:
+ * -EUNATCH if ASK_PASSWORD_NO_AGENT is set, -ETIME on timeout, -EINTR if one of the blocked
+ * signals arrived, -ECANCELED if the reply starts with '-', -EIO on unexpected poll events on
+ * the socket, -ENOMEM on allocation failure, or the error of a failing helper or system call. */
+ int ask_password_agent(
+ const char *message,
+ const char *icon,
+ const char *id,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ char ***ret) {
+
+ /* Indexes into pollfd[]. FD_INOTIFY is last so that it can be left out of the poll() call. */
+ enum {
+ FD_SOCKET,
+ FD_SIGNAL,
+ FD_INOTIFY,
+ _FD_MAX
+ };
+
+ _cleanup_close_ int socket_fd = -1, signal_fd = -1, notify = -1, fd = -1;
+ char temp[] = "/run/systemd/ask-password/tmp.XXXXXX";
+ char final[sizeof(temp)] = "";
+ _cleanup_free_ char *socket_name = NULL;
+ _cleanup_strv_free_erase_ char **l = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct pollfd pollfd[_FD_MAX];
+ sigset_t mask, oldmask;
+ int r;
+
+ assert(ret);
+
+ if (flags & ASK_PASSWORD_NO_AGENT)
+ return -EUNATCH;
+
+ /* Block SIGINT/SIGTERM; they are consumed through signal_fd below. The old mask is restored at "finish". */
+ assert_se(sigemptyset(&mask) >= 0);
+ assert_se(sigset_add_many(&mask, SIGINT, SIGTERM, -1) >= 0);
+ assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) >= 0);
+
+ (void) mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+ /* Try the keyring first; only -ENOKEY makes us continue with the agent query. */
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+
+ /* Watch the directory so that the keyring can be re-checked while we wait (see the poll loop). */
+ notify = inotify_init1(IN_CLOEXEC | IN_NONBLOCK);
+ if (notify < 0) {
+ r = -errno;
+ goto finish;
+ }
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ fd = mkostemp_safe(temp);
+ if (fd < 0) {
+ r = fd;
+ goto finish;
+ }
+
+ (void) fchmod(fd, 0644);
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* The FILE stream owns the descriptor now; disarm the _cleanup_close_ handler. */
+ fd = -1;
+
+ signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (signal_fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ socket_fd = create_socket(&socket_name);
+ if (socket_fd < 0) {
+ r = socket_fd;
+ goto finish;
+ }
+
+ /* Write the request; write errors are detected by fflush_and_check() below. */
+ fprintf(f,
+ "[Ask]\n"
+ "PID="PID_FMT"\n"
+ "Socket=%s\n"
+ "AcceptCached=%i\n"
+ "Echo=%i\n"
+ "NotAfter="USEC_FMT"\n",
+ getpid_cached(),
+ socket_name,
+ (flags & ASK_PASSWORD_ACCEPT_CACHED) ? 1 : 0,
+ (flags & ASK_PASSWORD_ECHO) ? 1 : 0,
+ until);
+
+ if (message)
+ fprintf(f, "Message=%s\n", message);
+
+ if (icon)
+ fprintf(f, "Icon=%s\n", icon);
+
+ if (id)
+ fprintf(f, "Id=%s\n", id);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto finish;
+
+ memcpy(final, temp, sizeof(temp));
+
+ /* The last 11 bytes of the buffer are "tmp.XXXXXX" plus the trailing NUL: turn "tmp" into "ask". */
+ final[sizeof(final)-11] = 'a';
+ final[sizeof(final)-10] = 's';
+ final[sizeof(final)-9] = 'k';
+
+ if (rename(temp, final) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ zero(pollfd);
+ pollfd[FD_SOCKET].fd = socket_fd;
+ pollfd[FD_SOCKET].events = POLLIN;
+ pollfd[FD_SIGNAL].fd = signal_fd;
+ pollfd[FD_SIGNAL].events = POLLIN;
+ pollfd[FD_INOTIFY].fd = notify;
+ pollfd[FD_INOTIFY].events = POLLIN;
+
+ for (;;) {
+ char passphrase[LINE_MAX+1];
+ struct msghdr msghdr;
+ struct iovec iovec;
+ struct ucred *ucred;
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
+ } control;
+ ssize_t n;
+ int k;
+ usec_t t;
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (until > 0 && until <= t) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ /* The inotify slot is only polled if a watch was set up. The remaining time is converted to
+ * milliseconds. NOTE(review): the (int) cast looks like it could overflow for deadlines
+ * far in the future; confirm whether callers can pass such values. */
+ k = poll(pollfd, notify >= 0 ? _FD_MAX : _FD_MAX - 1, until > 0 ? (int) ((until-t)/USEC_PER_MSEC) : -1);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ }
+
+ /* poll() returned without any ready descriptor, i.e. the timeout elapsed */
+ if (k <= 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ /* One of the blocked signals (SIGINT/SIGTERM) is pending */
+ if (pollfd[FD_SIGNAL].revents & POLLIN) {
+ r = -EINTR;
+ goto finish;
+ }
+
+ /* Something changed in the directory: check whether the key showed up in the keyring. */
+ if (notify >= 0 && pollfd[FD_INOTIFY].revents != 0) {
+ (void) flush_fd(notify);
+
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+ }
+
+ if (pollfd[FD_SOCKET].revents == 0)
+ continue;
+
+ /* Any event other than plain POLLIN on the socket is treated as an error. */
+ if (pollfd[FD_SOCKET].revents != POLLIN) {
+ r = -EIO;
+ goto finish;
+ }
+
+ iovec = IOVEC_MAKE(passphrase, sizeof(passphrase));
+
+ zero(control);
+ zero(msghdr);
+ msghdr.msg_iov = &iovec;
+ msghdr.msg_iovlen = 1;
+ msghdr.msg_control = &control;
+ msghdr.msg_controllen = sizeof(control);
+
+ n = recvmsg(socket_fd, &msghdr, 0);
+ if (n < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ continue;
+
+ r = -errno;
+ goto finish;
+ }
+
+ cmsg_close_all(&msghdr);
+
+ if (n <= 0) {
+ log_debug("Message too short");
+ continue;
+ }
+
+ /* Only accept datagrams that carry exactly one SCM_CREDENTIALS control message ... */
+ if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
+ control.cmsghdr.cmsg_level != SOL_SOCKET ||
+ control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
+ control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
+ log_debug("Received message without credentials. Ignoring.");
+ continue;
+ }
+
+ /* ... and whose sender is uid 0. */
+ ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
+ if (ucred->uid != 0) {
+ log_debug("Got request from unprivileged user. Ignoring.");
+ continue;
+ }
+
+ /* A reply starting with '+' carries the password(s) in the remaining bytes, parsed as a nulstr. */
+ if (passphrase[0] == '+') {
+ /* An empty message refers to the empty password */
+ if (n == 1)
+ l = strv_new("");
+ else
+ l = strv_parse_nulstr(passphrase+1, n-1);
+ /* Wipe the receive buffer right away, whether or not parsing succeeded. */
+ explicit_bzero_safe(passphrase, n);
+ if (!l) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (strv_isempty(l)) {
+ l = strv_free(l);
+ log_debug("Invalid packet");
+ continue;
+ }
+
+ break;
+ }
+
+ /* A reply starting with '-' cancels the query. */
+ if (passphrase[0] == '-') {
+ r = -ECANCELED;
+ goto finish;
+ }
+
+ log_debug("Invalid packet");
+ }
+
+ if (keyname)
+ (void) add_to_keyring_and_log(keyname, flags, l);
+
+ *ret = TAKE_PTR(l);
+ r = 0;
+
+ finish:
+ /* Remove everything we may have created in the file system, then restore the original signal mask. */
+ if (socket_name)
+ (void) unlink(socket_name);
+
+ (void) unlink(temp);
+
+ if (final[0])
+ (void) unlink(final);
+
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);
+ return r;
+ }
+
+ /* Acquires a password through the best mechanism that the flags permit.
+  *
+  * The TTY is used if ASK_PASSWORD_NO_TTY is unset and stdin is a terminal; otherwise the agent is
+  * used unless ASK_PASSWORD_NO_AGENT is set. If neither is usable, but ASK_PASSWORD_ACCEPT_CACHED
+  * is set and a keyname is given, the keyring is queried directly.
+  *
+  * Returns the result of the chosen backend, or -EUNATCH if no backend is available. */
+ int ask_password_auto(
+                 const char *message,
+                 const char *icon,
+                 const char *id,
+                 const char *keyname,
+                 usec_t until,
+                 AskPasswordFlags flags,
+                 char ***ret) {
+
+         bool use_tty, use_agent;
+         int r;
+
+         assert(ret);
+
+         use_tty = !(flags & ASK_PASSWORD_NO_TTY) && isatty(STDIN_FILENO);
+         use_agent = !(flags & ASK_PASSWORD_NO_AGENT);
+
+         /* With neither interactive backend usable, the keyring is the only source left. */
+         if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname && !use_tty && !use_agent) {
+                 r = ask_password_keyring(keyname, flags, ret);
+                 if (r != -ENOKEY)
+                         return r;
+         }
+
+         if (use_tty)
+                 return ask_password_tty(-1, message, keyname, until, flags, NULL, ret);
+
+         if (use_agent)
+                 return ask_password_agent(message, icon, id, keyname, until, flags, ret);
+
+         return -EUNATCH;
+ }
diff --git a/src/shared/ask-password-api.h b/src/shared/ask-password-api.h
new file mode 100644
index 0000000..15762b9
--- /dev/null
+++ b/src/shared/ask-password-api.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+ /* Bit flags that control how the ask_password_*() calls acquire a password. */
+ typedef enum AskPasswordFlags {
+ /* try the keyring (if a keyname is passed) before or instead of asking interactively */
+ ASK_PASSWORD_ACCEPT_CACHED = 1 << 0,
+ /* NOTE(review): presumably pushes the acquired password into the keyring cache; confirm against add_to_keyring_and_log() */
+ ASK_PASSWORD_PUSH_CACHE = 1 << 1,
+ ASK_PASSWORD_ECHO = 1 << 2, /* show the password literally while reading, instead of "*" */
+ ASK_PASSWORD_SILENT = 1 << 3, /* do not show any password at all while reading */
+ /* ask_password_auto() never queries on the TTY */
+ ASK_PASSWORD_NO_TTY = 1 << 4,
+ /* ask_password_agent() refuses with -EUNATCH, ask_password_auto() never queries the agent */
+ ASK_PASSWORD_NO_AGENT = 1 << 5,
+ ASK_PASSWORD_CONSOLE_COLOR = 1 << 6, /* Use color if /dev/console points to a console that supports color */
+ } AskPasswordFlags;
+
+int ask_password_tty(int tty_fd, const char *message, const char *keyname, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret);
+int ask_password_agent(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
+int ask_password_auto(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
diff --git a/src/shared/barrier.c b/src/shared/barrier.c
new file mode 100644
index 0000000..bb5869d
--- /dev/null
+++ b/src/shared/barrier.c
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "fd-util.h"
+#include "macro.h"
+
+/**
+ * Barriers
+ * This barrier implementation provides a simple synchronization method based
+ * on file-descriptors that can safely be used between threads and processes. A
+ * barrier object contains 2 shared counters based on eventfd. Both processes
+ * can now place barriers and wait for the other end to reach a random or
+ * specific barrier.
+ * Barriers are numbered, so you can either wait for the other end to reach any
+ * barrier or the last barrier that you placed. This way, you can use barriers
+ * for one-way *and* full synchronization. Note that even though barriers are
+ * numbered, these numbers are internal and recycled once both sides reached the
+ * same barrier (implemented as a simple signed counter). It is thus not
+ * possible to address barriers by their ID.
+ *
+ * Barrier-API: Both ends can place as many barriers via barrier_place() as
+ * they want and each pair of barriers on both sides will be implicitly linked.
+ * Each side can use the barrier_wait/sync_*() family of calls to wait for the
+ * other side to place a specific barrier. barrier_wait_next() waits until the
+ * other side calls barrier_place(). No links between the barriers are
+ * considered and this simply serves as most basic asynchronous barrier.
+ * barrier_sync_next() is like barrier_wait_next() and waits for the other side
+ * to place their next barrier via barrier_place(). However, it only waits for
+ * barriers that are linked to a barrier we already placed. If the other side
+ * already placed more barriers than we did, barrier_sync_next() returns
+ * immediately.
+ * barrier_sync() extends barrier_sync_next() and waits until the other end
+ * placed as many barriers via barrier_place() as we did. If they already placed
+ * as many as we did (or more), it returns immediately.
+ *
+ * Additionally to basic barriers, an abortion event is available.
+ * barrier_abort() places an abortion event that cannot be undone. An abortion
+ * immediately cancels all placed barriers and replaces them. Any running and
+ * following wait/sync call besides barrier_wait_abortion() will immediately
+ * return false on both sides (otherwise, they always return true).
+ * barrier_abort() can be called multiple times on both ends and will be a
+ * no-op if already called on this side.
+ * barrier_wait_abortion() can be used to wait for the other side to call
+ * barrier_abort() and is the only wait/sync call that does not return
+ * immediately if we aborted ourselves. It only returns once the other side
+ * called barrier_abort().
+ *
+ * Barriers can be used for in-process and inter-process synchronization.
+ * However, for in-process synchronization you could just use mutexes.
+ * Therefore, main target is IPC and we require both sides to *not* share the FD
+ * table. If that's given, barriers provide target tracking: If the remote side
+ * exit()s, an abortion event is implicitly queued on the other side. This way,
+ * a sync/wait call will be woken up if the remote side crashed or exited
+ * unexpectedly. However, note that these abortion events are only queued if the
+ * barrier-queue has been drained. Therefore, it is safe to place a barrier and
+ * exit. The other side can safely wait on the barrier even though the exit
+ * queued an abortion event. Usually, the abortion event would overwrite the
+ * barrier, however, that's not true for exit-abortion events. Those are only
+ * queued if the barrier-queue is drained (thus, the receiving side has placed
+ * more barriers than the remote side).
+ */
+
+/**
+ * barrier_create() - Initialize a barrier object
+ * @obj: barrier to initialize
+ *
+ * This initializes a barrier object. The caller is responsible of allocating
+ * the memory and keeping it valid. The memory does not have to be zeroed
+ * beforehand.
+ * Two eventfd objects are allocated for each barrier. If allocation fails, an
+ * error is returned.
+ *
+ * If this function fails, the barrier is reset to an invalid state so it is
+ * safe to call barrier_destroy() on the object regardless whether the
+ * initialization succeeded or not.
+ *
+ * The caller is responsible to destroy the object via barrier_destroy() before
+ * releasing the underlying memory.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int barrier_create(Barrier *b) {
+ _cleanup_(barrier_destroyp) Barrier *staging = b;
+ int r;
+
+ assert(b);
+
+ b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->me < 0)
+ return -errno;
+
+ b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->them < 0)
+ return -errno;
+
+ r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ staging = NULL;
+ return 0;
+}
+
+/**
+ * barrier_destroy() - Destroy a barrier object
+ * @b: barrier to destroy or NULL
+ *
+ * This destroys a barrier object that has previously been passed to
+ * barrier_create(). The object is released and reset to invalid
+ * state. Therefore, it is safe to call barrier_destroy() multiple
+ * times or even if barrier_create() failed. However, barrier must be
+ * always initialized with BARRIER_NULL.
+ *
+ * If @b is NULL, this is a no-op.
+ */
+void barrier_destroy(Barrier *b) {
+ if (!b)
+ return;
+
+ b->me = safe_close(b->me);
+ b->them = safe_close(b->them);
+ safe_close_pair(b->pipe);
+ b->barriers = 0;
+}
+
+/**
+ * barrier_set_role() - Set the local role of the barrier
+ * @b: barrier to operate on
+ * @role: role to set on the barrier
+ *
+ * This sets the roles on a barrier object. This is needed to know
+ * which side of the barrier you're on. Usually, the parent creates
+ * the barrier via barrier_create() and then calls fork() or clone().
+ * Therefore, the FDs are duplicated and the child retains the same
+ * barrier object.
+ *
+ * Both sides need to call barrier_set_role() after fork() or clone()
+ * are done. If this is not done, barriers will not work correctly.
+ *
+ * Note that barriers could be supported without fork() or clone(). However,
+ * this is currently not needed so it hasn't been implemented.
+ */
+void barrier_set_role(Barrier *b, unsigned role) {
+ assert(b);
+ assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
+ /* make sure this is only called once */
+ assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
+
+ if (role == BARRIER_PARENT)
+ b->pipe[1] = safe_close(b->pipe[1]);
+ else {
+ b->pipe[0] = safe_close(b->pipe[0]);
+
+ /* swap me/them for children */
+ SWAP_TWO(b->me, b->them);
+ }
+}
+
+ /* Writes @buf to our eventfd, i.e. places barriers (or an abortion event) for the other side to
+  * read, and updates the local state accordingly. Returns false if we aborted, otherwise true. */
+ static bool barrier_write(Barrier *b, uint64_t buf) {
+         ssize_t written;
+
+         /* once we aborted, no new sync-points may be placed */
+         if (barrier_i_aborted(b))
+                 return false;
+
+         assert(b->me >= 0);
+
+         /* retry for as long as the write fails with EAGAIN or EINTR */
+         for (;;) {
+                 written = write(b->me, &buf, sizeof(buf));
+                 if (written >= 0 || !IN_SET(errno, EAGAIN, EINTR))
+                         break;
+         }
+
+         if (written != sizeof(buf)) {
+                 /* An unexpected error has to be fatal, as there is no way to recover from
+                  * sync-errors. Close the pipe-ends and treat this as abortion; the other end will
+                  * notice the pipe-close and treat it as abortion, too. */
+                 safe_close_pair(b->pipe);
+                 b->barriers = BARRIER_WE_ABORTED;
+                 return false;
+         }
+
+         if (buf >= (uint64_t) BARRIER_ABORTION)
+                 /* we just aborted: lock the state */
+                 b->barriers = barrier_they_aborted(b) ? BARRIER_WE_ABORTED : BARRIER_I_ABORTED;
+         else if (!barrier_is_aborted(b))
+                 b->barriers += buf;
+
+         return !barrier_i_aborted(b);
+ }
+
+ /* waits for barriers; returns false if they aborted, otherwise true
+ *
+ * Blocks until the local counter b->barriers has dropped to @comp or below. The counter is
+ * decremented by the values read from the other side's eventfd. The abortion states are stored in
+ * the same field and are all close to INT64_MIN, hence storing one of them ends the loop for
+ * every regular @comp. With @comp == BARRIER_THEY_ABORTED the loop only ends once the other side
+ * aborted, since BARRIER_I_ABORTED is the only state that compares greater. */
+ static bool barrier_read(Barrier *b, int64_t comp) {
+ if (barrier_they_aborted(b))
+ return false;
+
+ while (b->barriers > comp) {
+ /* Watch whichever pipe end is still open for a hang-up, and the other side's eventfd for data. */
+ struct pollfd pfd[2] = {
+ { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
+ .events = POLLHUP },
+ { .fd = b->them,
+ .events = POLLIN }};
+ uint64_t buf;
+ int r;
+
+ /* wait without timeout */
+ r = poll(pfd, 2, -1);
+ if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
+ continue;
+ else if (r < 0)
+ goto error;
+
+ if (pfd[1].revents) {
+ ssize_t len;
+
+ /* events on @them signal new data for us */
+ len = read(b->them, &buf, sizeof(buf));
+ if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
+ continue;
+
+ if (len != sizeof(buf))
+ goto error;
+ } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
+ /* POLLHUP on the pipe tells us the other side exited.
+ * We treat this as implicit abortion. But we only
+ * handle it if there's no event on the eventfd. This
+ * guarantees that exit-abortions do not overwrite real
+ * barriers. */
+ buf = BARRIER_ABORTION;
+ else
+ continue;
+
+ /* lock if they aborted */
+ if (buf >= (uint64_t)BARRIER_ABORTION) {
+ if (barrier_i_aborted(b))
+ b->barriers = BARRIER_WE_ABORTED;
+ else
+ b->barriers = BARRIER_THEY_ABORTED;
+ } else if (!barrier_is_aborted(b))
+ /* regular barriers placed by the other side reduce our count of outstanding barriers */
+ b->barriers -= buf;
+ }
+
+ return !barrier_they_aborted(b);
+
+ error:
+ /* If there is an unexpected error, we have to make this fatal. There
+ * is no way we can recover from sync-errors. Therefore, we close the
+ * pipe-ends and treat this as abortion. The other end will notice the
+ * pipe-close and treat it as abortion, too. */
+
+ safe_close_pair(b->pipe);
+ b->barriers = BARRIER_WE_ABORTED;
+ return false;
+ }
+
+/**
+ * barrier_place() - Place a new barrier
+ * @b: barrier object
+ *
+ * This places a new barrier on the barrier object. If either side already
+ * aborted, this is a no-op and returns "false". Otherwise, the barrier is
+ * placed and this returns "true".
+ *
+ * Returns: true if barrier was placed, false if either side aborted.
+ */
+bool barrier_place(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_write(b, BARRIER_SINGLE);
+ return true;
+}
+
+/**
+ * barrier_abort() - Abort the synchronization
+ * @b: barrier object to abort
+ *
+ * This aborts the barrier-synchronization. If barrier_abort() was already
+ * called on this side, this is a no-op. Otherwise, the barrier is put into the
+ * ABORT-state and will stay there. The other side is notified about the
+ * abortion. Any following attempt to place normal barriers or to wait on normal
+ * barriers will return immediately as "false".
+ *
+ * You can wait for the other side to call barrier_abort(), too. Use
+ * barrier_wait_abortion() for that.
+ *
+ * Returns: false if the other side already aborted, true otherwise.
+ */
+bool barrier_abort(Barrier *b) {
+ assert(b);
+
+ barrier_write(b, BARRIER_ABORTION);
+ return !barrier_they_aborted(b);
+}
+
+/**
+ * barrier_wait_next() - Wait for the next barrier of the other side
+ * @b: barrier to operate on
+ *
+ * This waits until the other side places its next barrier. This is independent
+ * of any barrier-links and just waits for any next barrier of the other side.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_wait_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, b->barriers - 1);
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_wait_abortion() - Wait for the other side to abort
+ * @b: barrier to operate on
+ *
+ * This waits until the other side called barrier_abort(). This can be called
+ * regardless whether the local side already called barrier_abort() or not.
+ *
+ * If the other side has already aborted, this returns immediately.
+ *
+ * Returns: false if the local side aborted, true otherwise.
+ */
+bool barrier_wait_abortion(Barrier *b) {
+ assert(b);
+
+ barrier_read(b, BARRIER_THEY_ABORTED);
+ return !barrier_i_aborted(b);
+}
+
+/**
+ * barrier_sync_next() - Wait for the other side to place a next linked barrier
+ * @b: barrier to operate on
+ *
+ * This is like barrier_wait_next() and waits for the other side to call
+ * barrier_place(). However, this only waits for linked barriers. That means, if
+ * the other side already placed more barriers than (or as much as) we did, this
+ * returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, MAX((int64_t)0, b->barriers - 1));
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_sync() - Wait for the other side to place as many barriers as we did
+ * @b: barrier to operate on
+ *
+ * This is like barrier_sync_next() but waits for the other side to call
+ * barrier_place() as often as we did (in total). If they already placed as much
+ * as we did (or more), this returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, 0);
+ return !barrier_is_aborted(b);
+}
diff --git a/src/shared/barrier.h b/src/shared/barrier.h
new file mode 100644
index 0000000..0eb3d27
--- /dev/null
+++ b/src/shared/barrier.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+/* See source file for an API description. */
+
+typedef struct Barrier Barrier;
+
+ /* Values written to the eventfds, and special values of Barrier.barriers that encode the
+ * abortion state. */
+ enum {
+ /* value written for a single barrier placed via barrier_place() */
+ BARRIER_SINGLE = 1LL,
+ /* value written by barrier_abort(); any value >= this one is treated as an abortion event */
+ BARRIER_ABORTION = INT64_MAX,
+
+ /* bias values to store state; keep @WE < @THEY < @I */
+ BARRIER_BIAS = INT64_MIN,
+ /* both sides aborted */
+ BARRIER_WE_ABORTED = BARRIER_BIAS + 1LL,
+ /* only the other side aborted */
+ BARRIER_THEY_ABORTED = BARRIER_BIAS + 2LL,
+ /* only the local side aborted */
+ BARRIER_I_ABORTED = BARRIER_BIAS + 3LL,
+ };
+
+ /* Roles accepted by barrier_set_role() */
+ enum {
+ BARRIER_PARENT,
+ BARRIER_CHILD,
+ };
+
+ /* State of one side of a barrier; see the source file for the API description. */
+ struct Barrier {
+ /* eventfd that the local side writes its barriers to */
+ int me;
+ /* eventfd that the barriers of the other side are read from */
+ int them;
+ /* pipe that is polled to notice when the other side went away */
+ int pipe[2];
+ /* barriers placed locally minus barriers read from the other side, or one of the BARRIER_*_ABORTED states */
+ int64_t barriers;
+ };
+
+#define BARRIER_NULL {-1, -1, {-1, -1}, 0}
+
+int barrier_create(Barrier *obj);
+void barrier_destroy(Barrier *b);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Barrier*, barrier_destroy);
+
+void barrier_set_role(Barrier *b, unsigned role);
+
+bool barrier_place(Barrier *b);
+bool barrier_abort(Barrier *b);
+
+bool barrier_wait_next(Barrier *b);
+bool barrier_wait_abortion(Barrier *b);
+bool barrier_sync_next(Barrier *b);
+bool barrier_sync(Barrier *b);
+
+ /* Returns true if the local side aborted, regardless of whether the other side did, too. */
+ static inline bool barrier_i_aborted(Barrier *b) {
+         return b->barriers == BARRIER_I_ABORTED ||
+                b->barriers == BARRIER_WE_ABORTED;
+ }
+
+ /* Returns true if the other side aborted, regardless of whether the local side did, too. */
+ static inline bool barrier_they_aborted(Barrier *b) {
+         return b->barriers == BARRIER_THEY_ABORTED ||
+                b->barriers == BARRIER_WE_ABORTED;
+ }
+
+ /* Returns true only if both sides aborted. */
+ static inline bool barrier_we_aborted(Barrier *b) {
+         const int64_t state = b->barriers;
+
+         return state == BARRIER_WE_ABORTED;
+ }
+
+ /* Returns true if at least one of the two sides aborted. */
+ static inline bool barrier_is_aborted(Barrier *b) {
+         return b->barriers == BARRIER_I_ABORTED ||
+                b->barriers == BARRIER_THEY_ABORTED ||
+                b->barriers == BARRIER_WE_ABORTED;
+ }
+
+ /* Places a barrier and then waits for the other side to catch up. The result of placing the
+  * barrier is ignored; the return value is that of barrier_sync(). */
+ static inline bool barrier_place_and_sync(Barrier *b) {
+         bool synced;
+
+         (void) barrier_place(b);
+         synced = barrier_sync(b);
+
+         return synced;
+ }
diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c
new file mode 100644
index 0000000..89d7a7d
--- /dev/null
+++ b/src/shared/base-filesystem.c
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+
+ /* Describes one entry that base_filesystem_create() creates below the root directory if missing. */
+ typedef struct BaseFilesystem {
+ /* name of the entry, relative to the root directory */
+ const char *dir;
+ /* access mode used when the entry is created as a directory */
+ mode_t mode;
+ /* if set, the entry is a symlink: NUL-separated list of candidate targets, the first usable one is taken */
+ const char *target;
+ /* if set, a candidate target is only usable if this file exists below it */
+ const char *exists;
+ /* if true, a failure to create the directory is only logged at debug level and otherwise ignored */
+ bool ignore_failure;
+ } BaseFilesystem;
+
+ /* The entries handled by base_filesystem_create(), in creation order. Entries with a target are
+ * created as symlinks, all others as directories. The lib64 entry exists on x86 builds only. */
+ static const BaseFilesystem table[] = {
+ { "bin", 0, "usr/bin\0", NULL },
+ { "lib", 0, "usr/lib\0", NULL },
+ { "root", 0755, NULL, NULL, true },
+ { "sbin", 0, "usr/sbin\0", NULL },
+ { "usr", 0755, NULL, NULL },
+ { "var", 0755, NULL, NULL },
+ { "etc", 0755, NULL, NULL },
+ { "proc", 0755, NULL, NULL, true },
+ { "sys", 0755, NULL, NULL, true },
+ { "dev", 0755, NULL, NULL, true },
+ #if defined(__i386__) || defined(__x86_64__)
+ { "lib64", 0, "usr/lib/x86_64-linux-gnu\0"
+ "usr/lib64\0", "ld-linux-x86-64.so.2" },
+ #endif
+ };
+
+ /* Creates the entries listed in table[] below @root, as far as they are missing.
+  *
+  * Entries with a target become symlinks to the first candidate target that exists (and that
+  * contains the entry's "exists" file, if one is configured). If no candidate qualifies, the entry
+  * is skipped. All other entries are created as directories with the configured mode.
+  *
+  * If @uid or @gid is valid, every created entry is chowned accordingly.
+  *
+  * Returns 0 on success, a negative errno-style error on failure. Failures to create a directory
+  * that is marked "ignore_failure" are logged at debug level only and do not abort the operation. */
+ int base_filesystem_create(const char *root, uid_t uid, gid_t gid) {
+         _cleanup_close_ int fd = -1;
+         int r = 0;
+         size_t i;
+
+         fd = open(root, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+         if (fd < 0)
+                 return log_error_errno(errno, "Failed to open root file system: %m");
+
+         for (i = 0; i < ELEMENTSOF(table); i++) {
+                 /* leave anything that exists already untouched */
+                 if (faccessat(fd, table[i].dir, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+                         continue;
+
+                 if (table[i].target) {
+                         const char *target = NULL, *s;
+
+                         /* check if one of the targets exists */
+                         NULSTR_FOREACH(s, table[i].target) {
+                                 if (faccessat(fd, s, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+                                         continue;
+
+                                 /* check if a specific file exists at the target path */
+                                 if (table[i].exists) {
+                                         _cleanup_free_ char *p = NULL;
+
+                                         p = strjoin(s, "/", table[i].exists);
+                                         if (!p)
+                                                 return log_oom();
+
+                                         if (faccessat(fd, p, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+                                                 continue;
+                                 }
+
+                                 target = s;
+                                 break;
+                         }
+
+                         if (!target)
+                                 continue;
+
+                         r = symlinkat(target, fd, table[i].dir);
+                         if (r < 0 && errno != EEXIST)
+                                 return log_error_errno(errno, "Failed to create symlink at %s/%s: %m", root, table[i].dir);
+
+                         if (uid_is_valid(uid) || gid_is_valid(gid)) {
+                                 if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+                                         return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir);
+                         }
+
+                         continue;
+                 }
+
+                 /* clear the umask so that the directory gets exactly the mode from the table */
+                 RUN_WITH_UMASK(0000)
+                         r = mkdirat(fd, table[i].dir, table[i].mode);
+                 if (r < 0 && errno != EEXIST) {
+                         log_full_errno(table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
+                                        "Failed to create directory at %s/%s: %m", root, table[i].dir);
+
+                         if (!table[i].ignore_failure)
+                                 return -errno;
+
+                         continue;
+                 }
+
+                 /* Use the same validity helpers as the symlink branch above, rather than comparing
+                  * the gid against the uid sentinel. */
+                 if (uid_is_valid(uid) || gid_is_valid(gid)) {
+                         if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+                                 return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, table[i].dir);
+                 }
+         }
+
+         return 0;
+ }
diff --git a/src/shared/base-filesystem.h b/src/shared/base-filesystem.h
new file mode 100644
index 0000000..39d9708
--- /dev/null
+++ b/src/shared/base-filesystem.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid);
diff --git a/src/shared/bitmap.c b/src/shared/bitmap.c
new file mode 100644
index 0000000..a4cd645
--- /dev/null
+++ b/src/shared/bitmap.c
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bitmap.h"
+#include "hashmap.h"
+#include "macro.h"
+
+ /* A growable set of small unsigned integers, stored as an array of 64-bit words. */
+ struct Bitmap {
+ /* the words; bit "rem" of word "offset" stands for the entry offset * 64 + rem */
+ uint64_t *bitmaps;
+ /* number of words currently in use */
+ size_t n_bitmaps;
+ /* allocation bookkeeping handed to GREEDY_REALLOC0() */
+ size_t bitmaps_allocated;
+ };
+
+/* Bitmaps are only meant to store relatively small numbers
+ * (corresponding to, say, an enum), so it is ok to limit
+ * the max entry. 64k should be plenty. */
+#define BITMAPS_MAX_ENTRY 0xffff
+
+/* This indicates that we reached the end of the bitmap */
+#define BITMAP_END ((unsigned) -1)
+
+#define BITMAP_NUM_TO_OFFSET(n) ((n) / (sizeof(uint64_t) * 8))
+#define BITMAP_NUM_TO_REM(n) ((n) % (sizeof(uint64_t) * 8))
+#define BITMAP_OFFSET_TO_NUM(offset, rem) ((offset) * sizeof(uint64_t) * 8 + (rem))
+
+ /* Allocates a new, zero-initialized (i.e. empty) bitmap. Returns NULL if the allocation fails. */
+ Bitmap *bitmap_new(void) {
+         Bitmap *b;
+
+         b = new0(Bitmap, 1);
+         return b;
+ }
+
+ /* Returns a newly allocated copy of @b that holds the same words, or NULL if an allocation
+  * fails. @b must not be NULL. */
+ Bitmap *bitmap_copy(Bitmap *b) {
+         Bitmap *copy = bitmap_new();
+
+         if (copy) {
+                 copy->bitmaps = newdup(uint64_t, b->bitmaps, b->n_bitmaps);
+                 if (!copy->bitmaps)
+                         return mfree(copy);
+
+                 /* the copy is allocated exactly as large as the number of words in use */
+                 copy->bitmaps_allocated = b->n_bitmaps;
+                 copy->n_bitmaps = b->n_bitmaps;
+         }
+
+         return copy;
+ }
+
+ /* Releases a bitmap together with its word array. NULL is accepted and ignored. */
+ void bitmap_free(Bitmap *b) {
+         if (b) {
+                 free(b->bitmaps);
+                 free(b);
+         }
+ }
+
+ /* Makes sure *b points to a bitmap: an existing bitmap is left alone, otherwise a new empty one
+  * is allocated. Returns 0 on success, -ENOMEM if the allocation fails. */
+ int bitmap_ensure_allocated(Bitmap **b) {
+         assert(b);
+
+         if (!*b) {
+                 Bitmap *fresh = bitmap_new();
+
+                 if (!fresh)
+                         return -ENOMEM;
+
+                 *b = fresh;
+         }
+
+         return 0;
+ }
+
+ /* Adds the entry @n to the bitmap, growing the word array as needed. Returns 0 on success,
+  * -ERANGE if @n exceeds BITMAPS_MAX_ENTRY, -ENOMEM if growing the array fails. */
+ int bitmap_set(Bitmap *b, unsigned n) {
+         unsigned word;
+
+         assert(b);
+
+         /* we refuse to allocate huge bitmaps */
+         if (n > BITMAPS_MAX_ENTRY)
+                 return -ERANGE;
+
+         word = BITMAP_NUM_TO_OFFSET(n);
+         if (word >= b->n_bitmaps) {
+                 if (!GREEDY_REALLOC0(b->bitmaps, b->bitmaps_allocated, word + 1))
+                         return -ENOMEM;
+
+                 b->n_bitmaps = word + 1;
+         }
+
+         b->bitmaps[word] |= UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+         return 0;
+ }
+
+ /* Removes the entry @n from the bitmap. A NULL bitmap, or an entry beyond the words in use, is
+  * silently ignored. */
+ void bitmap_unset(Bitmap *b, unsigned n) {
+         unsigned word;
+
+         if (!b)
+                 return;
+
+         word = BITMAP_NUM_TO_OFFSET(n);
+         if (word < b->n_bitmaps)
+                 b->bitmaps[word] &= ~(UINT64_C(1) << BITMAP_NUM_TO_REM(n));
+ }
+
+ /* Returns true if the entry @n is part of the bitmap. A NULL bitmap contains no entries. */
+ bool bitmap_isset(Bitmap *b, unsigned n) {
+         unsigned word;
+
+         if (!b)
+                 return false;
+
+         word = BITMAP_NUM_TO_OFFSET(n);
+         if (word >= b->n_bitmaps)
+                 return false;
+
+         return ((b->bitmaps[word] >> BITMAP_NUM_TO_REM(n)) & UINT64_C(1)) != 0;
+ }
+
+ /* Returns true if the bitmap contains no entry at all. A NULL bitmap counts as clear. */
+ bool bitmap_isclear(Bitmap *b) {
+         size_t k;
+
+         if (!b)
+                 return true;
+
+         for (k = 0; k < b->n_bitmaps; k++) {
+                 if (b->bitmaps[k] != 0)
+                         return false;
+         }
+
+         return true;
+ }
+
+ /* Removes all entries by releasing the word array; the bitmap object itself stays allocated.
+  * NULL is accepted and ignored. */
+ void bitmap_clear(Bitmap *b) {
+         if (b) {
+                 b->n_bitmaps = 0;
+                 b->bitmaps_allocated = 0;
+                 b->bitmaps = mfree(b->bitmaps);
+         }
+ }
+
+ /* Finds the first set entry at or after position i->idx.
+ *
+ * If one is found, it is stored in *n, i->idx is advanced to the position right behind it and
+ * true is returned. Otherwise i->idx is set to BITMAP_END and false is returned. False is also
+ * returned for a NULL bitmap and for an iterator that already reached BITMAP_END. */
+ bool bitmap_iterate(Bitmap *b, Iterator *i, unsigned *n) {
+ uint64_t bitmask;
+ unsigned offset, rem;
+
+ assert(i);
+ assert(n);
+
+ if (!b || i->idx == BITMAP_END)
+ return false;
+
+ /* split the start position into word index, bit index and the mask for that bit */
+ offset = BITMAP_NUM_TO_OFFSET(i->idx);
+ rem = BITMAP_NUM_TO_REM(i->idx);
+ bitmask = UINT64_C(1) << rem;
+
+ for (; offset < b->n_bitmaps; offset ++) {
+ /* skip words without any set bit right away */
+ if (b->bitmaps[offset]) {
+ /* the scan of this word ends when the mask has been shifted out at the top */
+ for (; bitmask; bitmask <<= 1, rem ++) {
+ if (b->bitmaps[offset] & bitmask) {
+ *n = BITMAP_OFFSET_TO_NUM(offset, rem);
+ i->idx = *n + 1;
+
+ return true;
+ }
+ }
+ }
+
+ /* every following word is scanned from its lowest bit */
+ rem = 0;
+ bitmask = 1;
+ }
+
+ i->idx = BITMAP_END;
+
+ return false;
+ }
+
+ /* Returns true if both bitmaps contain the same entries. Two NULL pointers are equal, whereas a
+  * NULL pointer never equals an allocated bitmap. Bitmaps of different length are equal if all
+  * words that only the longer one has are zero. */
+ bool bitmap_equal(Bitmap *a, Bitmap *b) {
+         Bitmap *longer;
+         size_t shared, k;
+
+         if (a == b)
+                 return true;
+
+         /* the pointers differ, hence at most one of them can be NULL here */
+         if (!a || !b)
+                 return false;
+
+         shared = MIN(a->n_bitmaps, b->n_bitmaps);
+         if (memcmp_safe(a->bitmaps, b->bitmaps, sizeof(uint64_t) * shared) != 0)
+                 return false;
+
+         longer = a->n_bitmaps > b->n_bitmaps ? a : b;
+         for (k = shared; k < longer->n_bitmaps; k++) {
+                 if (longer->bitmaps[k] != 0)
+                         return false;
+         }
+
+         return true;
+ }
diff --git a/src/shared/bitmap.h b/src/shared/bitmap.h
new file mode 100644
index 0000000..843d27d
--- /dev/null
+++ b/src/shared/bitmap.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+
+typedef struct Bitmap Bitmap;
+
+Bitmap *bitmap_new(void);
+Bitmap *bitmap_copy(Bitmap *b);
+int bitmap_ensure_allocated(Bitmap **b);
+void bitmap_free(Bitmap *b);
+
+int bitmap_set(Bitmap *b, unsigned n);
+void bitmap_unset(Bitmap *b, unsigned n);
+bool bitmap_isset(Bitmap *b, unsigned n);
+bool bitmap_isclear(Bitmap *b);
+void bitmap_clear(Bitmap *b);
+
+bool bitmap_iterate(Bitmap *b, Iterator *i, unsigned *n);
+
+bool bitmap_equal(Bitmap *a, Bitmap *b);
+
+ /* Iterates over all entries of bitmap b, using the Iterator i as state: the loop body runs once
+ * for each entry for which bitmap_iterate() returns true, with that entry stored in n. */
+ #define BITMAP_FOREACH(n, b, i) \
+ for ((i).idx = 0; bitmap_iterate((b), &(i), (unsigned*)&(n)); )
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Bitmap*, bitmap_free);
+
+#define _cleanup_bitmap_free_ _cleanup_(bitmap_freep)
diff --git a/src/shared/blkid-util.h b/src/shared/blkid-util.h
new file mode 100644
index 0000000..eb07a88
--- /dev/null
+++ b/src/shared/blkid-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_BLKID
+# include <blkid.h>
+
+# include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(blkid_probe, blkid_free_probe);
+#endif
diff --git a/src/shared/boot-timestamps.c b/src/shared/boot-timestamps.c
new file mode 100644
index 0000000..bcbb86d
--- /dev/null
+++ b/src/shared/boot-timestamps.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "acpi-fpdt.h"
+#include "boot-timestamps.h"
+#include "efivars.h"
+#include "macro.h"
+#include "time-util.h"
+
+ /* Computes the timestamps at which the firmware and the boot loader began working.
+  *
+  * @n is the reference "now" timestamp; if NULL, the current time is used. The two boot durations
+  * are taken from acpi_get_boot_usec(), or from efi_loader_get_boot_usec() if the former fails.
+  *
+  * Returns 0 on success, or the negative error of efi_loader_get_boot_usec() if neither source
+  * is available. */
+ int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader) {
+         usec_t x = 0, y = 0, offset;
+         dual_timestamp now_ts;
+         int r;
+
+         assert(firmware);
+         assert(loader);
+
+         if (!n) {
+                 dual_timestamp_get(&now_ts);
+                 n = &now_ts;
+         }
+
+         r = acpi_get_boot_usec(&x, &y);
+         if (r < 0)
+                 r = efi_loader_get_boot_usec(&x, &y);
+         if (r < 0)
+                 return r;
+
+         /* Convert this to the points in time where the firmware and the loader began working.
+          * Note that usec_t is unsigned and that the kernel's monotonic clock begins at kernel
+          * initialization, hence the monotonic timestamps are stored as the negative of the
+          * actual value. */
+
+         firmware->monotonic = y;
+         loader->monotonic = y - x;
+
+         /* clamp the realtime timestamps at zero rather than letting the subtraction wrap around */
+         offset = n->monotonic + firmware->monotonic;
+         if (n->realtime > offset)
+                 firmware->realtime = n->realtime - offset;
+         else
+                 firmware->realtime = 0;
+
+         offset = n->monotonic + loader->monotonic;
+         if (n->realtime > offset)
+                 loader->realtime = n->realtime - offset;
+         else
+                 loader->realtime = 0;
+
+         return 0;
+ }
diff --git a/src/shared/boot-timestamps.h b/src/shared/boot-timestamps.h
new file mode 100644
index 0000000..4e648f1
--- /dev/null
+++ b/src/shared/boot-timestamps.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <time-util.h>
+
+int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader);
diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c
new file mode 100644
index 0000000..7e276f1
--- /dev/null
+++ b/src/shared/bootspec.c
@@ -0,0 +1,665 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <linux/magic.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "bootspec.h"
+#include "conf-files.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "efivars.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "virt.h"
+
+/* Releases everything a BootEntry owns. The BootEntry structure itself is not freed, so this works
+ * both for array elements and for stack objects. */
+static void boot_entry_free(BootEntry *entry) {
+ assert(entry);
+
+ /* String lists */
+ strv_free(entry->options);
+ strv_free(entry->initrd);
+
+ /* Plain strings */
+ free(entry->device_tree);
+ free(entry->efi);
+ free(entry->kernel);
+ free(entry->architecture);
+ free(entry->machine_id);
+ free(entry->version);
+ free(entry->show_title);
+ free(entry->title);
+ free(entry->path);
+ free(entry->id);
+}
+
+/* Parses the boot loader entry file at path into *entry.
+ *
+ * The file name must end in ".conf" (case-insensitively); the id is the basename without that suffix.
+ * Each non-empty, non-comment line is split into a first word (the key) and the remainder (the value).
+ * Unknown keys and unparsable lines are logged and skipped.
+ *
+ * Returns 0 on success and a negative errno-style value on failure. *entry is only written on success. */
+static int boot_entry_load(const char *path, BootEntry *entry) {
+ _cleanup_(boot_entry_free) BootEntry tmp = {};
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ char *b, *c;
+ int r;
+
+ assert(path);
+ assert(entry);
+
+ c = endswith_no_case(path, ".conf");
+ if (!c) {
+ log_error("Invalid loader entry filename: %s", path);
+ return -EINVAL;
+ }
+
+ /* c points at the ".conf" suffix, hence c - b is the length of the basename without it */
+ b = basename(path);
+ tmp.id = strndup(b, c - b);
+ if (!tmp.id)
+ return log_oom();
+
+ tmp.path = strdup(path);
+ if (!tmp.path)
+ return log_oom();
+
+ f = fopen(path, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ /* NOTE(review): the counter is bumped before the messages below are generated, so they appear
+ * to report the number of the *next* line rather than the one just read; confirm intent. */
+ line++;
+
+ /* Skip comments and empty lines */
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ /* Single-valued keys replace any earlier value; "options" and "initrd" accumulate */
+ if (streq(field, "title"))
+ r = free_and_strdup(&tmp.title, p);
+ else if (streq(field, "version"))
+ r = free_and_strdup(&tmp.version, p);
+ else if (streq(field, "machine-id"))
+ r = free_and_strdup(&tmp.machine_id, p);
+ else if (streq(field, "architecture"))
+ r = free_and_strdup(&tmp.architecture, p);
+ else if (streq(field, "options"))
+ r = strv_extend(&tmp.options, p);
+ else if (streq(field, "linux"))
+ r = free_and_strdup(&tmp.kernel, p);
+ else if (streq(field, "efi"))
+ r = free_and_strdup(&tmp.efi, p);
+ else if (streq(field, "initrd"))
+ r = strv_extend(&tmp.initrd, p);
+ else if (streq(field, "devicetree"))
+ r = free_and_strdup(&tmp.device_tree, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\"", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ /* Hand ownership over to the caller and reset tmp, so that the cleanup handler has nothing to free */
+ *entry = tmp;
+ tmp = (BootEntry) {};
+ return 0;
+}
+
+/* Releases everything a BootConfig owns: the loader.conf settings, the EFI variable values and all
+ * loaded entries. The BootConfig structure itself is not freed. */
+void boot_config_free(BootConfig *config) {
+ size_t i;
+
+ assert(config);
+
+ /* Settings read from loader.conf */
+ free(config->default_pattern);
+ free(config->timeout);
+ free(config->editor);
+ free(config->auto_entries);
+ free(config->auto_firmware);
+ free(config->console_mode); /* set by the "console-mode" key; was previously leaked */
+
+ /* Values read from EFI variables */
+ free(config->entry_oneshot);
+ free(config->entry_default);
+
+ for (i = 0; i < config->n_entries; i++)
+ boot_entry_free(config->entries + i);
+ free(config->entries);
+}
+
+/* Reads the boot loader configuration file at path into config.
+ *
+ * Each non-empty, non-comment line is split into a first word (the key) and the remainder (the value).
+ * Unknown keys and unparsable lines are logged and skipped.
+ *
+ * Returns 0 if the file does not exist, 1 if it was read, and a negative errno-style value on failure. */
+static int boot_loader_read_conf(const char *path, BootConfig *config) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ int r;
+
+ assert(path);
+ assert(config);
+
+ f = fopen(path, "re");
+ if (!f) {
+ /* A missing configuration file is not an error */
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ /* NOTE(review): the counter is bumped before the messages below are generated, so they appear
+ * to report the number of the *next* line rather than the one just read; confirm intent. */
+ line++;
+
+ /* Skip comments and empty lines */
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ /* Every key is single-valued: a later occurrence replaces an earlier one */
+ if (streq(field, "default"))
+ r = free_and_strdup(&config->default_pattern, p);
+ else if (streq(field, "timeout"))
+ r = free_and_strdup(&config->timeout, p);
+ else if (streq(field, "editor"))
+ r = free_and_strdup(&config->editor, p);
+ else if (streq(field, "auto-entries"))
+ r = free_and_strdup(&config->auto_entries, p);
+ else if (streq(field, "auto-firmware"))
+ r = free_and_strdup(&config->auto_firmware, p);
+ else if (streq(field, "console-mode"))
+ r = free_and_strdup(&config->console_mode, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\"", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ return 1;
+}
+
+/* Ordering callback for sorting BootEntry arrays: orders two entries by comparing their ids with
+ * str_verscmp(). */
+static int boot_entry_compare(const BootEntry *a, const BootEntry *b) {
+ return str_verscmp(a->id, b->id);
+}
+
+/* Loads all "*.conf" boot entry files found in dir.
+ *
+ * Files that fail to load are skipped (boot_entry_load() has already logged the reason). The loaded
+ * entries are sorted with boot_entry_compare() and returned in a newly allocated array, which the
+ * caller owns.
+ *
+ * Returns 0 on success and a negative errno-style value on failure, in which case nothing is returned
+ * and nothing is leaked. */
+static int boot_entries_find(const char *dir, BootEntry **ret_entries, size_t *ret_n_entries) {
+ _cleanup_strv_free_ char **files = NULL;
+ char **f;
+ int r;
+ BootEntry *array = NULL;
+ size_t n_allocated = 0, n = 0, i;
+
+ assert(dir);
+ assert(ret_entries);
+ assert(ret_n_entries);
+
+ r = conf_files_list(&files, ".conf", NULL, 0, dir, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list files in \"%s\": %m", dir);
+
+ STRV_FOREACH(f, files) {
+ if (!GREEDY_REALLOC0(array, n_allocated, n + 1)) {
+ /* Don't leak the entries loaded so far. This assumes that GREEDY_REALLOC0() leaves
+ * the old buffer valid on failure, like realloc() does. */
+ for (i = 0; i < n; i++)
+ boot_entry_free(array + i);
+ free(array);
+
+ return log_oom();
+ }
+
+ /* On failure the slot is left untouched and gets reused for the next file */
+ r = boot_entry_load(*f, array + n);
+ if (r < 0)
+ continue;
+
+ n++;
+ }
+
+ typesafe_qsort(array, n, boot_entry_compare);
+
+ *ret_entries = array;
+ *ret_n_entries = n;
+
+ return 0;
+}
+
+/* Marks in arr[] every entry whose displayed title (see boot_entry_title()) is shared with at least
+ * one other entry. arr must have room for n_entries elements. Returns true if any duplicate exists. */
+static bool find_nonunique(BootEntry *entries, size_t n_entries, bool *arr) {
+ bool found = false;
+ size_t a, b;
+
+ assert(entries || n_entries == 0);
+ assert(arr || n_entries == 0);
+
+ for (a = 0; a < n_entries; a++)
+ arr[a] = false;
+
+ /* Equality is symmetric, so looking at each unordered pair once marks the same set of entries */
+ for (a = 0; a < n_entries; a++)
+ for (b = a + 1; b < n_entries; b++) {
+ if (!streq(boot_entry_title(entries + a), boot_entry_title(entries + b)))
+ continue;
+
+ arr[a] = true;
+ arr[b] = true;
+ found = true;
+ }
+
+ return found;
+}
+
+/* Makes the displayed titles of the entries distinguishable.
+ *
+ * Entries sharing a title get a show_title with further detail appended in parentheses: first the
+ * version, then (if still ambiguous) the machine ID, and finally the entry id.
+ *
+ * Returns 0 on success, -ENOMEM if a title could not be allocated. */
+static int boot_entries_uniquify(BootEntry *entries, size_t n_entries) {
+ char *s;
+ size_t i;
+ int r;
+ /* A variable-length array must have at least one element, so never size it with 0 */
+ bool arr[n_entries > 0 ? n_entries : 1];
+
+ assert(entries || n_entries == 0);
+
+ /* Find _all_ non-unique titles */
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add version to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].version) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].version);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add machine-id to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].machine_id) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].machine_id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add file name to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i]) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ return 0;
+}
+
+/* Picks the index of the default boot entry.
+ *
+ * In order of preference: the entry whose id equals entry_oneshot, the entry whose id equals
+ * entry_default, the entry whose id matches default_pattern (fnmatch(), case-insensitive), and
+ * otherwise the last entry. Each search runs from the end of the array towards the start.
+ *
+ * Returns the index into config->entries, or -1 if there are no entries at all. */
+static int boot_entries_select_default(const BootConfig *config) {
+ int i;
+
+ assert(config);
+
+ if (config->entry_oneshot)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_oneshot, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryOneShot",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->entry_default)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_default, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryDefault",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->default_pattern)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (fnmatch(config->default_pattern, config->entries[i].id, FNM_CASEFOLD) == 0) {
+ log_debug("Found default: id \"%s\" is matched by pattern \"%s\"",
+ config->entries[i].id, config->default_pattern);
+ return i;
+ }
+
+ /* Nothing matched: fall back to the last entry, if there is one */
+ if (config->n_entries > 0)
+ log_debug("Found default: last entry \"%s\"", config->entries[config->n_entries - 1].id);
+ else
+ log_debug("Found no default boot entry :(");
+
+ return config->n_entries - 1; /* -1 means "no default" */
+}
+
+/* Loads the complete boot configuration below esp_path into config.
+ *
+ * Reads "<esp_path>/loader/loader.conf", loads the entries from "<esp_path>/loader/entries", makes
+ * their titles unique, reads the LoaderEntryOneShot and LoaderEntryDefault EFI variables when booted
+ * via EFI, and finally determines config->default_entry.
+ *
+ * Returns 0 on success and a negative errno-style value on failure. Members of config that were
+ * filled in before the failure are left in place. */
+int boot_entries_load_config(const char *esp_path, BootConfig *config) {
+ const char *p;
+ int r;
+
+ assert(esp_path);
+ assert(config);
+
+ p = strjoina(esp_path, "/loader/loader.conf");
+ r = boot_loader_read_conf(p, config);
+ if (r < 0)
+ return r;
+
+ p = strjoina(esp_path, "/loader/entries");
+ r = boot_entries_find(p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+
+ r = boot_entries_uniquify(config->entries, config->n_entries);
+ if (r < 0)
+ return log_error_errno(r, "Failed to uniquify boot entries: %m");
+
+ if (is_efi_boot()) {
+ /* A missing variable (-ENOENT) is fine, it just means nothing was configured */
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &config->entry_oneshot);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to read EFI var \"LoaderEntryOneShot\": %m");
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryDefault", &config->entry_default);
+ if (r < 0 && r != -ENOENT)
+ return log_error_errno(r, "Failed to read EFI var \"LoaderEntryDefault\": %m");
+ }
+
+ config->default_entry = boot_entries_select_default(config);
+ return 0;
+}
+
+/********************************************************************************/
+
+/* Checks whether the directory p is the mount point of an EFI System Partition (ESP).
+ *
+ * p                 directory to check
+ * searching         if true, "path does not exist" and "not a FAT file system" are reported silently
+ *                   as -ENOENT and -EADDRNOTAVAIL, so that the caller can probe several candidates
+ * unprivileged_mode if true, EACCES failures are only logged at debug level and the block device
+ *                   probing is skipped
+ * ret_part, ret_pstart, ret_psize, ret_uuid
+ *                   optional; receive the partition number, offset, size and UUID. They are set to
+ *                   zero/SD_ID128_NULL when the block device probing is skipped or unavailable.
+ *
+ * Setting $SYSTEMD_RELAX_ESP_CHECKS to true skips the file system type check and the block device
+ * probing.
+ *
+ * Returns 0 on success and a negative errno-style value on failure. */
+static int verify_esp(
+ const char *p,
+ bool searching,
+ bool unprivileged_mode,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *node = NULL;
+ const char *v;
+#endif
+ uint64_t pstart = 0, psize = 0;
+ struct stat st, st2;
+ const char *t2;
+ struct statfs sfs;
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint32_t part = 0;
+ bool relax_checks;
+ int r;
+
+ assert(p);
+
+ relax_checks = getenv_bool("SYSTEMD_RELAX_ESP_CHECKS") > 0;
+
+ /* Non-root user can only check the status, so if an error occurred in the following, it does not cause any
+ * issues. Let's also silence the error messages. */
+
+ if (!relax_checks) {
+ if (statfs(p, &sfs) < 0) {
+ /* If we are searching for the mount point, don't generate a log message if we can't find the path */
+ if (errno == ENOENT && searching)
+ return -ENOENT;
+
+ return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to check file system type of \"%s\": %m", p);
+ }
+
+ if (!F_TYPE_EQUAL(sfs.f_type, MSDOS_SUPER_MAGIC)) {
+ if (searching)
+ return -EADDRNOTAVAIL;
+
+ log_error("File system \"%s\" is not a FAT EFI System Partition (ESP) file system.", p);
+ return -ENODEV;
+ }
+ }
+
+ if (stat(p, &st) < 0)
+ return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to determine block device node of \"%s\": %m", p);
+
+ if (major(st.st_dev) == 0) {
+ /* p is a path string, print it as such rather than as a pointer value */
+ log_error("Block device node of \"%s\" is invalid.", p);
+ return -ENODEV;
+ }
+
+ /* A mount point lives on a different device than its parent directory */
+ t2 = strjoina(p, "/..");
+ r = stat(t2, &st2);
+ if (r < 0)
+ return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to determine block device node of parent of \"%s\": %m", p);
+
+ if (st.st_dev == st2.st_dev) {
+ log_error("Directory \"%s\" is not the root of the EFI System Partition (ESP) file system.", p);
+ return -ENODEV;
+ }
+
+ /* In a container we don't have access to block devices, skip this part of the verification, we trust the
+ * container manager set everything up correctly on its own. Also skip the following verification for non-root user. */
+ if (detect_container() > 0 || unprivileged_mode || relax_checks)
+ goto finish;
+
+#if HAVE_BLKID
+ r = device_path_make_major_minor(S_IFBLK, st.st_dev, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return log_error_errno(errno ?: ENOMEM, "Failed to open file system \"%s\": %m", p);
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2) {
+ log_error("File system \"%s\" is ambiguous.", p);
+ return -ENODEV;
+ } else if (r == 1) {
+ log_error("File system \"%s\" does not contain a label.", p);
+ return -ENODEV;
+ } else if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe file system \"%s\": %m", p);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe file system type \"%s\": %m", p);
+ if (!streq(v, "vfat")) {
+ log_error("File system \"%s\" is not FAT.", p);
+ return -ENODEV;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition scheme \"%s\": %m", p);
+ if (!streq(v, "gpt")) {
+ log_error("File system \"%s\" is not on a GPT partition table.", p);
+ return -ENODEV;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition type UUID \"%s\": %m", p);
+ if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b")) {
+ log_error("File system \"%s\" has wrong type for an EFI System Partition (ESP).", p);
+ return -ENODEV;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition entry UUID \"%s\": %m", p);
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0) {
+ log_error("Partition \"%s\" has invalid UUID \"%s\".", p, v);
+ return -EIO;
+ }
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_NUMBER", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition number \"%s\": %m", p);
+ r = safe_atou32(v, &part);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_OFFSET", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition offset \"%s\": %m", p);
+ r = safe_atou64(v, &pstart);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SIZE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition size \"%s\": %m", p);
+ r = safe_atou64(v, &psize);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field.");
+#endif
+
+finish:
+ if (ret_part)
+ *ret_part = part;
+ if (ret_pstart)
+ *ret_pstart = pstart;
+ if (ret_psize)
+ *ret_psize = psize;
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+/* Locates the EFI System Partition (ESP) mount point.
+ *
+ * path              if non-NULL, this directory is verified and used; otherwise $SYSTEMD_ESP_PATH is
+ *                   used if set, and failing that "/efi", "/boot" and "/boot/efi" are probed in turn
+ * unprivileged_mode passed on to verify_esp()
+ * ret_path          optional; receives a newly allocated copy of the path found (caller frees)
+ * ret_part, ret_pstart, ret_psize, ret_uuid
+ *                   optional; receive the partition details determined by verify_esp(), or
+ *                   zero/SD_ID128_NULL when the path came from $SYSTEMD_ESP_PATH and was not verified
+ *
+ * Returns 0 on success and a negative errno-style value on failure. */
+int find_esp_and_warn(
+ const char *path,
+ bool unprivileged_mode,
+ char **ret_path,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ int r;
+
+ /* This logs about all errors except:
+ *
+ * -ENOKEY → when we can't find the partition
+ * -EACCES → when unprivileged_mode is true, and we can't access something
+ */
+
+ if (path) {
+ r = verify_esp(path, false, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r < 0)
+ return r;
+
+ goto found;
+ }
+
+ path = getenv("SYSTEMD_ESP_PATH");
+ if (path) {
+ if (!path_is_valid(path) || !path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "$SYSTEMD_ESP_PATH does not refer to absolute path, refusing to use it: %s",
+ path);
+
+ /* Note: when the user explicitly configured things with an env var we won't validate the mount
+ * point. After all we want this to be useful for testing. Since nothing is probed, hand back
+ * well-defined "unknown" values instead of leaving the output parameters untouched. */
+ if (ret_part)
+ *ret_part = 0;
+ if (ret_pstart)
+ *ret_pstart = 0;
+ if (ret_psize)
+ *ret_psize = 0;
+ if (ret_uuid)
+ *ret_uuid = SD_ID128_NULL;
+
+ goto found;
+ }
+
+ FOREACH_STRING(path, "/efi", "/boot", "/boot/efi") {
+
+ r = verify_esp(path, true, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r >= 0)
+ goto found;
+ if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */
+ return r;
+ }
+
+ /* No logging here */
+ return -ENOKEY;
+
+found:
+ if (ret_path) {
+ char *c;
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ *ret_path = c;
+ }
+
+ return 0;
+}
+
+/* Locates the ESP, loads the boot configuration from it and returns the default boot entry.
+ *
+ * esp_path  passed to find_esp_and_warn(); may be NULL to search for the ESP
+ * esp_where optional; receives the newly allocated ESP path on success (caller frees)
+ * config    filled in by boot_entries_load_config(); it keeps owning the entry returned in *e
+ * e         receives a pointer to the default entry inside config->entries
+ *
+ * Returns 0 on success and a negative errno-style value on failure (the value from
+ * log_error_errno(SYNTHETIC_ERRNO(ENOENT), ...) if no entry qualifies as default). */
+int find_default_boot_entry(
+ const char *esp_path,
+ char **esp_where,
+ BootConfig *config,
+ const BootEntry **e) {
+
+ _cleanup_free_ char *where = NULL;
+ int r;
+
+ assert(config);
+ assert(e);
+
+ r = find_esp_and_warn(esp_path, false, &where, NULL, NULL, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ r = boot_entries_load_config(where, config);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load bootspec config from \"%s/loader\": %m", where);
+
+ if (config->default_entry < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "No entry suitable as default, refusing to guess.");
+
+ *e = &config->entries[config->default_entry];
+ log_debug("Found default boot entry in file \"%s\"", (*e)->path);
+
+ /* Pass ownership of the path to the caller, if requested */
+ if (esp_where)
+ *esp_where = TAKE_PTR(where);
+
+ return 0;
+}
diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h
new file mode 100644
index 0000000..ed57621
--- /dev/null
+++ b/src/shared/bootspec.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+/* One boot loader entry, as loaded from a "*.conf" entry file. All members are heap-allocated and
+ * owned by the structure. */
+typedef struct BootEntry {
+ char *id; /* This is the file basename without extension */
+ char *path; /* This is the full path to the file */
+ char *title; /* value of the "title" key */
+ char *show_title; /* disambiguated title, only set if several entries share a title */
+ char *version; /* value of the "version" key */
+ char *machine_id; /* value of the "machine-id" key */
+ char *architecture; /* value of the "architecture" key */
+ char **options; /* one element per "options" line */
+ char *kernel; /* linux is #defined to 1, yikes! */
+ char *efi; /* value of the "efi" key */
+ char **initrd; /* one element per "initrd" line */
+ char *device_tree; /* value of the "devicetree" key */
+} BootEntry;
+
+/* The complete boot loader configuration: loader.conf settings, EFI variable values and all entries. */
+typedef struct BootConfig {
+ /* Unparsed values of the loader.conf keys "default", "timeout", "editor", "auto-entries",
+ * "auto-firmware" and "console-mode" */
+ char *default_pattern;
+ char *timeout;
+ char *editor;
+ char *auto_entries;
+ char *auto_firmware;
+ char *console_mode;
+
+ /* Values of the LoaderEntryOneShot and LoaderEntryDefault EFI variables, if available */
+ char *entry_oneshot;
+ char *entry_default;
+
+ BootEntry *entries; /* array of n_entries entries */
+ size_t n_entries;
+ ssize_t default_entry; /* index into entries[], or -1 if there is no default */
+} BootConfig;
+
+void boot_config_free(BootConfig *config);
+int boot_entries_load_config(const char *esp_path, BootConfig *config);
+
+/* Returns the title to display for an entry: the disambiguated show_title if set, otherwise the
+ * configured title, otherwise the entry id. The string remains owned by the entry. */
+static inline const char* boot_entry_title(const BootEntry *entry) {
+ return entry->show_title ?: entry->title ?: entry->id;
+}
+
+int find_esp_and_warn(const char *path, bool unprivileged_mode, char **ret_path, uint32_t *ret_part, uint64_t *ret_pstart, uint64_t *ret_psize, sd_id128_t *ret_uuid);
+
+int find_default_boot_entry(const char *esp_path, char **esp_where, BootConfig *config, const BootEntry **e);
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
new file mode 100644
index 0000000..2c61e04
--- /dev/null
+++ b/src/shared/bpf-program.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "log.h"
+#include "missing.h"
+#include "path-util.h"
+#include "util.h"
+
+/* Allocates a new, empty BPF program object of the given type with a reference count of 1. Nothing is
+ * loaded into the kernel yet (kernel_fd is -1). Returns 0 and stores the object in *ret on success,
+ * or the result of log_oom() if the allocation fails. */
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
+
+ prog = new0(BPFProgram, 1);
+ if (!prog)
+ return log_oom();
+
+ prog->kernel_fd = -1;
+ prog->prog_type = prog_type;
+ prog->n_ref = 1;
+
+ /* Ownership moves to the caller */
+ *ret = TAKE_PTR(prog);
+ return 0;
+}
+
+/* Destroys a BPF program object: detaches it from its cgroup (best effort), closes the kernel fd and
+ * frees all memory. Always returns the result of mfree(p). */
+static BPFProgram *bpf_program_free(BPFProgram *p) {
+ assert(p);
+
+ /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
+ * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
+ * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
+ * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
+ * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
+ * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
+ * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
+ * whenever we close the BPF fd. */
+ (void) bpf_program_cgroup_detach(p);
+
+ safe_close(p->kernel_fd);
+ free(p->instructions);
+ free(p->attached_path);
+
+ return mfree(p);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
+
+/* Appends count instructions to the program. Returns 0 on success, -EBUSY if the program was already
+ * loaded into the kernel, and -ENOMEM if the instruction buffer cannot be enlarged. */
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
+ size_t needed;
+
+ assert(p);
+
+ /* don't allow modification after we uploaded things to the kernel */
+ if (p->kernel_fd >= 0)
+ return -EBUSY;
+
+ needed = p->n_instructions + count;
+ if (!GREEDY_REALLOC(p->instructions, p->allocated, needed))
+ return -ENOMEM;
+
+ memcpy(p->instructions + p->n_instructions, instructions, count * sizeof(struct bpf_insn));
+ p->n_instructions = needed;
+
+ return 0;
+}
+
+/* Loads the program into the kernel with BPF_PROG_LOAD and stores the resulting fd in p->kernel_fd.
+ *
+ * log_buf/log_size describe an optional buffer that is passed to the kernel as log buffer; logging is
+ * requested (log_level 1) only if log_buf is non-NULL. If the program is already loaded this is a
+ * no-op that merely zeroes the buffer.
+ *
+ * Returns 0 on success and -errno on failure. */
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
+ union bpf_attr attr;
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) { /* make this idempotent */
+ memzero(log_buf, log_size);
+ return 0;
+ }
+
+ attr = (union bpf_attr) {
+ .prog_type = p->prog_type,
+ .insns = PTR_TO_UINT64(p->instructions),
+ .insn_cnt = p->n_instructions,
+ .license = PTR_TO_UINT64("GPL"),
+ .log_buf = PTR_TO_UINT64(log_buf),
+ .log_level = !!log_buf,
+ .log_size = log_size,
+ };
+
+ p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (p->kernel_fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* Attaches the program to the cgroup directory at path, loading it into the kernel first if needed.
+ *
+ * type  BPF attach type, must not be negative
+ * path  cgroup directory; it is opened here and remembered in p->attached_path on success
+ * flags must be exactly 0, BPF_F_ALLOW_OVERRIDE or BPF_F_ALLOW_MULTI
+ *
+ * Returns 0 on success, -EINVAL for unsupported flags, -EBUSY if the program is already attached with
+ * a different path, type or flags, -ENOMEM on allocation failure, and -errno if opening the cgroup or
+ * the attach operation fails. */
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
+ _cleanup_free_ char *copy = NULL;
+ _cleanup_close_ int fd = -1;
+ union bpf_attr attr;
+ int r;
+
+ assert(p);
+ assert(type >= 0);
+ assert(path);
+
+ if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
+ return -EINVAL;
+
+ /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
+ * refuse this early. */
+ if (p->attached_path) {
+ if (!path_equal(p->attached_path, path))
+ return -EBUSY;
+ if (p->attached_type != type)
+ return -EBUSY;
+ if (p->attached_flags != flags)
+ return -EBUSY;
+
+ /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
+ * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
+ * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
+ * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
+ * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
+ * would remain in effect. */
+ if (flags != BPF_F_ALLOW_OVERRIDE)
+ return 0;
+ }
+
+ /* Ensure we have a kernel object for this. */
+ r = bpf_program_load_kernel(p, NULL, 0);
+ if (r < 0)
+ return r;
+
+ /* Allocate the copy of the path up front, so that nothing can fail after the attach succeeded */
+ copy = strdup(path);
+ if (!copy)
+ return -ENOMEM;
+
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ attr = (union bpf_attr) {
+ .attach_type = type,
+ .target_fd = fd,
+ .attach_bpf_fd = p->kernel_fd,
+ .attach_flags = flags,
+ };
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ /* Remember where and how we are attached, so that we can detach later */
+ free_and_replace(p->attached_path, copy);
+ p->attached_type = type;
+ p->attached_flags = flags;
+
+ return 0;
+}
+
+/* Detaches the program from the cgroup it was attached to by bpf_program_cgroup_attach().
+ *
+ * Returns 0 on success (including when the cgroup no longer exists), -EUNATCH if the program is not
+ * attached, and -errno if opening the cgroup or the detach operation fails. On failure the recorded
+ * attachment is kept. */
+int bpf_program_cgroup_detach(BPFProgram *p) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+
+ if (!p->attached_path)
+ return -EUNATCH;
+
+ fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
+ * implicitly by the removal, hence don't complain */
+
+ } else {
+ union bpf_attr attr;
+
+ attr = (union bpf_attr) {
+ .attach_type = p->attached_type,
+ .target_fd = fd,
+ .attach_bpf_fd = p->kernel_fd,
+ };
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+ }
+
+ /* Forget the attachment */
+ p->attached_path = mfree(p->attached_path);
+
+ return 0;
+}
+
+/* Creates a new BPF map with BPF_MAP_CREATE. Returns the file descriptor of the map on success and
+ * -errno on failure. */
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
+ union bpf_attr attr = {
+ .map_type = type,
+ .key_size = key_size,
+ .value_size = value_size,
+ .max_entries = max_entries,
+ .map_flags = flags,
+ };
+ int map_fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+
+ return map_fd < 0 ? -errno : map_fd;
+}
+
+/* Stores value under key in the BPF map referenced by fd (BPF_MAP_UPDATE_ELEM). Returns 0 on success
+ * and -errno on failure. */
+int bpf_map_update_element(int fd, const void *key, void *value) {
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = PTR_TO_UINT64(key),
+ .value = PTR_TO_UINT64(value),
+ };
+
+ return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0 ? -errno : 0;
+}
+
+/* Looks up key in the BPF map referenced by fd (BPF_MAP_LOOKUP_ELEM), passing value as the buffer
+ * for the result. Returns 0 on success and -errno on failure. */
+int bpf_map_lookup_element(int fd, const void *key, void *value) {
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = PTR_TO_UINT64(key),
+ .value = PTR_TO_UINT64(value),
+ };
+
+ return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0 ? -errno : 0;
+}
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
new file mode 100644
index 0000000..c21eb2f
--- /dev/null
+++ b/src/shared/bpf-program.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct BPFProgram BPFProgram;
+
+/* A reference-counted BPF program: its instructions, the kernel object once loaded, and the cgroup it
+ * is attached to (at most one at a time). */
+struct BPFProgram {
+ unsigned n_ref; /* reference counter */
+
+ int kernel_fd; /* fd of the loaded program, negative while not loaded */
+ uint32_t prog_type; /* program type passed to BPF_PROG_LOAD */
+
+ size_t n_instructions; /* number of instructions in use */
+ size_t allocated; /* allocation bookkeeping for instructions[] */
+ struct bpf_insn *instructions;
+
+ char *attached_path; /* cgroup directory we are attached to, NULL if not attached */
+ int attached_type; /* attach type used for the attachment */
+ uint32_t attached_flags; /* attach flags used for the attachment */
+};
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
+BPFProgram *bpf_program_unref(BPFProgram *p);
+BPFProgram *bpf_program_ref(BPFProgram *p);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
+int bpf_program_cgroup_detach(BPFProgram *p);
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
+int bpf_map_update_element(int fd, const void *key, void *value);
+int bpf_map_lookup_element(int fd, const void *key, void *value);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
new file mode 100644
index 0000000..9a8051d
--- /dev/null
+++ b/src/shared/bus-unit-util.c
@@ -0,0 +1,2547 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "cpu-set-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "escape.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "ip-protocol-list.h"
+#include "list.h"
+#include "locale-util.h"
+#include "missing_fs.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "securebits-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "user-util.h"
+#include "utf8.h"
+#include "util.h"
+
+/* Reads one "(ssssssouso)" structure from 'message' into the corresponding members of 'u'. The 'machine'
+ * member is not part of that structure and is reset to NULL first. Returns the result of
+ * sd_bus_message_read() unchanged. */
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
+        int r;
+
+        assert(message);
+        assert(u);
+
+        u->machine = NULL;
+
+        r = sd_bus_message_read(message, "(ssssssouso)",
+                                &u->id, &u->description,
+                                &u->load_state, &u->active_state, &u->sub_state,
+                                &u->following, &u->unit_path,
+                                &u->job_id, &u->job_type, &u->job_path);
+        return r;
+}
+
+/* Generates static bus_append_<parse_func>(m, field, eq) for parsers of the form parse_func(eq, &out).
+ * The parsed value (of type 'type') is cast to 'cast_type' and appended to 'm' as a "(sv)" pair whose
+ * variant has the signature 'bus_type'. The generated function returns 1 on success; on failure it logs
+ * and returns what log_error_errno()/bus_log_create_error() return. */
+#define DEFINE_BUS_APPEND_PARSE_PTR(bus_type, cast_type, type, parse_func) \
+        static int bus_append_##parse_func( \
+                        sd_bus_message *m, \
+                        const char *field, \
+                        const char *eq) { \
+                type parsed; \
+                int k; \
+                \
+                k = parse_func(eq, &parsed); \
+                if (k < 0) \
+                        return log_error_errno(k, "Failed to parse %s=%s: %m", field, eq); \
+                \
+                k = sd_bus_message_append(m, "(sv)", field, bus_type, (cast_type) parsed); \
+                if (k < 0) \
+                        return bus_log_create_error(k); \
+                \
+                return 1; \
+        }
+
+/* Generates static bus_append_<parse_func>(m, field, eq) for parsers of the form parse_func(eq) that
+ * hand back the parsed value as a non-negative return value. That value is appended to 'm' as int32_t in
+ * a "(sv)" pair whose variant has the signature 'bus_type'. Any negative parser result is reported as
+ * EINVAL. The generated function returns 1 on success. */
+#define DEFINE_BUS_APPEND_PARSE(bus_type, parse_func) \
+        static int bus_append_##parse_func( \
+                        sd_bus_message *m, \
+                        const char *field, \
+                        const char *eq) { \
+                int parsed, k; \
+                \
+                parsed = parse_func(eq); \
+                if (parsed < 0) \
+                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s: %s", field, eq); \
+                \
+                k = sd_bus_message_append(m, "(sv)", field, bus_type, (int32_t) parsed); \
+                if (k < 0) \
+                        return bus_log_create_error(k); \
+                \
+                return 1; \
+        }
+
+DEFINE_BUS_APPEND_PARSE("b", parse_boolean); /* defines static bus_append_parse_boolean(m, field, eq); each line below defines bus_append_<parser>() the same way */
+DEFINE_BUS_APPEND_PARSE("i", ioprio_class_from_string);
+DEFINE_BUS_APPEND_PARSE("i", ip_tos_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_facility_unshifted_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_level_from_string);
+DEFINE_BUS_APPEND_PARSE("i", parse_errno);
+DEFINE_BUS_APPEND_PARSE("i", sched_policy_from_string);
+DEFINE_BUS_APPEND_PARSE("i", secure_bits_from_string);
+DEFINE_BUS_APPEND_PARSE("i", signal_from_string);
+DEFINE_BUS_APPEND_PARSE("i", parse_ip_protocol);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, ioprio_parse_priority); /* arguments: bus signature, type the value is cast to for the message, parser output type, parser */
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, parse_nice);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, safe_atoi);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, nsec_t, parse_nsec);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_blkio_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_cpu_shares_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, unsigned long, mount_propagation_flags_from_string);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, safe_atou64);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, mode_t, parse_mode);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, unsigned, safe_atou);
+DEFINE_BUS_APPEND_PARSE_PTR("x", int64_t, int64_t, safe_atoi64);
+
+/* Appends 'eq' verbatim as a string-typed "(sv)" pair named 'field'. Returns 1 on success, otherwise the
+ * result of bus_log_create_error(). */
+static int bus_append_string(sd_bus_message *m, const char *field, const char *eq) {
+        int k = sd_bus_message_append(m, "(sv)", field, "s", eq);
+
+        return k < 0 ? bus_log_create_error(k) : 1;
+}
+
+/* Splits 'eq' into words with extract_first_word() (honouring 'flags') and appends them to 'm' as a
+ * "(sv)" pair named 'field' whose variant carries a string array ("as"). Returns 1 on success; failures
+ * are logged and the logging helper's return value is passed on. */
+static int bus_append_strv(sd_bus_message *m, const char *field, const char *eq, ExtractFlags flags) {
+        const char *cursor = eq;
+        unsigned depth;
+        int k;
+
+        k = sd_bus_message_open_container(m, 'r', "sv");
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        k = sd_bus_message_append_basic(m, 's', field);
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        k = sd_bus_message_open_container(m, 'v', "as");
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        k = sd_bus_message_open_container(m, 'a', "s");
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        for (;;) {
+                _cleanup_free_ char *item = NULL;
+
+                k = extract_first_word(&cursor, &item, NULL, flags);
+                if (k == -ENOMEM)
+                        return log_oom();
+                if (k < 0)
+                        return log_error_errno(k, "Invalid syntax: %s", eq);
+                if (k == 0)
+                        break; /* no more words */
+
+                k = sd_bus_message_append_basic(m, 's', item);
+                if (k < 0)
+                        return bus_log_create_error(k);
+        }
+
+        /* Close the array, the variant and the struct, innermost first. */
+        for (depth = 0; depth < 3; depth++) {
+                k = sd_bus_message_close_container(m);
+                if (k < 0)
+                        return bus_log_create_error(k);
+        }
+
+        return 1;
+}
+
+/* Appends the 'n' bytes at 'buf' to 'm' as a "(sv)" pair named 'field' whose variant carries a byte
+ * array ("ay"). Returns 1 on success, otherwise the result of bus_log_create_error(). */
+static int bus_append_byte_array(sd_bus_message *m, const char *field, const void *buf, size_t n) {
+        unsigned depth;
+        int k;
+
+        k = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        k = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        k = sd_bus_message_open_container(m, 'v', "ay");
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        k = sd_bus_message_append_array(m, 'y', buf, n);
+        if (k < 0)
+                return bus_log_create_error(k);
+
+        /* Close the variant, then the enclosing struct. */
+        for (depth = 0; depth < 2; depth++) {
+                k = sd_bus_message_close_container(m);
+                if (k < 0)
+                        return bus_log_create_error(k);
+        }
+
+        return 1;
+}
+
+/* Parses 'eq' with parse_sec() and appends the result to 'm' as a "(sv)" pair of type "t". The property
+ * name put on the wire is 'field' with its trailing "Sec" replaced by "USec" (e.g. "TimeoutSec" is sent
+ * as "TimeoutUSec"). 'field' must end in "Sec". Returns 1 on success; failures are logged and the
+ * logging helper's return value is passed on. */
+static int bus_append_parse_sec_rename(sd_bus_message *m, const char *field, const char *eq) {
+        char *n;
+        usec_t t;
+        size_t l;
+        int r;
+
+        /* The rename below computes l - 3. Make the precondition explicit so that a name not ending in
+         * "Sec" can never make that underflow and overrun the stack buffer. */
+        assert(endswith(field, "Sec"));
+
+        r = parse_sec(eq, &t);
+        if (r < 0)
+                return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+        l = strlen(field);
+        n = newa(char, l + 2); /* (l - 3) prefix bytes + "USec" + terminating NUL */
+        /* Change suffix Sec → USec */
+        strcpy(mempcpy(n, field, l - 3), "USec");
+
+        r = sd_bus_message_append(m, "(sv)", n, "t", t);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        return 1;
+}
+
+/* Parses 'eq' with parse_size() using the given 'base' and appends the result to 'm' as a "(sv)" pair
+ * named 'field' of type "t". Returns 1 on success; failures are logged and the logging helper's return
+ * value is passed on. */
+static int bus_append_parse_size(sd_bus_message *m, const char *field, const char *eq, uint64_t base) {
+        uint64_t parsed;
+        int k;
+
+        k = parse_size(eq, base, &parsed);
+        if (k < 0)
+                return log_error_errno(k, "Failed to parse %s=%s: %m", field, eq);
+
+        k = sd_bus_message_append(m, "(sv)", field, "t", parsed);
+
+        return k < 0 ? bus_log_create_error(k) : 1;
+}
+
+/* Turns a command line 'eq' into an "a(sasb)" property named 'field' and appends it to 'm'.
+ *
+ * Leading modifiers are consumed first: '-' (at most once) sets the ignore-failure flag and '@' (at most
+ * once) means that the first word is the executable path and the remaining words are the argument
+ * vector. '+' and '!' are rejected with EOPNOTSUPP. If no words remain, an empty array is sent.
+ *
+ * Returns 1 on success; failures are logged and the logging helper's return value is passed on. */
+static int bus_append_exec_command(sd_bus_message *m, const char *field, const char *eq) {
+        _cleanup_strv_free_ char **argv = NULL;
+        _cleanup_free_ char *path = NULL;
+        bool ignore_failure = false, explicit_path = false;
+        unsigned depth;
+        int r;
+
+        /* Consume the modifier prefix; each modifier is accepted once, a repetition ends the prefix. */
+        for (;;) {
+                if (*eq == '-' && !ignore_failure) {
+                        ignore_failure = true;
+                        eq++;
+                        continue;
+                }
+
+                if (*eq == '@' && !explicit_path) {
+                        explicit_path = true;
+                        eq++;
+                        continue;
+                }
+
+                if (*eq == '+' || *eq == '!')
+                        /* The bus API doesn't support +, ! and !! currently, unfortunately. :-( */
+                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+                                               "Sorry, but +, ! and !! are currently not supported for transient services.");
+
+                break;
+        }
+
+        if (explicit_path) {
+                r = extract_first_word(&eq, &path, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse path: %m");
+        }
+
+        r = strv_split_extract(&argv, eq, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE);
+        if (r < 0)
+                return log_error_errno(r, "Failed to parse command line: %m");
+
+        r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_open_container(m, 'v', "a(sasb)");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        r = sd_bus_message_open_container(m, 'a', "(sasb)");
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        if (!strv_isempty(argv)) {
+                r = sd_bus_message_open_container(m, 'r', "sasb");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                /* Without '@' the executable path is simply the first argument. */
+                r = sd_bus_message_append(m, "s", path ? path : argv[0]);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_strv(m, argv);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append(m, "b", ignore_failure);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+        }
+
+        /* Close the array, the variant and the outer struct, innermost first. */
+        for (depth = 0; depth < 3; depth++) {
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+        }
+
+        return 1;
+}
+
+/* Appends one "(iayu)" struct to 'm': the address family, the first FAMILY_ADDRESS_SIZE(family) bytes
+ * of 'prefix', and the prefix length. Nothing is logged here; the raw sd-bus result is returned so the
+ * caller can decide how to report it. */
+static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
+        int k;
+
+        assert(m);
+        assert(prefix);
+
+        k = sd_bus_message_open_container(m, 'r', "iayu");
+        if (k >= 0)
+                k = sd_bus_message_append(m, "i", family);
+        if (k >= 0)
+                k = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
+        if (k >= 0)
+                k = sd_bus_message_append(m, "u", prefixlen);
+        if (k < 0)
+                return k;
+
+        return sd_bus_message_close_container(m);
+}
+
+/* Translates one cgroup/resource-control assignment 'field'='eq' into the matching D-Bus property and
+ * appends it to 'm' as a "(sv)" pair.
+ *
+ * Returns 1 if 'field' was handled and appended, 0 if 'field' is not one of the settings known to this
+ * function (so the caller can try something else), and a negative value if parsing or message
+ * construction failed (the failure is logged here). */
+static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) {
+        int r;
+
+        if (STR_IN_SET(field, "DevicePolicy", "Slice"))
+
+                return bus_append_string(m, field, eq);
+
+        if (STR_IN_SET(field,
+                       "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting",
+                       "TasksAccounting", "IPAccounting"))
+
+                return bus_append_parse_boolean(m, field, eq);
+
+        if (STR_IN_SET(field, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight"))
+
+                return bus_append_cg_weight_parse(m, field, eq);
+
+        if (STR_IN_SET(field, "CPUShares", "StartupCPUShares"))
+
+                return bus_append_cg_cpu_shares_parse(m, field, eq);
+
+        if (STR_IN_SET(field, "BlockIOWeight", "StartupBlockIOWeight"))
+
+                return bus_append_cg_blkio_weight_parse(m, field, eq);
+
+        if (streq(field, "Delegate")) {
+
+                /* Anything that is not a boolean is sent as a list in "DelegateControllers" instead. */
+                r = parse_boolean(eq);
+                if (r < 0)
+                        return bus_append_strv(m, "DelegateControllers", eq, EXTRACT_QUOTES);
+
+                r = sd_bus_message_append(m, "(sv)", "Delegate", "b", r);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(field, "MemoryMin", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit", "TasksMax")) {
+
+                if (isempty(eq) || streq(eq, "infinity")) {
+                        r = sd_bus_message_append(m, "(sv)", field, "t", CGROUP_LIMIT_MAX);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+                        return 1;
+                }
+
+                r = parse_permille(eq);
+                if (r >= 0) {
+                        char *n;
+
+                        /* When this is a percentage we'll convert this into a relative value in the range 0…UINT32_MAX
+                         * and pass it in the MemoryLowScale property (and related ones). This way the physical memory
+                         * size can be determined server-side. */
+
+                        n = strjoina(field, "Scale");
+                        r = sd_bus_message_append(m, "(sv)", n, "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U));
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        return 1;
+                }
+
+                /* Not a relative value: TasksMax is a plain count, everything else is a size. */
+                if (streq(field, "TasksMax"))
+                        return bus_append_safe_atou64(m, field, eq);
+
+                return bus_append_parse_size(m, field, eq, 1024);
+        }
+
+        if (streq(field, "CPUQuota")) {
+
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", USEC_INFINITY);
+                else {
+                        r = parse_permille_unbounded(eq);
+                        if (r == 0)
+                                return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
+                                                       "CPU quota too small.");
+                        if (r < 0)
+                                return log_error_errno(r, "CPU quota '%s' invalid.", eq);
+
+                        r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", (((uint64_t) r * USEC_PER_SEC) / 1000U));
+                }
+
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (streq(field, "DeviceAllow")) {
+
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0);
+                else {
+                        const char *path = eq, *rwm = NULL, *e;
+
+                        /* "<path> <rwm>"; the access string is optional. */
+                        e = strchr(eq, ' ');
+                        if (e) {
+                                path = strndupa(eq, e - eq);
+                                rwm = e+1;
+                        }
+
+                        r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, path, strempty(rwm));
+                }
+
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (cgroup_io_limit_type_from_string(field) >= 0 || STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
+
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+                else {
+                        const char *path, *bandwidth, *e;
+                        uint64_t bytes;
+
+                        /* "<path> <bandwidth>"; both parts are required. */
+                        e = strchr(eq, ' ');
+                        if (!e)
+                                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                                       "Failed to parse %s value %s.",
+                                                       field, eq);
+
+                        path = strndupa(eq, e - eq);
+                        bandwidth = e+1;
+
+                        if (streq(bandwidth, "infinity"))
+                                bytes = CGROUP_LIMIT_MAX;
+                        else {
+                                r = parse_size(bandwidth, 1000, &bytes);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to parse byte value %s: %m", bandwidth);
+                        }
+
+                        r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, bytes);
+                }
+
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(field, "IODeviceWeight", "BlockIODeviceWeight")) {
+
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+                else {
+                        const char *path, *weight, *e;
+                        uint64_t u;
+
+                        /* "<path> <weight>"; both parts are required. */
+                        e = strchr(eq, ' ');
+                        if (!e)
+                                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                                       "Failed to parse %s value %s.",
+                                                       field, eq);
+
+                        path = strndupa(eq, e - eq);
+                        weight = e+1;
+
+                        r = safe_atou64(weight, &u);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse %s value %s: %m", field, weight);
+
+                        r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, u);
+                }
+
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (streq(field, "IODeviceLatencyTargetSec")) {
+                const char *field_usec = "IODeviceLatencyTargetUSec";
+
+                if (isempty(eq))
+                        /* The argument following an array signature is the number of elements, hence pass 0
+                         * for "empty list" exactly like the sibling settings above do. */
+                        r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 0);
+                else {
+                        const char *path, *target, *e;
+                        usec_t usec;
+
+                        /* "<path> <latency>"; both parts are required. */
+                        e = strchr(eq, ' ');
+                        if (!e)
+                                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                                       "Failed to parse %s value %s.",
+                                                       field, eq);
+
+                        path = strndupa(eq, e - eq);
+                        target = e+1;
+
+                        r = parse_sec(target, &usec);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse %s value %s: %m", field, target);
+
+                        r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec);
+                }
+
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
+                unsigned char prefixlen;
+                union in_addr_union prefix = {};
+                int family;
+
+                if (isempty(eq)) {
+                        r = sd_bus_message_append(m, "(sv)", field, "a(iayu)", 0);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        return 1;
+                }
+
+                r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'v', "a(iayu)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'a', "(iayu)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                if (streq(eq, "any")) {
+                        /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+                        r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                } else if (is_localhost(eq)) {
+                        /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+                        prefix.in.s_addr = htobe32(0x7f000000);
+                        r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+                        r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
+                        if (r < 0)
+                                /* Log like every other branch does; the helper itself stays silent. */
+                                return bus_log_create_error(r);
+
+                } else if (streq(eq, "link-local")) {
+                        /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+                        prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+                        r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        prefix.in6 = (struct in6_addr) {
+                                .s6_addr32[0] = htobe32(0xfe800000)
+                        };
+                        r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                } else if (streq(eq, "multicast")) {
+                        /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+                        prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
+                        r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        prefix.in6 = (struct in6_addr) {
+                                .s6_addr32[0] = htobe32(0xff000000)
+                        };
+                        r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                } else {
+                        /* Otherwise: a whitespace separated list of address prefixes. */
+                        for (;;) {
+                                _cleanup_free_ char *word = NULL;
+
+                                r = extract_first_word(&eq, &word, NULL, 0);
+                                if (r == 0)
+                                        break;
+                                if (r == -ENOMEM)
+                                        return log_oom();
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to parse %s: %s", field, eq);
+
+                                r = in_addr_prefix_from_string_auto(word, &family, &prefix, &prefixlen);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to parse IP address prefix: %s", word);
+
+                                r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+                        }
+                }
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        return 0;
+}
+
+/* Appends an automount unit setting to 'm'. Returns the result of the matching bus_append_*() helper, or
+ * 0 if 'field' is not an automount setting handled here. */
+static int bus_append_automount_property(sd_bus_message *m, const char *field, const char *eq) {
+        if (streq(field, "Where")) {
+                return bus_append_string(m, field, eq);
+        } else if (streq(field, "DirectoryMode")) {
+                return bus_append_parse_mode(m, field, eq);
+        } else if (streq(field, "TimeoutIdleSec")) {
+                /* Sent over the bus under the corresponding ...USec name. */
+                return bus_append_parse_sec_rename(m, field, eq);
+        }
+
+        return 0;
+}
+
+/* Translates one execution-environment assignment 'field'='eq' into the matching D-Bus property and
+ * appends it to 'm' as a "(sv)" pair.
+ *
+ * Returns 1 if 'field' was handled and appended, 0 if 'field' is not one of the settings known to this
+ * function (so the caller can try something else), and a negative value if parsing or message
+ * construction failed (the failure is logged here). */
+static int bus_append_execute_property(sd_bus_message *m, const char *field, const char *eq) {
+        const char *suffix;
+        int r;
+
+        if (STR_IN_SET(field,
+                       "User", "Group",
+                       "UtmpIdentifier", "UtmpMode", "PAMName", "TTYPath",
+                       "WorkingDirectory", "RootDirectory", "SyslogIdentifier",
+                       "ProtectSystem", "ProtectHome", "SELinuxContext", "RootImage",
+                       "RuntimeDirectoryPreserve", "Personality", "KeyringMode"))
+
+                return bus_append_string(m, field, eq);
+
+        if (STR_IN_SET(field,
+                       "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
+                       "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
+                       "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix",
+                       "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
+                       "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups",
+                       "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality"))
+
+                return bus_append_parse_boolean(m, field, eq);
+
+        if (STR_IN_SET(field,
+                       "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories",
+                       "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths",
+                       "RuntimeDirectory", "StateDirectory", "CacheDirectory", "LogsDirectory", "ConfigurationDirectory",
+                       "SupplementaryGroups", "SystemCallArchitectures"))
+
+                return bus_append_strv(m, field, eq, EXTRACT_QUOTES);
+
+        if (STR_IN_SET(field, "SyslogLevel", "LogLevelMax"))
+
+                return bus_append_log_level_from_string(m, field, eq);
+
+        if (streq(field, "SyslogFacility"))
+
+                return bus_append_log_facility_unshifted_from_string(m, field, eq);
+
+        if (streq(field, "SecureBits"))
+
+                return bus_append_secure_bits_from_string(m, field, eq);
+
+        if (streq(field, "CPUSchedulingPolicy"))
+
+                return bus_append_sched_policy_from_string(m, field, eq);
+
+        if (STR_IN_SET(field, "CPUSchedulingPriority", "OOMScoreAdjust"))
+
+                return bus_append_safe_atoi(m, field, eq);
+
+        if (streq(field, "Nice"))
+
+                return bus_append_parse_nice(m, field, eq);
+
+        if (streq(field, "SystemCallErrorNumber"))
+
+                return bus_append_parse_errno(m, field, eq);
+
+        if (streq(field, "IOSchedulingClass"))
+
+                return bus_append_ioprio_class_from_string(m, field, eq);
+
+        if (streq(field, "IOSchedulingPriority"))
+
+                return bus_append_ioprio_parse_priority(m, field, eq);
+
+        if (STR_IN_SET(field,
+                       "RuntimeDirectoryMode", "StateDirectoryMode", "CacheDirectoryMode",
+                       "LogsDirectoryMode", "ConfigurationDirectoryMode", "UMask"))
+
+                return bus_append_parse_mode(m, field, eq);
+
+        if (streq(field, "TimerSlackNSec"))
+
+                return bus_append_parse_nsec(m, field, eq);
+
+        if (streq(field, "LogRateLimitIntervalSec"))
+
+                return bus_append_parse_sec_rename(m, field, eq);
+
+        if (streq(field, "LogRateLimitBurst"))
+
+                return bus_append_safe_atou(m, field, eq);
+
+        if (streq(field, "MountFlags"))
+
+                return bus_append_mount_propagation_flags_from_string(m, field, eq);
+
+        if (STR_IN_SET(field, "Environment", "UnsetEnvironment", "PassEnvironment"))
+
+                return bus_append_strv(m, field, eq, EXTRACT_QUOTES|EXTRACT_CUNESCAPE);
+
+        if (streq(field, "EnvironmentFile")) {
+
+                /* Sent as "EnvironmentFiles"; a leading '-' is stripped and turned into the boolean. */
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 0);
+                else
+                        r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 1,
+                                                  eq[0] == '-' ? eq + 1 : eq,
+                                                  eq[0] == '-');
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (streq(field, "LogExtraFields")) {
+
+                /* The whole of 'eq' is sent as a single byte-array element of an "aay". */
+                r = sd_bus_message_open_container(m, 'r', "sv");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_basic(m, 's', "LogExtraFields");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'v', "aay");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'a', "ay");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_array(m, 'y', eq, strlen(eq));
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(field, "StandardInput", "StandardOutput", "StandardError")) {
+                const char *n, *appended;
+
+                /* The "fd:", "file:" and "append:" prefixes select a differently named property that
+                 * receives the remainder of the value; everything else is passed through as is. */
+                if ((n = startswith(eq, "fd:"))) {
+                        appended = strjoina(field, "FileDescriptorName");
+                        r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+                } else if ((n = startswith(eq, "file:"))) {
+                        appended = strjoina(field, "File");
+                        r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+                } else if ((n = startswith(eq, "append:"))) {
+                        appended = strjoina(field, "FileToAppend");
+                        r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+                } else
+                        r = sd_bus_message_append(m, "(sv)", field, "s", eq);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (streq(field, "StandardInputText")) {
+                _cleanup_free_ char *unescaped = NULL;
+
+                r = cunescape(eq, 0, &unescaped);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to unescape text '%s': %m", eq);
+
+                if (!strextend(&unescaped, "\n", NULL))
+                        return log_oom();
+
+                /* Note that we don't expand specifiers here, but that should be OK, as this is a programmatic
+                 * interface anyway */
+
+                return bus_append_byte_array(m, field, unescaped, strlen(unescaped));
+        }
+
+        if (streq(field, "StandardInputData")) {
+                _cleanup_free_ void *decoded = NULL;
+                size_t sz;
+
+                r = unbase64mem(eq, (size_t) -1, &decoded, &sz);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to decode base64 data '%s': %m", eq);
+
+                return bus_append_byte_array(m, field, decoded, sz);
+        }
+
+        if ((suffix = startswith(field, "Limit"))) {
+                int rl;
+
+                /* Only "Limit<RESOURCE>" names that map to a known resource limit are handled here;
+                 * anything else falls through to the checks below. */
+                rl = rlimit_from_string(suffix);
+                if (rl >= 0) {
+                        const char *sn;
+                        struct rlimit l;
+
+                        r = rlimit_parse(rl, eq, &l);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse resource limit: %s", eq);
+
+                        /* The hard limit goes into 'field', the soft limit into 'field' + "Soft". */
+                        r = sd_bus_message_append(m, "(sv)", field, "t", l.rlim_max);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        sn = strjoina(field, "Soft");
+                        r = sd_bus_message_append(m, "(sv)", sn, "t", l.rlim_cur);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        return 1;
+                }
+        }
+
+        if (STR_IN_SET(field, "AppArmorProfile", "SmackProcessLabel")) {
+                int ignore = 0;
+                const char *s = eq;
+
+                /* A leading '-' is stripped and sent as the boolean of the "(bs)" pair. */
+                if (eq[0] == '-') {
+                        ignore = 1;
+                        s = eq + 1;
+                }
+
+                r = sd_bus_message_append(m, "(sv)", field, "(bs)", ignore, s);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(field, "CapabilityBoundingSet", "AmbientCapabilities")) {
+                uint64_t sum = 0;
+                bool invert = false;
+                const char *p = eq;
+
+                /* A leading '~' inverts the resulting capability mask. */
+                if (*p == '~') {
+                        invert = true;
+                        p++;
+                }
+
+                r = capability_set_from_string(p, &sum);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse %s value %s: %m", field, eq);
+
+                sum = invert ? ~sum : sum;
+
+                r = sd_bus_message_append(m, "(sv)", field, "t", sum);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (streq(field, "CPUAffinity")) {
+                _cleanup_cpu_free_ cpu_set_t *cpuset = NULL;
+
+                /* The non-negative result of parse_cpu_set() is used to size the array sent over the bus. */
+                r = parse_cpu_set(eq, &cpuset);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+                return bus_append_byte_array(m, field, cpuset, CPU_ALLOC_SIZE(r));
+        }
+
+        if (STR_IN_SET(field, "RestrictAddressFamilies", "SystemCallFilter")) {
+                int whitelist = 1;
+                const char *p = eq;
+
+                /* A leading '~' turns the list from a whitelist into a blacklist. */
+                if (*p == '~') {
+                        whitelist = 0;
+                        p++;
+                }
+
+                r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'v', "(bas)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'r', "bas");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_basic(m, 'b', &whitelist);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'a', "s");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                for (;;) {
+                        _cleanup_free_ char *word = NULL;
+
+                        r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+                        if (r == 0)
+                                break;
+                        if (r == -ENOMEM)
+                                return log_oom();
+                        if (r < 0)
+                                return log_error_errno(r, "Invalid syntax: %s", eq);
+
+                        r = sd_bus_message_append_basic(m, 's', word);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+                }
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (streq(field, "RestrictNamespaces")) {
+                bool invert = false;
+                unsigned long flags;
+
+                /* A boolean is accepted first: true maps to an empty flag set, false to all namespace
+                 * flags. Anything else is parsed as a flag list, optionally inverted with '~'. */
+                r = parse_boolean(eq);
+                if (r > 0)
+                        flags = 0;
+                else if (r == 0)
+                        flags = NAMESPACE_FLAGS_ALL;
+                else {
+                        if (eq[0] == '~') {
+                                invert = true;
+                                eq++;
+                        }
+
+                        r = namespace_flags_from_string(eq, &flags);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse %s value %s.", field, eq);
+                }
+
+                if (invert)
+                        flags = (~flags) & NAMESPACE_FLAGS_ALL;
+
+                r = sd_bus_message_append(m, "(sv)", field, "t", (uint64_t) flags);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(field, "BindPaths", "BindReadOnlyPaths")) {
+                const char *p = eq;
+
+                r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'v', "a(ssbt)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'a', "(ssbt)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                /* Each entry has the form [-]SOURCE[:DESTINATION[:OPTIONS]]. */
+                for (;;) {
+                        _cleanup_free_ char *source = NULL, *destination = NULL;
+                        char *s = NULL, *d = NULL;
+                        bool ignore_enoent = false;
+                        uint64_t flags = MS_REC;
+
+                        r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse argument: %m");
+                        if (r == 0)
+                                break;
+
+                        s = source;
+                        if (s[0] == '-') {
+                                ignore_enoent = true;
+                                s++;
+                        }
+
+                        /* p[-1] is the separator that ended the previous word; ':' means more parts follow. */
+                        if (p && p[-1] == ':') {
+                                r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to parse argument: %m");
+                                if (r == 0)
+                                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                                               "Missing argument after ':': %s",
+                                                               eq);
+
+                                d = destination;
+
+                                if (p && p[-1] == ':') {
+                                        _cleanup_free_ char *options = NULL;
+
+                                        r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES);
+                                        if (r < 0)
+                                                return log_error_errno(r, "Failed to parse argument: %m");
+
+                                        if (isempty(options) || streq(options, "rbind"))
+                                                flags = MS_REC;
+                                        else if (streq(options, "norbind"))
+                                                flags = 0;
+                                        else
+                                                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                                                       "Unknown options: %s",
+                                                                       eq);
+                                }
+                        } else
+                                /* No destination given: use the source path for both. */
+                                d = s;
+
+                        r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+                }
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (streq(field, "TemporaryFileSystem")) {
+                const char *p = eq;
+
+                r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'v', "a(ss)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'a', "(ss)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                /* Each word has the form PATH[:OPTIONS]. */
+                for (;;) {
+                        _cleanup_free_ char *word = NULL, *path = NULL;
+                        const char *w;
+
+                        r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse argument: %m");
+                        if (r == 0)
+                                break;
+
+                        w = word;
+                        r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse argument: %m");
+                        if (r == 0)
+                                /* Report the complete value: 'p' is only the not-yet-parsed remainder and
+                                 * may already be NULL once the last word has been consumed. */
+                                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                                       "Failed to parse argument: %s",
+                                                       eq);
+
+                        r = sd_bus_message_append(m, "(ss)", path, w);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+                }
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        return 0;
+}
+
+/* Appends a kill-related setting to 'm'. Returns the result of the matching bus_append_*() helper, or 0
+ * if 'field' is not a kill setting handled here. */
+static int bus_append_kill_property(sd_bus_message *m, const char *field, const char *eq) {
+        if (streq(field, "KillMode")) {
+                return bus_append_string(m, field, eq);
+        } else if (STR_IN_SET(field, "SendSIGHUP", "SendSIGKILL")) {
+                return bus_append_parse_boolean(m, field, eq);
+        } else if (STR_IN_SET(field, "KillSignal", "FinalKillSignal", "WatchdogSignal")) {
+                return bus_append_signal_from_string(m, field, eq);
+        }
+
+        return 0;
+}
+
+/* Appends a mount unit setting to 'm'. Returns the result of the matching bus_append_*() helper, or 0 if
+ * 'field' is not a mount setting handled here. */
+static int bus_append_mount_property(sd_bus_message *m, const char *field, const char *eq) {
+        if (STR_IN_SET(field, "What", "Where", "Options", "Type")) {
+                return bus_append_string(m, field, eq);
+        } else if (streq(field, "TimeoutSec")) {
+                /* Sent over the bus under the corresponding ...USec name. */
+                return bus_append_parse_sec_rename(m, field, eq);
+        } else if (streq(field, "DirectoryMode")) {
+                return bus_append_parse_mode(m, field, eq);
+        } else if (STR_IN_SET(field, "SloppyOptions", "LazyUnmount", "ForceUnmount")) {
+                return bus_append_parse_boolean(m, field, eq);
+        }
+
+        return 0;
+}
+
+/* Handles the path unit settings. MakeDirectory= and DirectoryMode= go to their parsing helpers;
+ * the Path*= / DirectoryNotEmpty= settings are appended as one (type, path) entry of the "Paths"
+ * property, or as an empty array when the value is empty.
+ * Returns 1 if the path entry was appended, 0 if "field" is unknown here, negative on error. */
+static int bus_append_path_property(sd_bus_message *m, const char *field, const char *eq) {
+        int r;
+
+        if (streq(field, "DirectoryMode"))
+                return bus_append_parse_mode(m, field, eq);
+
+        if (streq(field, "MakeDirectory"))
+                return bus_append_parse_boolean(m, field, eq);
+
+        if (!STR_IN_SET(field,
+                        "PathExists", "PathExistsGlob", "PathChanged",
+                        "PathModified", "DirectoryNotEmpty"))
+                return 0;
+
+        r = isempty(eq)
+                ? sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 0)
+                : sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 1, field, eq);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        return 1;
+}
+
+/* Handles the service unit settings. Simple settings are dispatched to the matching bus_append_*()
+ * helper. TimeoutSec= is expanded into both TimeoutStartSec= and TimeoutStopSec=. The *ExitStatus=
+ * settings are parsed into a list of numeric exit statuses and a list of signals and appended as an
+ * "(aiai)" variant.
+ * Returns 0 if "field" is not handled here, a negative value on error, otherwise the result of the
+ * helper (1 for the exit status settings). */
+static int bus_append_service_property(sd_bus_message *m, const char *field, const char *eq) {
+        int r;
+
+        if (STR_IN_SET(field,
+                       "PIDFile", "Type", "Restart", "BusName", "NotifyAccess",
+                       "USBFunctionDescriptors", "USBFunctionStrings"))
+                return bus_append_string(m, field, eq);
+
+        if (STR_IN_SET(field, "PermissionsStartOnly", "RootDirectoryStartOnly", "RemainAfterExit", "GuessMainPID"))
+                return bus_append_parse_boolean(m, field, eq);
+
+        if (STR_IN_SET(field, "RestartSec", "TimeoutStartSec", "TimeoutStopSec", "RuntimeMaxSec", "WatchdogSec"))
+                return bus_append_parse_sec_rename(m, field, eq);
+
+        if (streq(field, "TimeoutSec")) {
+                r = bus_append_parse_sec_rename(m, "TimeoutStartSec", eq);
+                if (r < 0)
+                        return r;
+
+                return bus_append_parse_sec_rename(m, "TimeoutStopSec", eq);
+        }
+
+        if (streq(field, "FileDescriptorStoreMax"))
+                return bus_append_safe_atou(m, field, eq);
+
+        if (STR_IN_SET(field,
+                       "ExecStartPre", "ExecStart", "ExecStartPost",
+                       "ExecReload", "ExecStop", "ExecStopPost"))
+                return bus_append_exec_command(m, field, eq);
+
+        if (STR_IN_SET(field, "RestartPreventExitStatus", "RestartForceExitStatus", "SuccessExitStatus")) {
+                _cleanup_free_ int *status = NULL, *signal = NULL;
+                size_t n_status = 0, n_signal = 0;
+                const char *p;
+
+                for (p = eq;;) {
+                        _cleanup_free_ char *word = NULL;
+                        int val, *grown;
+
+                        r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES);
+                        if (r == 0)
+                                break;
+                        if (r == -ENOMEM)
+                                return log_oom();
+                        if (r < 0)
+                                return log_error_errno(r, "Invalid syntax in %s: %s", field, eq);
+
+                        r = safe_atoi(word, &val);
+                        if (r < 0) {
+                                /* Not a number, so try it as a signal name */
+                                val = signal_from_string(word);
+                                if (val < 0)
+                                        return log_error_errno(r, "Invalid status or signal %s in %s: %m", word, field);
+
+                                /* Grow via a temporary: assigning the result straight to the
+                                 * _cleanup_free_ pointer would leak the old buffer on failure. */
+                                grown = reallocarray(signal, n_signal + 1, sizeof(int));
+                                if (!grown)
+                                        return log_oom();
+
+                                signal = grown;
+                                signal[n_signal++] = val;
+                        } else {
+                                grown = reallocarray(status, n_status + 1, sizeof(int));
+                                if (!grown)
+                                        return log_oom();
+
+                                status = grown;
+                                status[n_status++] = val;
+                        }
+                }
+
+                r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'v', "(aiai)");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_open_container(m, 'r', "aiai");
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                /* sd_bus_message_append_array() takes the array size in bytes, not in elements */
+                r = sd_bus_message_append_array(m, 'i', status, n_status * sizeof(int));
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_append_array(m, 'i', signal, n_signal * sizeof(int));
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                /* Close the "aiai" struct, the variant and the outer "sv" struct */
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        return 0;
+}
+
+/* Handles the socket unit settings. Most settings are passed straight to a bus_append_*() helper
+ * chosen by setting name. The Listen*= settings become one (type, address) entry of the "Listen"
+ * property, or an empty array when the value is empty.
+ * Returns 1 if a Listen entry was appended, 0 if "field" is unknown here, negative on error;
+ * for all other settings the helper's result is returned as is. */
+static int bus_append_socket_property(sd_bus_message *m, const char *field, const char *eq) {
+        const char *listen_type;
+        int r;
+
+        if (STR_IN_SET(field,
+                       "Accept", "Writable", "KeepAlive", "NoDelay", "FreeBind", "Transparent", "Broadcast",
+                       "PassCredentials", "PassSecurity", "ReusePort", "RemoveOnStop", "SELinuxContextFromNet"))
+                return bus_append_parse_boolean(m, field, eq);
+
+        if (STR_IN_SET(field, "Priority", "IPTTL", "Mark"))
+                return bus_append_safe_atoi(m, field, eq);
+
+        if (streq(field, "IPTOS"))
+                return bus_append_ip_tos_from_string(m, field, eq);
+
+        if (STR_IN_SET(field, "Backlog", "MaxConnections", "MaxConnectionsPerSource", "KeepAliveProbes", "TriggerLimitBurst"))
+                return bus_append_safe_atou(m, field, eq);
+
+        if (STR_IN_SET(field, "SocketMode", "DirectoryMode"))
+                return bus_append_parse_mode(m, field, eq);
+
+        if (STR_IN_SET(field, "MessageQueueMaxMessages", "MessageQueueMessageSize"))
+                return bus_append_safe_atoi64(m, field, eq);
+
+        if (STR_IN_SET(field, "TimeoutSec", "KeepAliveTimeSec", "KeepAliveIntervalSec", "DeferAcceptSec", "TriggerLimitIntervalSec"))
+                return bus_append_parse_sec_rename(m, field, eq);
+
+        if (STR_IN_SET(field, "ReceiveBuffer", "SendBuffer", "PipeSize"))
+                return bus_append_parse_size(m, field, eq, 1024);
+
+        if (STR_IN_SET(field, "ExecStartPre", "ExecStartPost", "ExecReload", "ExecStopPost"))
+                return bus_append_exec_command(m, field, eq);
+
+        if (STR_IN_SET(field,
+                       "SmackLabel", "SmackLabelIPIn", "SmackLabelIPOut", "TCPCongestion",
+                       "BindToDevice", "BindIPv6Only", "FileDescriptorName",
+                       "SocketUser", "SocketGroup"))
+                return bus_append_string(m, field, eq);
+
+        if (streq(field, "Symlinks"))
+                return bus_append_strv(m, field, eq, EXTRACT_QUOTES);
+
+        if (streq(field, "SocketProtocol"))
+                return bus_append_parse_ip_protocol(m, field, eq);
+
+        if (!STR_IN_SET(field,
+                        "ListenStream", "ListenDatagram", "ListenSequentialPacket", "ListenNetlink",
+                        "ListenSpecial", "ListenMessageQueue", "ListenFIFO", "ListenUSBFunction"))
+                return 0;
+
+        /* The entry's type string is the setting name without its "Listen" prefix */
+        listen_type = field + STRLEN("Listen");
+
+        if (isempty(eq))
+                r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 0);
+        else
+                r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 1, listen_type, eq);
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        return 1;
+}
+/* Handles the timer unit settings. OnCalendar= is appended to "TimersCalendar" as a string pair,
+ * the On*Sec= settings are parsed with parse_sec() and appended to "TimersMonotonic"; an empty
+ * value appends an empty array in both cases.
+ * Returns 1 if such an entry was appended, 0 if "field" is unknown here, negative on error. */
+static int bus_append_timer_property(sd_bus_message *m, const char *field, const char *eq) {
+        int r;
+
+        if (STR_IN_SET(field, "WakeSystem", "RemainAfterElapse", "Persistent"))
+                return bus_append_parse_boolean(m, field, eq);
+
+        if (STR_IN_SET(field, "AccuracySec", "RandomizedDelaySec"))
+                return bus_append_parse_sec_rename(m, field, eq);
+
+        if (streq(field, "OnCalendar")) {
+                r = isempty(eq)
+                        ? sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 0)
+                        : sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 1, field, eq);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (!STR_IN_SET(field,
+                        "OnActiveSec", "OnBootSec", "OnStartupSec",
+                        "OnUnitActiveSec","OnUnitInactiveSec"))
+                return 0;
+
+        if (isempty(eq))
+                r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 0);
+        else {
+                usec_t usec;
+
+                r = parse_sec(eq, &usec);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+                r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 1, field, usec);
+        }
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        return 1;
+}
+
+/* Handles the settings that are not specific to one unit type. Besides the simple string, boolean,
+ * time span and integer settings this covers the *ActionExitStatus= settings (empty value is sent
+ * as -1), the dependency lists, and the Condition*= / Assert*= settings, which are appended to the
+ * "Conditions" or "Asserts" property after stripping a leading '|' (trigger) and '!' (negate).
+ * Returns 0 if "field" is not handled here, a negative value on error, and otherwise a helper's
+ * result or 1. */
+static int bus_append_unit_property(sd_bus_message *m, const char *field, const char *eq) {
+        const char *list_property;
+        ConditionType kind;
+        int r;
+
+        if (STR_IN_SET(field,
+                       "Description", "SourcePath", "OnFailureJobMode",
+                       "JobTimeoutAction", "JobTimeoutRebootArgument",
+                       "StartLimitAction", "FailureAction", "SuccessAction",
+                       "RebootArgument", "CollectMode"))
+                return bus_append_string(m, field, eq);
+
+        if (STR_IN_SET(field,
+                       "StopWhenUnneeded", "RefuseManualStart", "RefuseManualStop",
+                       "AllowIsolate", "IgnoreOnIsolate", "DefaultDependencies"))
+                return bus_append_parse_boolean(m, field, eq);
+
+        if (STR_IN_SET(field, "JobTimeoutSec", "JobRunningTimeoutSec", "StartLimitIntervalSec"))
+                return bus_append_parse_sec_rename(m, field, eq);
+
+        if (streq(field, "StartLimitBurst"))
+                return bus_append_safe_atou(m, field, eq);
+
+        if (STR_IN_SET(field, "SuccessActionExitStatus", "FailureActionExitStatus")) {
+                int exit_status = -1;
+
+                if (!isempty(eq)) {
+                        uint8_t u;
+
+                        r = safe_atou8(eq, &u);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse %s=%s", field, eq);
+
+                        exit_status = (int) u;
+                }
+
+                r = sd_bus_message_append(m, "(sv)", field, "i", exit_status);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
+        if (unit_dependency_from_string(field) >= 0 ||
+            STR_IN_SET(field, "Documentation", "RequiresMountsFor"))
+                return bus_append_strv(m, field, eq, EXTRACT_QUOTES);
+
+        /* A condition name takes precedence; only if it is none do we try it as an assert name */
+        kind = condition_type_from_string(field);
+        if (kind >= 0)
+                list_property = "Conditions";
+        else {
+                kind = assert_type_from_string(field);
+                if (kind < 0)
+                        return 0;
+
+                list_property = "Asserts";
+        }
+
+        if (isempty(eq))
+                r = sd_bus_message_append(m, "(sv)", list_property, "a(sbbs)", 0);
+        else {
+                const char *param = eq;
+                int trigger, negate;
+
+                trigger = *param == '|';
+                if (trigger)
+                        param++;
+
+                negate = *param == '!';
+                if (negate)
+                        param++;
+
+                r = sd_bus_message_append(m, "(sv)", list_property, "a(sbbs)", 1,
+                                          field, trigger, negate, param);
+        }
+        if (r < 0)
+                return bus_log_create_error(r);
+
+        return 1;
+}
+
+/* Splits "assignment" at the first '=' into a setting name and a value and offers the pair to the
+ * property handlers that apply to unit type "t", in a fixed order; the first handler returning a
+ * non-zero value decides the result. If no type-specific handler takes the setting, the generic
+ * unit handler is tried last.
+ * Fails with EINVAL (after logging) if there is no '=', if the unit type is unsupported or
+ * invalid, or if no handler knows the setting. */
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment) {
+        const char *eq, *field;
+        int r;
+
+        assert(m);
+        assert(assignment);
+
+        eq = strchr(assignment, '=');
+        if (!eq)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Not an assignment: %s", assignment);
+
+        field = strndupa(assignment, eq - assignment);
+        eq++;
+
+        switch (t) {
+
+        case UNIT_SERVICE:
+                r = bus_append_cgroup_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_execute_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_kill_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_service_property(m, field, eq);
+                break;
+
+        case UNIT_SOCKET:
+                r = bus_append_cgroup_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_execute_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_kill_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_socket_property(m, field, eq);
+                break;
+
+        case UNIT_TIMER:
+                r = bus_append_timer_property(m, field, eq);
+                break;
+
+        case UNIT_PATH:
+                r = bus_append_path_property(m, field, eq);
+                break;
+
+        case UNIT_SLICE:
+                r = bus_append_cgroup_property(m, field, eq);
+                break;
+
+        case UNIT_SCOPE:
+                /* The only scope-specific setting; its result is returned unconditionally */
+                if (streq(field, "TimeoutStopSec"))
+                        return bus_append_parse_sec_rename(m, field, eq);
+
+                r = bus_append_cgroup_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_kill_property(m, field, eq);
+                break;
+
+        case UNIT_MOUNT:
+                r = bus_append_cgroup_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_execute_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_kill_property(m, field, eq);
+                if (r == 0)
+                        r = bus_append_mount_property(m, field, eq);
+                break;
+
+        case UNIT_AUTOMOUNT:
+                r = bus_append_automount_property(m, field, eq);
+                break;
+
+        case UNIT_TARGET:
+        case UNIT_DEVICE:
+        case UNIT_SWAP:
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Not supported unit type");
+
+        default:
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Invalid unit type");
+        }
+        if (r != 0)
+                return r;
+
+        r = bus_append_unit_property(m, field, eq);
+        if (r != 0)
+                return r;
+
+        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                               "Unknown assignment: %s", assignment);
+}
+
+/* Applies bus_append_unit_property_assignment() to every string of the list "l", in order.
+ * Stops at the first entry that yields a negative value and returns it; returns 0 otherwise. */
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l) {
+        char **assignment;
+
+        assert(m);
+
+        STRV_FOREACH(assignment, l) {
+                int k;
+
+                k = bus_append_unit_property_assignment(m, t, *assignment);
+                if (k < 0)
+                        return k;
+        }
+
+        return 0;
+}
+
+typedef struct BusWaitForJobs { /* state shared by the bus_wait_for_jobs_*() calls and the two signal callbacks */
+ sd_bus *bus; /* reference taken in bus_wait_for_jobs_new(), dropped in bus_wait_for_jobs_free() */
+ Set *jobs; /* job object paths still being waited for; entries are freed with the set */
+
+ char *name; /* unit name copied from the last matching JobRemoved signal, NULL if none */
+ char *result; /* result string copied from that signal, NULL if none */
+
+ sd_bus_slot *slot_job_removed; /* match slot for the manager's JobRemoved signal */
+ sd_bus_slot *slot_disconnected; /* match slot for org.freedesktop.DBus.Local "Disconnected" */
+} BusWaitForJobs;
+
+/* Signal callback for "Disconnected": logs the loss of the connection and closes the bus the
+ * message arrived on. Always returns 0. */
+static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+        sd_bus *bus;
+
+        assert(m);
+
+        log_error("Warning! D-Bus connection terminated.");
+
+        bus = sd_bus_message_get_bus(m);
+        sd_bus_close(bus);
+
+        return 0;
+}
+
+/* Signal callback for the manager's JobRemoved signal. If the removed job's object path is one we
+ * are waiting for, it is dropped from the set and its result and unit name are copied into the
+ * BusWaitForJobs object passed as userdata, for bus_wait_for_jobs() to evaluate.
+ * Always returns 0; parse failures are logged and otherwise ignored. */
+static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+        const char *path, *unit, *result;
+        BusWaitForJobs *d = userdata;
+        uint32_t id;
+        char *found;
+        int r;
+
+        assert(m);
+        assert(d);
+
+        r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result);
+        if (r < 0) {
+                bus_log_parse_error(r);
+                return 0;
+        }
+
+        /* Not one of ours? Then there is nothing to record. */
+        found = set_remove(d->jobs, (char*) path);
+        if (!found)
+                return 0;
+
+        free(found);
+
+        /* Release anything left over from an earlier signal before storing new values, so that
+         * nothing leaks and no stale data is mixed with the data of this job. */
+        d->result = mfree(d->result);
+        d->name = mfree(d->name);
+
+        if (!isempty(result)) {
+                d->result = strdup(result);
+                if (!d->result)
+                        /* Without a result the job cannot be evaluated, so at least say why */
+                        (void) log_oom();
+        }
+
+        if (!isempty(unit)) {
+                d->name = strdup(unit);
+                if (!d->name)
+                        (void) log_oom();
+        }
+
+        return 0;
+}
+
+/* Releases a BusWaitForJobs object together with everything it holds: the recorded name and
+ * result, the set of job paths, both match slots and the bus reference. NULL is accepted. */
+void bus_wait_for_jobs_free(BusWaitForJobs *d) {
+        if (d) {
+                set_free_free(d->jobs);
+
+                sd_bus_slot_unref(d->slot_disconnected);
+                sd_bus_slot_unref(d->slot_job_removed);
+
+                sd_bus_unref(d->bus);
+
+                free(d->name);
+                free(d->result);
+
+                free(d);
+        }
+}
+
+/* Allocates a BusWaitForJobs object for "bus" and installs the JobRemoved and Disconnected signal
+ * matches on it. On success stores the object in *ret and returns 0; on failure returns a negative
+ * value and frees whatever was set up so far. */
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret) {
+        _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+        const char *sender;
+        int r;
+
+        assert(bus);
+        assert(ret);
+
+        w = new0(BusWaitForJobs, 1);
+        if (!w)
+                return -ENOMEM;
+
+        w->bus = sd_bus_ref(bus);
+
+        /* As a bus client we can match on the sender. Direct connections have no initialized
+         * sender field, so there the sender is left out of the match. */
+        sender = bus->bus_client ? "org.freedesktop.systemd1" : NULL;
+
+        r = sd_bus_match_signal_async(
+                        bus,
+                        &w->slot_job_removed,
+                        sender,
+                        "/org/freedesktop/systemd1",
+                        "org.freedesktop.systemd1.Manager",
+                        "JobRemoved",
+                        match_job_removed, NULL, w);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_match_signal_async(
+                        bus,
+                        &w->slot_disconnected,
+                        "org.freedesktop.DBus.Local",
+                        NULL,
+                        "org.freedesktop.DBus.Local",
+                        "Disconnected",
+                        match_disconnected, NULL, w);
+        if (r < 0)
+                return r;
+
+        *ret = TAKE_PTR(w);
+        return 0;
+}
+
+/* Runs sd_bus_process() until it reports that it did some work, waiting on the bus without
+ * timeout whenever there is nothing to process. Returns 0 once work was done, or the negative
+ * value returned by sd_bus_process() or sd_bus_wait(). */
+static int bus_process_wait(sd_bus *bus) {
+        int r;
+
+        while ((r = sd_bus_process(bus, NULL)) == 0) {
+                r = sd_bus_wait(bus, (uint64_t) -1);
+                if (r < 0)
+                        return r;
+        }
+
+        return r < 0 ? r : 0;
+}
+
+/* Reads the "Result" property of the service unit named d->name into *result.
+ * Returns -EINVAL if the name does not end in ".service", -ENOMEM if the object path cannot be
+ * allocated, otherwise whatever sd_bus_get_property_string() returns. */
+static int bus_job_get_service_result(BusWaitForJobs *d, char **result) {
+        _cleanup_free_ char *unit_path = NULL;
+        int r;
+
+        assert(d);
+        assert(d->name);
+        assert(result);
+
+        if (!endswith(d->name, ".service"))
+                return -EINVAL;
+
+        unit_path = unit_dbus_path_from_name(d->name);
+        if (!unit_path)
+                return -ENOMEM;
+
+        r = sd_bus_get_property_string(
+                        d->bus,
+                        "org.freedesktop.systemd1",
+                        unit_path,
+                        "org.freedesktop.systemd1.Service",
+                        "Result",
+                        NULL,
+                        result);
+        return r;
+}
+
+static const struct { /* maps a service "Result" value to the text logged after "failed because" */
+ const char *result, *explanation;
+} explanations [] = {
+ { "resources", "of unavailable resources or another system error" },
+ { "protocol", "the service did not take the steps required by its unit configuration" },
+ { "timeout", "a timeout was exceeded" },
+ { "exit-code", "the control process exited with error code" },
+ { "signal", "a fatal signal was delivered to the control process" },
+ { "core-dump", "a fatal signal was delivered causing the control process to dump core" },
+ { "watchdog", "the service failed to send watchdog ping" },
+ { "start-limit", "start of the service was attempted too often" }
+};
+
+/* Logs that the job for "service" failed and which commands show details. If "result" matches an
+ * entry of explanations[], the explanation is included in the message. "extra_args", if not
+ * empty, is appended to the suggested systemctl/journalctl command lines. For the "start-limit"
+ * result an additional hint on how to force a start is logged. */
+static void log_job_error_with_service_result(const char* service, const char *result, const char* const* extra_args) {
+        _cleanup_free_ char *quoted = NULL;
+        const char *systemctl = "systemctl", *journalctl = "journalctl";
+        const char *reason = NULL, *shown;
+
+        assert(service);
+
+        /* If quoting fails we fall back to a placeholder rather than giving up on the message */
+        quoted = shell_maybe_quote(service, ESCAPE_BACKSLASH);
+        shown = quoted ?: "<service>";
+
+        if (!strv_isempty((char**) extra_args)) {
+                _cleanup_free_ char *joined = NULL;
+
+                joined = strv_join((char**) extra_args, " ");
+                systemctl = strjoina("systemctl ", joined ?: "<args>");
+                journalctl = strjoina("journalctl ", joined ?: "<args>");
+        }
+
+        if (!isempty(result)) {
+                size_t i;
+
+                for (i = 0; i < ELEMENTSOF(explanations); i++)
+                        if (streq(result, explanations[i].result)) {
+                                reason = explanations[i].explanation;
+                                break;
+                        }
+        }
+
+        if (reason) {
+                log_error("Job for %s failed because %s.\n"
+                          "See \"%s status %s\" and \"%s -xe\" for details.\n",
+                          service,
+                          reason,
+                          systemctl,
+                          shown,
+                          journalctl);
+        } else {
+                log_error("Job for %s failed.\n"
+                          "See \"%s status %s\" and \"%s -xe\" for details.\n",
+                          service,
+                          systemctl,
+                          shown,
+                          journalctl);
+        }
+
+        /* This result gets an extra hint in addition to the message above */
+        if (streq_ptr(result, "start-limit"))
+                log_info("To force a start use \"%1$s reset-failed %2$s\"\n"
+                         "followed by \"%1$s start %2$s\" again.",
+                         systemctl,
+                         shown);
+}
+
+/* Evaluates the job result stored in d->result. Unless "quiet" is set, a message describing the
+ * failure is logged first. The result is then mapped to a return value: 0 for "done" and
+ * "skipped", a specific negative errno for the other known results, and -EIO (logged at debug
+ * level) for anything unknown. */
+static int check_wait_response(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+        static const struct {
+                const char *result;
+                int error;
+        } result_errors[] = {
+                { "canceled",    ECANCELED  },
+                { "collected",   ECANCELED  },
+                { "timeout",     ETIME      },
+                { "dependency",  EIO        },
+                { "invalid",     ENOEXEC    },
+                { "assert",      EPROTO     },
+                { "unsupported", EOPNOTSUPP },
+                { "once",        ESTALE     },
+                { "done",        0          },
+                { "skipped",     0          },
+        };
+        size_t i;
+
+        assert(d->result);
+
+        if (!quiet) {
+                const char *unit = strna(d->name);
+
+                if (streq(d->result, "canceled"))
+                        log_error("Job for %s canceled.", unit);
+                else if (streq(d->result, "timeout"))
+                        log_error("Job for %s timed out.", unit);
+                else if (streq(d->result, "dependency"))
+                        log_error("A dependency job for %s failed. See 'journalctl -xe' for details.", unit);
+                else if (streq(d->result, "invalid"))
+                        log_error("%s is not active, cannot reload.", unit);
+                else if (streq(d->result, "assert"))
+                        log_error("Assertion failed on job for %s.", unit);
+                else if (streq(d->result, "unsupported"))
+                        log_error("Operation on or unit type of %s not supported on this system.", unit);
+                else if (streq(d->result, "collected"))
+                        log_error("Queued job for %s was garbage collected.", unit);
+                else if (streq(d->result, "once"))
+                        log_error("Unit %s was started already once and can't be started again.", unit);
+                else if (!STR_IN_SET(d->result, "done", "skipped")) {
+                        if (!d->name)
+                                log_error("Job failed. See \"journalctl -xe\" for details.");
+                        else {
+                                _cleanup_free_ char *service_result = NULL;
+                                int q;
+
+                                /* Failing to fetch the property only costs us the explanation */
+                                q = bus_job_get_service_result(d, &service_result);
+                                if (q < 0)
+                                        log_debug_errno(q, "Failed to get Result property of unit %s: %m", d->name);
+
+                                log_job_error_with_service_result(d->name, service_result, extra_args);
+                        }
+                }
+        }
+
+        for (i = 0; i < ELEMENTSOF(result_errors); i++)
+                if (streq(d->result, result_errors[i].result))
+                        return -result_errors[i].error;
+
+        return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+                               "Unexpected job result, assuming server side newer than us: %s", d->result);
+}
+
+/* Processes the bus until the set of registered jobs is empty. Each time a job result has been
+ * recorded it is evaluated with check_wait_response(). Returns the first negative evaluation
+ * result, 0 if there was none, or a negative value right away if processing the bus fails. */
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+        int first_error = 0;
+
+        assert(d);
+
+        for (;;) {
+                int k;
+
+                if (set_isempty(d->jobs))
+                        break;
+
+                k = bus_process_wait(d->bus);
+                if (k < 0)
+                        return log_error_errno(k, "Failed to wait for response: %m");
+
+                if (d->result) {
+                        k = check_wait_response(d, quiet, extra_args);
+
+                        /* Keep only the first error, it is the one most likely to be meaningful */
+                        if (first_error == 0 && k < 0)
+                                first_error = k;
+
+                        log_debug_errno(k, "Got result %s/%m for job %s", strna(d->result), strna(d->name));
+                }
+
+                /* Reset for the next signal */
+                d->name = mfree(d->name);
+                d->result = mfree(d->result);
+        }
+
+        return first_error;
+}
+
+/* Registers the job object path "path" as one to wait for, allocating the set on first use.
+ * Returns a negative value if the set cannot be allocated, otherwise the result of
+ * set_put_strdup(). */
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path) {
+        int k;
+
+        assert(d);
+
+        k = set_ensure_allocated(&d->jobs, &string_hash_ops);
+        if (k >= 0)
+                k = set_put_strdup(d->jobs, path);
+
+        return k;
+}
+
+/* Convenience wrapper: registers the single job "path" and waits for it, without extra arguments
+ * for the hints in error messages. A failure to register is reported as out-of-memory. */
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet) {
+        if (bus_wait_for_jobs_add(d, path) < 0)
+                return log_oom();
+
+        return bus_wait_for_jobs(d, quiet, NULL);
+}
+
+/* Reads an array of (type, path, source) string triples from "m", adds each one with a known
+ * change type to *changes / *n_changes and finally passes the list to unit_file_dump_changes().
+ * Entries with an unknown type are logged and skipped.
+ * Returns 0 on success and a negative value if reading the message or adding an entry fails. */
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes) {
+        const char *type, *path, *source;
+        int r;
+
+        /* unit_file_dump_changes() below dereferences both, so neither may be NULL */
+        assert(changes);
+        assert(n_changes);
+
+        r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sss)");
+        if (r < 0)
+                return bus_log_parse_error(r);
+
+        for (;;) {
+                UnitFileChangeType ch;
+
+                r = sd_bus_message_read(m, "(sss)", &type, &path, &source);
+                if (r < 0)
+                        return bus_log_parse_error(r);
+                if (r == 0)
+                        break;
+
+                /* Only "success" changes are expected on the bus, hence refuse negative types */
+                ch = unit_file_change_type_from_string(type);
+                if (ch < 0) {
+                        log_notice("Manager reported unknown change type \"%s\" for path \"%s\", ignoring.", type, path);
+                        continue;
+                }
+
+                r = unit_file_changes_add(changes, n_changes, ch, path, source);
+                if (r < 0)
+                        return r;
+        }
+
+        r = sd_bus_message_exit_container(m);
+        if (r < 0)
+                return bus_log_parse_error(r);
+
+        unit_file_dump_changes(0, NULL, *changes, *n_changes, quiet);
+        return 0;
+}
+
+struct CGroupInfo { /* one node of the cgroup tree built by add_cgroup() and printed by dump_processes() */
+ char *cgroup_path; /* also used as the key of this node in the cgroups hashmap */
+ bool is_const; /* If false, cgroup_path should be free()'d */
+
+ Hashmap *pids; /* PID → process name */
+ bool done; /* set once dump_processes() has printed this cgroup */
+
+ struct CGroupInfo *parent; /* NULL for the root cgroup */
+ LIST_FIELDS(struct CGroupInfo, siblings);
+ LIST_HEAD(struct CGroupInfo, children);
+ size_t n_children; /* incremented whenever add_cgroup() links a child to this node */
+};
+
+/* Tells whether "p" names the root cgroup, i.e. is NULL/empty or exactly "/". */
+static bool IS_ROOT(const char *p) {
+        if (isempty(p))
+                return true;
+
+        return streq(p, "/");
+}
+
+/* Looks up the node for cgroup "path" in "cgroups", creating it and, recursively, all of its
+ * missing ancestors if necessary. An empty or NULL path is treated as "/".
+ *
+ * If "is_const" is true the node refers to "path" directly instead of copying it, so the caller
+ * must keep that string alive for as long as the node exists.
+ *
+ * Stores the node in *ret. Returns 0 if it existed already, 1 if it was newly created, and a
+ * negative value on failure (-EINVAL for a non-root path without '/', -ENOMEM, or the error of
+ * hashmap_put()). */
+static int add_cgroup(Hashmap *cgroups, const char *path, bool is_const, struct CGroupInfo **ret) {
+        struct CGroupInfo *parent = NULL, *cg;
+        int r;
+
+        assert(cgroups);
+        assert(ret);
+
+        if (IS_ROOT(path))
+                path = "/";
+
+        cg = hashmap_get(cgroups, path);
+        if (cg) {
+                *ret = cg;
+                return 0;
+        }
+
+        if (!IS_ROOT(path)) {
+                const char *e, *pp;
+
+                e = strrchr(path, '/');
+                if (!e)
+                        return -EINVAL;
+
+                /* strndupa() allocates on the stack and cannot return NULL, hence there is no
+                 * result to check. The copy is only needed until the recursive call returns, as
+                 * the parent node makes its own copy of the path (is_const == false). */
+                pp = strndupa(path, e - path);
+
+                r = add_cgroup(cgroups, pp, false, &parent);
+                if (r < 0)
+                        return r;
+        }
+
+        cg = new0(struct CGroupInfo, 1);
+        if (!cg)
+                return -ENOMEM;
+
+        if (is_const)
+                cg->cgroup_path = (char*) path;
+        else {
+                cg->cgroup_path = strdup(path);
+                if (!cg->cgroup_path) {
+                        free(cg);
+                        return -ENOMEM;
+                }
+        }
+
+        cg->is_const = is_const;
+        cg->parent = parent;
+
+        r = hashmap_put(cgroups, cg->cgroup_path, cg);
+        if (r < 0) {
+                if (!is_const)
+                        free(cg->cgroup_path);
+                free(cg);
+                return r;
+        }
+
+        /* Link into the parent only once the node is safely registered in the hashmap */
+        if (parent) {
+                LIST_PREPEND(siblings, parent->children, cg);
+                parent->n_children++;
+        }
+
+        *ret = cg;
+        return 1;
+}
+
+/* Records process "pid" with display name "name" under the cgroup "path", creating the cgroup node
+ * and its PID map as needed. Neither "path" nor "name" is copied. Returns the result of
+ * hashmap_put() on the PID map, or a negative value if an earlier step fails. */
+static int add_process(
+                Hashmap *cgroups,
+                const char *path,
+                pid_t pid,
+                const char *name) {
+
+        struct CGroupInfo *node;
+        int k;
+
+        assert(cgroups);
+        assert(name);
+        assert(pid > 0);
+
+        k = add_cgroup(cgroups, path, true, &node);
+        if (k < 0)
+                return k;
+
+        k = hashmap_ensure_allocated(&node->pids, &trivial_hash_ops);
+        if (k < 0)
+                return k;
+
+        return hashmap_put(node->pids, PID_TO_PTR(pid), (void*) name);
+}
+
+/* Destroys the node "cg" and, first, all of its descendants: each node is taken out of the
+ * "cgroups" hashmap, unlinked from its parent's child list and freed, together with its PID map
+ * and, unless is_const is set, its path. */
+static void remove_cgroup(Hashmap *cgroups, struct CGroupInfo *cg) {
+        struct CGroupInfo *child;
+
+        assert(cgroups);
+        assert(cg);
+
+        /* Every recursive call unlinks the child it is given, so the list head advances */
+        while ((child = cg->children))
+                remove_cgroup(cgroups, child);
+
+        /* The path is the hashmap key, hence drop the entry before releasing the path */
+        hashmap_remove(cgroups, cg->cgroup_path);
+
+        if (!cg->is_const)
+                free(cg->cgroup_path);
+
+        hashmap_free(cg->pids);
+
+        if (cg->parent)
+                LIST_REMOVE(siblings, cg->parent->children, cg);
+
+        free(cg);
+}
+
+/* Sort callback: orders two CGroupInfo pointers by strcmp() of their cgroup paths. */
+static int cgroup_info_compare_func(struct CGroupInfo * const *a, struct CGroupInfo * const *b) {
+        const struct CGroupInfo *x = *a, *y = *b;
+
+        return strcmp(x->cgroup_path, y->cgroup_path);
+}
+
+static int dump_processes(
+ Hashmap *cgroups,
+ const char *cgroup_path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+ /* Prints the cgroup "cgroup_path" as a tree on stdout: first one line per process of the
+ * cgroup, ordered by PID, then one line per child cgroup, ordered by path, each followed by the
+ * recursively printed contents of that child. Every output line starts with "prefix".
+ * If n_columns is non-zero, process names are ellipsized to fit.
+ * Returns 0 on success, also if "cgroup_path" is not in "cgroups", and a negative value on
+ * failure. Printed cgroups are marked as done. */
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(prefix);
+
+ if (IS_ROOT(cgroup_path))
+ cgroup_path = "/";
+
+ cg = hashmap_get(cgroups, cgroup_path);
+ if (!cg)
+ return 0;
+
+ if (!hashmap_isempty(cg->pids)) {
+ const char *name;
+ size_t n = 0, i;
+ pid_t *pids;
+ void *pidp;
+ Iterator j;
+ int width;
+
+ /* Order processes by their PID */
+ pids = newa(pid_t, hashmap_size(cg->pids));
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids, j)
+ pids[n++] = PTR_TO_PID(pidp);
+
+ assert(n == hashmap_size(cg->pids));
+ typesafe_qsort(pids, n, pid_compare_func);
+ /* The PID column is sized after the last entry of the array that was just sorted */
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *e = NULL;
+ const char *special;
+ bool more;
+
+ name = hashmap_get(cg->pids, PID_TO_PTR(pids[i]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned k;
+ /* Room for the name: n_columns less 2U + width + 1U, but at least 20 */
+ k = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, k, 100);
+ if (e) /* if ellipsizing failed, the unshortened name is printed */
+ name = e;
+ }
+ /* Use the "branch" glyph as long as more processes or any child cgroups follow */
+ more = i+1 < n || cg->children;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fprintf(stdout, "%s%s%*"PID_PRI" %s\n",
+ prefix,
+ special,
+ width, pids[i],
+ name);
+ }
+ }
+
+ if (cg->children) {
+ struct CGroupInfo **children, *child;
+ size_t n = 0, i;
+
+ /* Order subcgroups by their name */
+ children = newa(struct CGroupInfo*, cg->n_children);
+ LIST_FOREACH(siblings, child, cg->children)
+ children[n++] = child;
+ assert(n == cg->n_children);
+ typesafe_qsort(children, n, cgroup_info_compare_func);
+ /* The next level gets two columns less, but never fewer than 20 */
+ if (n_columns != 0)
+ n_columns = MAX(LESS_BY(n_columns, 2U), 20U);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *pp = NULL;
+ const char *name, *special;
+ bool more;
+
+ child = children[i];
+ /* Only the last component of the child's path is shown */
+ name = strrchr(child->cgroup_path, '/');
+ if (!name)
+ return -EINVAL;
+ name++;
+
+ more = i+1 < n;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fputs(prefix, stdout);
+ fputs(special, stdout);
+ fputs(name, stdout);
+ fputc('\n', stdout);
+ /* The child's own lines are prefixed with a vertical line if siblings follow, else with space */
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE);
+
+ pp = strappend(prefix, special);
+ if (!pp)
+ return -ENOMEM;
+
+ r = dump_processes(cgroups, child->cgroup_path, pp, n_columns, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+ /* dump_extra_processes() skips cgroups that have this flag set */
+ cg->done = true;
+ return 0;
+}
+
+static int dump_extra_processes(
+ Hashmap *cgroups,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+ /* Returns 0 on success, including when there is nothing to print, and a negative value on failure. */
+ _cleanup_free_ pid_t *pids = NULL;
+ _cleanup_hashmap_free_ Hashmap *names = NULL;
+ struct CGroupInfo *cg;
+ size_t n_allocated = 0, n = 0, k;
+ Iterator i;
+ int width, r;
+
+ /* Prints the extra processes, i.e. those that are in cgroups we haven't displayed yet. We show them as
+ * combined, sorted, linear list. */
+
+ HASHMAP_FOREACH(cg, cgroups, i) {
+ const char *name;
+ void *pidp;
+ Iterator j;
+ /* Already printed by dump_processes() */
+ if (cg->done)
+ continue;
+
+ if (hashmap_isempty(cg->pids))
+ continue;
+ /* "names" maps PID → process name across all remaining cgroups; it is created on demand */
+ r = hashmap_ensure_allocated(&names, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+ /* Make room for all PIDs of this cgroup before copying them */
+ if (!GREEDY_REALLOC(pids, n_allocated, n + hashmap_size(cg->pids)))
+ return -ENOMEM;
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids, j) {
+ pids[n++] = PTR_TO_PID(pidp);
+
+ r = hashmap_put(names, pidp, (void*) name);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (n == 0)
+ return 0;
+ /* The PID column is sized after the last entry of the sorted array */
+ typesafe_qsort(pids, n, pid_compare_func);
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (k = 0; k < n; k++) {
+ _cleanup_free_ char *e = NULL;
+ const char *name;
+
+ name = hashmap_get(names, PID_TO_PTR(pids[k]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned z;
+ /* Room for the name: n_columns less 2U + width + 1U, but at least 20 */
+ z = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, z, 100);
+ if (e) /* if ellipsizing failed, the unshortened name is printed */
+ name = e;
+ }
+
+ fprintf(stdout, "%s%s %*" PID_PRI " %s\n",
+ prefix,
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET),
+ width, pids[k],
+ name);
+ }
+
+ return 0;
+}
+
+/* Asks the manager for the processes of "unit" (GetUnitProcesses), sorts them into a tree of
+ * cgroups and prints that tree below "cgroup_path" on stdout, followed by a flat list of the
+ * processes living in cgroups that are not part of that tree.
+ *
+ * With OUTPUT_FULL_WIDTH set in "flags" nothing is ellipsized; otherwise an n_columns of 0 means
+ * that the width is taken from columns(). A NULL "prefix" is treated as the empty string.
+ *
+ * Returns a negative value on failure, with "error" filled in if the method call failed. */
+int unit_show_processes(
+                sd_bus *bus,
+                const char *unit,
+                const char *cgroup_path,
+                const char *prefix,
+                unsigned n_columns,
+                OutputFlags flags,
+                sd_bus_error *error) {
+
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+        const char *path = NULL, *name = NULL;
+        Hashmap *cgroups = NULL;
+        struct CGroupInfo *node;
+        uint32_t pid;
+        int r;
+
+        assert(bus);
+        assert(unit);
+
+        if (flags & OUTPUT_FULL_WIDTH)
+                n_columns = 0;
+        else if (n_columns == 0)
+                n_columns = columns();
+
+        prefix = strempty(prefix);
+
+        r = sd_bus_call_method(
+                        bus,
+                        "org.freedesktop.systemd1",
+                        "/org/freedesktop/systemd1",
+                        "org.freedesktop.systemd1.Manager",
+                        "GetUnitProcesses",
+                        error,
+                        &reply,
+                        "s",
+                        unit);
+        if (r < 0)
+                return r;
+
+        cgroups = hashmap_new(&path_hash_ops);
+        if (!cgroups)
+                return -ENOMEM;
+
+        r = sd_bus_message_enter_container(reply, 'a', "(sus)");
+        if (r < 0)
+                goto finish;
+
+        /* The strings stay owned by "reply", which outlives the tree built from them */
+        while ((r = sd_bus_message_read(reply, "(sus)", &path, &pid, &name)) > 0) {
+                r = add_process(cgroups, path, pid, name);
+                if (r < 0)
+                        goto finish;
+        }
+        if (r < 0)
+                goto finish;
+
+        r = sd_bus_message_exit_container(reply);
+        if (r < 0)
+                goto finish;
+
+        r = dump_processes(cgroups, cgroup_path, prefix, n_columns, flags);
+        if (r < 0)
+                goto finish;
+
+        r = dump_extra_processes(cgroups, prefix, n_columns, flags);
+
+finish:
+        /* Each call removes at least the node it is given, so this empties the hashmap */
+        while ((node = hashmap_first(cgroups)))
+                remove_cgroup(cgroups, node);
+
+        hashmap_free(cgroups);
+
+        return r;
+}
+
+/* Fetches the "LoadState" property of the unit "name" and stores it in *load_state.
+ * Returns 0 on success and a negative value on failure. */
+int unit_load_state(sd_bus *bus, const char *name, char **load_state) {
+        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_free_ char *unit_path = NULL;
+        int r;
+
+        unit_path = unit_dbus_path_from_name(name);
+        if (!unit_path)
+                return log_oom();
+
+        /* This function logs on its own, because otherwise it would be awkward to pass the
+         * D-Bus error message around. */
+
+        r = sd_bus_get_property_string(
+                        bus,
+                        "org.freedesktop.systemd1",
+                        unit_path,
+                        "org.freedesktop.systemd1.Unit",
+                        "LoadState",
+                        &error,
+                        load_state);
+        if (r >= 0)
+                return 0;
+
+        return log_error_errno(r, "Failed to get load state of %s: %s", name, bus_error_message(&error, r));
+}
diff --git a/src/shared/bus-unit-util.h b/src/shared/bus-unit-util.h
new file mode 100644
index 0000000..4fc94b0
--- /dev/null
+++ b/src/shared/bus-unit-util.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "install.h"
+#include "output-mode.h"
+#include "sd-bus.h"
+#include "unit-def.h"
+
+typedef struct UnitInfo { /* presumably filled in by bus_parse_unit_info(); field meanings follow from their names only — TODO confirm */
+ const char *machine;
+ const char *id;
+ const char *description;
+ const char *load_state;
+ const char *active_state;
+ const char *sub_state;
+ const char *following;
+ const char *unit_path;
+ uint32_t job_id;
+ const char *job_type;
+ const char *job_path;
+} UnitInfo; /* NOTE(review): all strings are const pointers; who owns them is not visible in this header */
+
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u);
+/* Append "Field=value" assignments to m; both return a negative value on failure, the _many variant stops at the first failing entry. */
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment);
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l);
+/* Opaque to users of this header: only the type name is declared here. */
+typedef struct BusWaitForJobs BusWaitForJobs;
+/* Job tracking: _new() allocates, _add() registers a job object path, bus_wait_for_jobs() blocks until no registered job is left. */
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret);
+void bus_wait_for_jobs_free(BusWaitForJobs *d);
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path);
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args);
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet);
+/* Presumably generates bus_wait_for_jobs_freep(), which bus_wait_for_jobs_new() uses with _cleanup_() — TODO confirm */
+DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForJobs*, bus_wait_for_jobs_free);
+/* Reads an array of (sss) changes from m into *changes / *n_changes and dumps them; changes and n_changes must not be NULL. */
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes);
+/* Prints the processes of unit as a cgroup tree on stdout; n_columns == 0 selects the width automatically. */
+int unit_show_processes(sd_bus *bus, const char *unit, const char *cgroup_path, const char *prefix, unsigned n_columns, OutputFlags flags, sd_bus_error *error);
+/* Stores the unit's LoadState property in *load_state; logs its own error message on failure. */
+int unit_load_state(sd_bus *bus, const char *name, char **load_state);
diff --git a/src/shared/bus-util.c b/src/shared/bus-util.c
new file mode 100644
index 0000000..cbcf698
--- /dev/null
+++ b/src/shared/bus-util.c
@@ -0,0 +1,1753 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-bus-protocol.h"
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "bus-internal.h"
+#include "bus-label.h"
+#include "bus-message.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "def.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "missing.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "rlimit-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/* Match handler installed by bus_async_unregister_and_exit(): once the
+ * NameOwnerChanged signal shows up, close the connection the signal arrived
+ * on and ask the event loop handed in as userdata to exit with code 0. */
+static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+        sd_event *e = userdata;
+        sd_bus *signal_bus;
+
+        assert(m);
+        assert(e);
+
+        signal_bus = sd_bus_message_get_bus(m);
+        sd_bus_close(signal_bus);
+
+        sd_event_exit(e, 0);
+
+        return 1;
+}
+
+/* Releases the well-known bus name asynchronously and installs a match so
+ * that the event loop exits once the bus confirms the release.
+ *
+ * Returns 0 on success, or the negative error of the first failing sd-bus
+ * call. */
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name) {
+        const char *own_unique_name, *rule;
+        int r;
+
+        assert(e);
+        assert(bus);
+        assert(name);
+
+        /* The name is given up first, and we only quit once the matching
+         * NameOwnerChanged signal has come back to us. That way requests that
+         * were already queued still get processed before we really exit. */
+
+        r = sd_bus_get_unique_name(bus, &own_unique_name);
+        if (r < 0)
+                return r;
+
+        /* Match the signal that says: "name" moved from our unique name to nobody. */
+        rule = strjoina("sender='org.freedesktop.DBus',"
+                        "type='signal',"
+                        "interface='org.freedesktop.DBus',"
+                        "member='NameOwnerChanged',"
+                        "path='/org/freedesktop/DBus',"
+                        "arg0='", name, "',",
+                        "arg1='", own_unique_name, "',",
+                        "arg2=''");
+
+        r = sd_bus_add_match_async(bus, NULL, rule, name_owner_change_callback, NULL, e);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_release_name_async(bus, NULL, name, NULL, NULL);
+
+        return r < 0 ? r : 0;
+}
+
+/* Runs event loop "e" until it finishes, shutting the service down on idle.
+ *
+ * While check_idle() reports idle (or no callback is given) each iteration
+ * waits at most "timeout"; if sd_event_run() then returns 0 the bus is closed
+ * via sd_bus_try_close(), or, where that is not supported, the name is
+ * released asynchronously and the loop keeps running until that completes.
+ *
+ * Returns the event loop's exit code, or a negative error. */
+int bus_event_loop_with_idle(
+                sd_event *e,
+                sd_bus *bus,
+                const char *name,
+                usec_t timeout,
+                check_idle_t check_idle,
+                void *userdata) {
+
+        bool exiting = false;
+        int r, code;
+
+        assert(e);
+        assert(bus);
+        assert(name);
+
+        for (;;) {
+                uint64_t wait_usec;
+                bool idle;
+
+                r = sd_event_get_state(e);
+                if (r < 0)
+                        return r;
+                if (r == SD_EVENT_FINISHED)
+                        break;
+
+                /* No callback means we always count as idle. */
+                idle = !check_idle || check_idle(userdata);
+
+                /* The idle timeout only applies while idle and not already shutting down. */
+                wait_usec = (!exiting && idle) ? timeout : (uint64_t) -1;
+
+                r = sd_event_run(e, wait_usec);
+                if (r < 0)
+                        return r;
+
+                /* Only a zero return while idle and not exiting triggers the shutdown path. */
+                if (r > 0 || exiting || !idle)
+                        continue;
+
+                r = sd_bus_try_close(bus);
+                if (r == -EBUSY)
+                        continue;
+
+                if (r == -EOPNOTSUPP) {
+                        /* Fallback for dbus1 connections: release the name
+                         * and wait for the confirmation to come through. */
+
+                        /* Tell the service manager we are going down, so that
+                         * it queues further start requests instead of
+                         * assuming we are already running. */
+                        sd_notify(false, "STOPPING=1");
+
+                        r = bus_async_unregister_and_exit(e, bus, name);
+                        if (r < 0)
+                                return r;
+
+                        exiting = true;
+                        continue;
+                }
+
+                if (r < 0)
+                        return r;
+
+                sd_event_exit(e, 0);
+                break;
+        }
+
+        r = sd_event_get_exit_code(e, &code);
+        if (r < 0)
+                return r;
+
+        return code;
+}
+
+/* Asks the bus driver whether the well-known or unique name "name" currently
+ * has an owner.
+ *
+ * @c:     bus connection to issue the call on
+ * @name:  bus name to look up
+ * @error: filled in on failure (may carry the driver's error)
+ *
+ * Returns > 0 if the name has an owner, 0 if not, negative errno on failure. */
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error) {
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *rep = NULL;
+        int r, has_owner = 0;
+
+        assert(c);
+        assert(name);
+
+        /* Use the canonical, case-sensitive driver object path
+         * "/org/freedesktop/DBus", the same one the NameOwnerChanged match in
+         * this file uses, instead of the lower-case "/org/freedesktop/dbus". */
+        r = sd_bus_call_method(c,
+                               "org.freedesktop.DBus",
+                               "/org/freedesktop/DBus",
+                               "org.freedesktop.DBus",
+                               "NameHasOwner",
+                               error,
+                               &rep,
+                               "s",
+                               name);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_read_basic(rep, 'b', &has_owner);
+        if (r < 0)
+                return sd_bus_error_set_errno(error, r);
+
+        return has_owner;
+}
+
+/* Checks whether the sender of "m" runs with effective UID "good_user".
+ *
+ * Returns 1 if it does, 0 if it does not or if good_user is UID_INVALID
+ * (check disabled), and a negative errno if the credentials cannot be
+ * obtained. */
+static int check_good_user(sd_bus_message *m, uid_t good_user) {
+        _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+        uid_t sender_uid;
+        int r;
+
+        assert(m);
+
+        /* No "good" user configured: nothing to compare against. */
+        if (good_user == UID_INVALID)
+                return 0;
+
+        r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_EUID, &creds);
+        if (r < 0)
+                return r;
+
+        /* Augmented credentials must not be trusted for authorization */
+        assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EUID) == 0, -EPERM);
+
+        r = sd_bus_creds_get_euid(creds, &sender_uid);
+        if (r < 0)
+                return r;
+
+        if (sender_uid != good_user)
+                return 0;
+
+        return 1;
+}
+
+/* Synchronously (and non-interactively) checks whether the sender of "call"
+ * may perform "action".
+ *
+ * Order of checks:
+ *   1. check_good_user(): any non-zero result (match or error) is returned.
+ *   2. sd_bus_query_sender_privilege() for "capability": > 0 grants access.
+ *   3. Only when built with ENABLE_POLKIT: a blocking CheckAuthorization call
+ *      to org.freedesktop.PolicyKit1, passing "details" as key/value pairs.
+ *
+ * Returns 1 if authorized. Returns 0 if not authorized and "_challenge" is
+ * non-NULL (the polkit "challenge" flag is stored there). Returns -EACCES if
+ * denied otherwise, including when the polkit service is unknown or polkit
+ * support is compiled out. Other negative values are errors from sd-bus. */
+int bus_test_polkit(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ uid_t good_user,
+ bool *_challenge,
+ sd_bus_error *e) {
+
+ int r;
+
+ assert(call);
+ assert(action);
+
+ /* Tests non-interactively! */
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+#if ENABLE_POLKIT
+ else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *request = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int authorized = false, challenge = false;
+ const char *sender, **k, **v;
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &request,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ /* Subject is the sender's bus name, followed by the action id. */
+ r = sd_bus_message_append(
+ request,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(request, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ /* "details" is consumed as consecutive key/value string pairs. */
+ STRV_FOREACH_PAIR(k, v, details) {
+ r = sd_bus_message_append(request, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(request);
+ if (r < 0)
+ return r;
+
+ /* Trailing uint32 0 and NULL string; presumably polkit's flags (no
+ * interaction) and cancellation id — see the polkit D-Bus API. */
+ r = sd_bus_message_append(request, "us", 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(call->bus, request, 0, e, &reply);
+ if (r < 0) {
+ /* Treat no PK available as access denied */
+ if (sd_bus_error_has_name(e, SD_BUS_ERROR_SERVICE_UNKNOWN)) {
+ sd_bus_error_free(e);
+ return -EACCES;
+ }
+
+ return r;
+ }
+
+ r = sd_bus_message_enter_container(reply, 'r', "bba{ss}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "bb", &authorized, &challenge);
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ /* Not authorized: report the challenge flag only if the caller asked for it. */
+ if (_challenge) {
+ *_challenge = challenge;
+ return 0;
+ }
+ }
+#endif
+
+ return -EACCES;
+}
+
+#if ENABLE_POLKIT
+
+/* State of one in-flight asynchronous polkit authorization check, created by
+ * bus_verify_polkit_async() and released by async_polkit_query_free(). */
+typedef struct AsyncPolkitQuery {
+ /* request: the original method call (referenced); reply: polkit's answer, set by async_polkit_callback() */
+ sd_bus_message *request, *reply;
+ /* handler and userdata that were current when the query was queued; re-invoked once the reply is in */
+ sd_bus_message_handler_t callback;
+ void *userdata;
+ /* slot of the pending sd_bus_call_async() */
+ sd_bus_slot *slot;
+ /* hashmap this query is registered in, keyed by "request"; NULL until registered */
+ Hashmap *registry;
+} AsyncPolkitQuery;
+
+/* Releases an AsyncPolkitQuery: drops the call slot, removes the query from
+ * its registry (if it was registered), drops both message references and
+ * frees the structure. NULL is accepted and ignored. */
+static void async_polkit_query_free(AsyncPolkitQuery *q) {
+
+        if (!q)
+                return;
+
+        sd_bus_slot_unref(q->slot);
+
+        /* The registry is keyed by the request message, so both must be set. */
+        if (q->registry) {
+                if (q->request)
+                        hashmap_remove(q->registry, q->request);
+        }
+
+        sd_bus_message_unref(q->request);
+        sd_bus_message_unref(q->reply);
+
+        free(q);
+}
+
+/* Completion handler for the asynchronous polkit call: stores the reply in
+ * the query, rewinds the original request and re-runs the original method
+ * handler, which is then expected to find the stored reply. The query is
+ * freed before returning in every case. */
+static int async_polkit_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+        _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL;
+        AsyncPolkitQuery *q = userdata;
+        int r;
+
+        assert(reply);
+        assert(q);
+
+        q->slot = sd_bus_slot_unref(q->slot);
+        q->reply = sd_bus_message_ref(reply);
+
+        r = sd_bus_message_rewind(q->request, true);
+        if (r < 0)
+                /* Could not rewind: answer the original caller with the error. */
+                r = sd_bus_reply_method_errno(q->request, r, NULL);
+        else {
+                /* Second pass through the original handler, now with a reply available. */
+                r = q->callback(q->request, q->userdata, &error_buffer);
+                r = bus_maybe_reply_error(q->request, r, &error_buffer);
+        }
+
+        async_polkit_query_free(q);
+
+        return r;
+}
+
+#endif
+
+/* Asynchronous authorization check for the method call "call".
+ *
+ * The function is meant to be called from within the method handler of
+ * "call" and may be entered twice for the same message:
+ *
+ *   - First entry: after the good-user and capability checks fail to grant
+ *     access, a CheckAuthorization request is sent to polkit asynchronously,
+ *     an AsyncPolkitQuery is stored in *registry keyed by "call", and 0 is
+ *     returned.
+ *   - Second entry: async_polkit_callback() re-invokes the handler once the
+ *     reply arrived; the query is then found in *registry and its stored
+ *     reply is evaluated.
+ *
+ * Returns 1 if authorized, 0 if a polkit query was queued, -EACCES if denied
+ * (also when polkit is unknown or compiled out), or another negative errno.
+ * If polkit reports a challenge, "error" is set to
+ * SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED and the result of
+ * sd_bus_error_set() is returned. */
+int bus_verify_polkit_async(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ bool interactive,
+ uid_t good_user,
+ Hashmap **registry,
+ sd_bus_error *error) {
+
+#if ENABLE_POLKIT
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *pk = NULL;
+ AsyncPolkitQuery *q;
+ const char *sender, **k, **v;
+ sd_bus_message_handler_t callback;
+ void *userdata;
+ int c;
+#endif
+ int r;
+
+ assert(call);
+ assert(action);
+ assert(registry);
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+#if ENABLE_POLKIT
+ q = hashmap_get(*registry, call);
+ if (q) {
+ int authorized, challenge;
+
+ /* This is the second invocation of this function, and
+ * there's already a response from polkit, let's
+ * process it */
+ assert(q->reply);
+
+ if (sd_bus_message_is_method_error(q->reply, NULL)) {
+ const sd_bus_error *e;
+
+ /* Copy error from polkit reply */
+ e = sd_bus_message_get_error(q->reply);
+ sd_bus_error_copy(error, e);
+
+ /* Treat no PK available as access denied */
+ if (sd_bus_error_has_name(e, SD_BUS_ERROR_SERVICE_UNKNOWN))
+ return -EACCES;
+
+ return -sd_bus_error_get_errno(e);
+ }
+
+ r = sd_bus_message_enter_container(q->reply, 'r', "bba{ss}");
+ if (r >= 0)
+ r = sd_bus_message_read(q->reply, "bb", &authorized, &challenge);
+
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ if (challenge)
+ return sd_bus_error_set(error, SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, "Interactive authentication required.");
+
+ return -EACCES;
+ }
+#endif
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+
+#if ENABLE_POLKIT
+ /* The handler is re-run later, so "call" must be the message currently being dispatched. */
+ if (sd_bus_get_current_message(call->bus) != call)
+ return -EINVAL;
+
+ callback = sd_bus_get_current_handler(call->bus);
+ if (!callback)
+ return -EINVAL;
+
+ userdata = sd_bus_get_current_userdata(call->bus);
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+ /* The message's own allow-interactive-authorization flag can switch interactivity on. */
+ c = sd_bus_message_get_allow_interactive_authorization(call);
+ if (c < 0)
+ return c;
+ if (c > 0)
+ interactive = true;
+
+ r = hashmap_ensure_allocated(registry, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &pk,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ pk,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(pk, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ /* "details" is consumed as consecutive key/value string pairs. */
+ STRV_FOREACH_PAIR(k, v, details) {
+ r = sd_bus_message_append(pk, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(pk);
+ if (r < 0)
+ return r;
+
+ /* The uint32 carries "interactive" as 0/1; presumably polkit's
+ * allow-user-interaction flag — see the polkit D-Bus API. */
+ r = sd_bus_message_append(pk, "us", interactive, NULL);
+ if (r < 0)
+ return r;
+
+ q = new0(AsyncPolkitQuery, 1);
+ if (!q)
+ return -ENOMEM;
+
+ q->request = sd_bus_message_ref(call);
+ q->callback = callback;
+ q->userdata = userdata;
+
+ r = hashmap_put(*registry, call, q);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ /* Set only after a successful put, so the free function never removes an entry that was not inserted. */
+ q->registry = *registry;
+
+ r = sd_bus_call_async(call->bus, &q->slot, pk, async_polkit_callback, q, 0);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ return 0;
+#endif
+
+ return -EACCES;
+}
+
+/* Frees a registry of pending polkit queries, destroying every remaining
+ * query with async_polkit_query_free(). Does nothing when polkit support is
+ * compiled out. */
+void bus_verify_polkit_async_registry_free(Hashmap *registry) {
+#if ENABLE_POLKIT
+        hashmap_free_with_destructor(registry, async_polkit_query_free);
+#endif
+}
+
+/* Verifies that the peer of bus connection "c" is either root or runs under
+ * our own effective UID.
+ *
+ * Returns 1 if the peer is acceptable, -EPERM if not, or a negative errno if
+ * the socket or its peer credentials cannot be obtained. */
+int bus_check_peercred(sd_bus *c) {
+        struct ucred peer;
+        int fd, r;
+
+        assert(c);
+
+        fd = sd_bus_get_fd(c);
+        if (fd < 0)
+                return fd;
+
+        r = getpeercred(fd, &peer);
+        if (r < 0)
+                return r;
+
+        if (peer.uid == 0 || peer.uid == geteuid())
+                return 1;
+
+        return -EPERM;
+}
+
+/* Opens a connection for talking to the system service manager.
+ *
+ * Non-root callers get the default system bus. Root first tries the private
+ * socket /run/systemd/private and falls back to the default system bus if
+ * that connection cannot be started.
+ *
+ * Returns 0 (or the result of sd_bus_default_system()) and stores the
+ * connection in *_bus; negative errno on failure. */
+int bus_connect_system_systemd(sd_bus **_bus) {
+        _cleanup_(sd_bus_close_unrefp) sd_bus *private_bus = NULL;
+        int r;
+
+        assert(_bus);
+
+        if (geteuid() != 0)
+                return sd_bus_default_system(_bus);
+
+        /* As root we bypass the bus and talk to the system instance directly. */
+
+        r = sd_bus_new(&private_bus);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_set_address(private_bus, "unix:path=/run/systemd/private");
+        if (r < 0)
+                return r;
+
+        if (sd_bus_start(private_bus) < 0)
+                return sd_bus_default_system(_bus);
+
+        r = bus_check_peercred(private_bus);
+        if (r < 0)
+                return r;
+
+        *_bus = TAKE_PTR(private_bus);
+
+        return 0;
+}
+
+/* Opens a connection for talking to the user service manager.
+ *
+ * If $XDG_RUNTIME_DIR is set, the private socket
+ * $XDG_RUNTIME_DIR/systemd/private is tried first; without the variable, or
+ * if that connection cannot be started, the default user bus is used.
+ *
+ * Returns 0 (or the result of sd_bus_default_user()) and stores the
+ * connection in *_bus; negative errno on failure. */
+int bus_connect_user_systemd(sd_bus **_bus) {
+        _cleanup_(sd_bus_close_unrefp) sd_bus *private_bus = NULL;
+        _cleanup_free_ char *escaped_dir = NULL;
+        const char *runtime_dir;
+        int r;
+
+        assert(_bus);
+
+        runtime_dir = secure_getenv("XDG_RUNTIME_DIR");
+        if (!runtime_dir)
+                return sd_bus_default_user(_bus);
+
+        escaped_dir = bus_address_escape(runtime_dir);
+        if (!escaped_dir)
+                return -ENOMEM;
+
+        r = sd_bus_new(&private_bus);
+        if (r < 0)
+                return r;
+
+        /* The address is assigned directly on the connection object. */
+        private_bus->address = strjoin("unix:path=", escaped_dir, "/systemd/private");
+        if (!private_bus->address)
+                return -ENOMEM;
+
+        if (sd_bus_start(private_bus) < 0)
+                return sd_bus_default_user(_bus);
+
+        r = bus_check_peercred(private_bus);
+        if (r < 0)
+                return r;
+
+        *_bus = TAKE_PTR(private_bus);
+
+        return 0;
+}
+
+/* Prints one property to stdout as "name=value", or just "value" when
+ * only_value is set; the value is produced from the printf-style fmt.
+ *
+ * If expected_value is non-NULL the formatted value is printed only when it
+ * equals expected_value exactly; otherwise nothing is written.
+ *
+ * Returns 0, or -ENOMEM if the value could not be formatted for comparison. */
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) {
+        _cleanup_free_ char *formatted = NULL;
+        va_list ap;
+        int r;
+
+        assert(name);
+        assert(fmt);
+
+        if (!expected_value) {
+                /* No filter: stream the value straight to stdout. */
+                if (!only_value)
+                        printf("%s=", name);
+
+                va_start(ap, fmt);
+                vprintf(fmt, ap);
+                va_end(ap);
+                puts("");
+
+                return 0;
+        }
+
+        /* Filtered: the value has to be materialized to compare it. */
+        va_start(ap, fmt);
+        r = vasprintf(&formatted, fmt, ap);
+        va_end(ap);
+        if (r < 0)
+                return -ENOMEM;
+
+        if (!streq_ptr(expected_value, formatted))
+                return 0;
+
+        if (only_value)
+                puts(formatted);
+        else
+                printf("%s=%s\n", name, formatted);
+
+        return 0;
+}
+
+/* Generic printer for the property value at the current read position of "m".
+ *
+ * The D-Bus type of the value selects the formatting; for several types the
+ * property name additionally picks a human-readable representation
+ * (timestamps, time spans, capability sets, "[not set]", "infinity", ...).
+ *
+ * @name:           property name, used for the "name=" prefix and heuristics
+ * @expected_value: if non-NULL, passed on to bus_print_property_value() so
+ *                  only matching values are printed (the array branches
+ *                  below do not look at it)
+ * @value:          print only the value, without the "name=" prefix
+ * @all:            also print empty values
+ *
+ * Returns 1 if the value was consumed from the message, 0 if the type is not
+ * handled here (nothing was read; the caller has to skip it), or a negative
+ * errno on failure. */
+static int bus_print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING: {
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return r;
+
+ if (all || !isempty(s)) {
+ bool good;
+
+ /* This property has a single value, so we need to take
+ * care not to print a new line, everything else is OK. */
+ good = !strchr(s, '\n');
+ bus_print_property_value(name, expected_value, value, "%s", good ? s : "[unprintable]");
+ }
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return r;
+
+ /* The filter is compared as a parsed boolean, not as a string. */
+ if (expected_value && parse_boolean(expected_value) != b)
+ return 1;
+
+ bus_print_property_value(name, NULL, value, "%s", yes_no(b));
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ /* Yes, heuristics! But we can change this check
+ * should it turn out to not be sufficient */
+
+ if (endswith(name, "Timestamp") ||
+ STR_IN_SET(name, "NextElapseUSecRealtime", "LastTriggerUSec", "TimeUSec", "RTCTimeUSec")) {
+ char timestamp[FORMAT_TIMESTAMP_MAX];
+ const char *t;
+
+ t = format_timestamp(timestamp, sizeof(timestamp), u);
+ if (t || all)
+ bus_print_property_value(name, expected_value, value, "%s", strempty(t));
+
+ } else if (strstr(name, "USec")) {
+ char timespan[FORMAT_TIMESPAN_MAX];
+
+ (void) format_timespan(timespan, sizeof(timespan), u, 0);
+ bus_print_property_value(name, expected_value, value, "%s", timespan);
+
+ } else if (streq(name, "RestrictNamespaces")) {
+ _cleanup_free_ char *s = NULL;
+ const char *result;
+
+ /* No flag set prints "yes", all flags set prints "no", anything else the flag list. */
+ if ((u & NAMESPACE_FLAGS_ALL) == 0)
+ result = "yes";
+ else if ((u & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL)
+ result = "no";
+ else {
+ r = namespace_flags_to_string(u, &s);
+ if (r < 0)
+ return r;
+
+ result = s;
+ }
+
+ bus_print_property_value(name, expected_value, value, "%s", result);
+
+ } else if (streq(name, "MountFlags")) {
+ const char *result;
+
+ result = mount_propagation_flags_to_string(u);
+ if (!result)
+ return -EINVAL;
+
+ bus_print_property_value(name, expected_value, value, "%s", result);
+
+ } else if (STR_IN_SET(name, "CapabilityBoundingSet", "AmbientCapabilities")) {
+ _cleanup_free_ char *s = NULL;
+
+ r = capability_set_to_string_alloc(u, &s);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%s", s);
+
+ } else if ((STR_IN_SET(name, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight") && u == CGROUP_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "CPUShares", "StartupCPUShares") && u == CGROUP_CPU_SHARES_INVALID) ||
+ (STR_IN_SET(name, "BlockIOWeight", "StartupBlockIOWeight") && u == CGROUP_BLKIO_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "MemoryCurrent", "TasksCurrent") && u == (uint64_t) -1) ||
+ (endswith(name, "NSec") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "%s", "[not set]");
+
+ else if ((STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) ||
+ (STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == (uint64_t) -1) ||
+ (startswith(name, "Limit") && u == (uint64_t) -1) ||
+ (startswith(name, "DefaultLimit") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "%s", "infinity");
+ else
+ bus_print_property_value(name, expected_value, value, "%"PRIu64, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%"PRIi64, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ /* Names containing "UMask" or "Mode" are printed as 4-digit octal. */
+ if (strstr(name, "UMask") || strstr(name, "Mode"))
+ bus_print_property_value(name, expected_value, value, "%04o", u);
+
+ else if (streq(name, "UID")) {
+ if (u == UID_INVALID)
+ bus_print_property_value(name, expected_value, value, "%s", "[not set]");
+ else
+ bus_print_property_value(name, expected_value, value, "%"PRIu32, u);
+ } else if (streq(name, "GID")) {
+ if (u == GID_INVALID)
+ bus_print_property_value(name, expected_value, value, "%s", "[not set]");
+ else
+ bus_print_property_value(name, expected_value, value, "%"PRIu32, u);
+ } else
+ bus_print_property_value(name, expected_value, value, "%"PRIu32, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%"PRIi32, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, "%g", d);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_ARRAY:
+ /* Only arrays of strings, bytes and uint32 are handled; these branches print directly and ignore expected_value. */
+ if (streq(contents, "s")) {
+ bool first = true;
+ const char *str;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, contents);
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &str)) > 0) {
+ bool good;
+
+ if (first && !value)
+ printf("%s=", name);
+
+ /* This property has multiple space-separated values, so
+ * neither spaces nor newlines can be allowed in a value. */
+ good = str[strcspn(str, " \n")] == '\0';
+
+ printf("%s%s", first ? "" : " ", good ? str : "[unprintable]");
+
+ first = false;
+ }
+ if (r < 0)
+ return r;
+
+ /* Empty array: print the bare "name=" line only when "all" is set. */
+ if (first && all && !value)
+ printf("%s=", name);
+ if (!first || all)
+ puts("");
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ } else if (streq(contents, "y")) {
+ const uint8_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ /* Bytes are dumped as consecutive two-digit hex values. */
+ for (i = 0; i < n; i++)
+ printf("%02x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+
+ } else if (streq(contents, "u")) {
+ uint32_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_UINT32, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ /* Each element is dumped as eight hex digits, without separators. */
+ for (i = 0; i < n; i++)
+ printf("%08x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+ }
+
+ break;
+ }
+
+ /* Unhandled type: nothing was read from the message. */
+ return 0;
+}
+
+/* Walks an a{sv} property dictionary at the current position of "m" and
+ * prints the selected properties.
+ *
+ * @func:             optional custom printer, tried first; if it is NULL or
+ *                    returns 0 the generic bus_print_property() is used
+ * @filter:           optional string vector; an entry "Name" selects that
+ *                    property, "Name=value" additionally passes "value" on as
+ *                    the expected value. NULL selects everything.
+ * @value, @all:      forwarded to the printers
+ * @found_properties: if non-NULL, every property name seen (selected or not)
+ *                    is added to this set, which is allocated on demand
+ *
+ * Returns 0 on success, negative errno on failure. */
+int bus_message_print_all_properties(
+ sd_bus_message *m,
+ bus_message_print_t func,
+ char **filter,
+ bool value,
+ bool all,
+ Set **found_properties) {
+
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ _cleanup_free_ char *name_with_equal = NULL;
+ const char *name, *contents, *expected_value = NULL;
+
+ r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &name);
+ if (r < 0)
+ return r;
+
+ if (found_properties) {
+ r = set_ensure_allocated(found_properties, &string_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ /* The name pointer itself is stored, not a copy; -EEXIST just means it was seen before. */
+ r = set_put(*found_properties, name);
+ if (r < 0 && r != -EEXIST)
+ return log_oom();
+ }
+
+ name_with_equal = strappend(name, "=");
+ if (!name_with_equal)
+ return log_oom();
+
+ /* Selected if there is no filter, the bare name is listed, or a "name=..." entry exists;
+ * in the last case expected_value is what strv_find_startswith() returns for that entry. */
+ if (!filter || strv_find(filter, name) ||
+ (expected_value = strv_find_startswith(filter, name_with_equal))) {
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r < 0)
+ return r;
+
+ if (func)
+ r = func(name, expected_value, m, value, all);
+ if (!func || r == 0)
+ r = bus_print_property(name, expected_value, m, value, all);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Neither printer consumed the value. */
+ if (all && !expected_value)
+ printf("%s=[unprintable]\n", name);
+ /* skip what we didn't read */
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/* Fetches all properties of object "path" on service "dest" with
+ * org.freedesktop.DBus.Properties.GetAll (empty interface string) and prints
+ * them via bus_message_print_all_properties(), which receives all remaining
+ * arguments unchanged.
+ *
+ * Returns the result of the printing step, or the negative error of the
+ * method call. */
+int bus_print_all_properties(
+                sd_bus *bus,
+                const char *dest,
+                const char *path,
+                bus_message_print_t func,
+                char **filter,
+                bool value,
+                bool all,
+                Set **found_properties) {
+
+        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *props = NULL;
+        int r;
+
+        assert(bus);
+        assert(path);
+
+        r = sd_bus_call_method(
+                        bus,
+                        dest,
+                        path,
+                        "org.freedesktop.DBus.Properties",
+                        "GetAll",
+                        &error,
+                        &props,
+                        "s", "");
+        if (r < 0)
+                return r;
+
+        return bus_message_print_all_properties(props, func, filter, value, all, found_properties);
+}
+
+/* Property mapper that reads a byte array from "m" into the sd_id128_t that
+ * userdata points to. An empty array maps to SD_ID128_NULL, a 16-byte array
+ * is copied verbatim, and any other length is rejected with -EINVAL. */
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+        sd_id128_t *id = userdata;
+        const void *bytes;
+        size_t n_bytes;
+        int r;
+
+        r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, &bytes, &n_bytes);
+        if (r < 0)
+                return r;
+
+        switch (n_bytes) {
+
+        case 0:
+                *id = SD_ID128_NULL;
+                return 0;
+
+        case 16:
+                memcpy(id->bytes, bytes, n_bytes);
+                return 0;
+
+        default:
+                return -EINVAL;
+        }
+}
+
+/* Default property mapper: reads the next value from "m" and stores it in the
+ * variable userdata points to, chosen by the value's D-Bus type.
+ *
+ *   string       -> const char* (empty maps to NULL), or a strdup()ed char*
+ *                   when BUS_MAP_STRDUP is set
+ *   array        -> char** string vector, replacing the previous one
+ *   boolean      -> int, or bool when BUS_MAP_BOOLEAN_AS_BOOL is set
+ *   (u)int32     -> uint32_t
+ *   (u)int64     -> uint64_t
+ *   double       -> double
+ *
+ * Returns >= 0 on success, -EOPNOTSUPP for any other type, or a negative
+ * errno from reading the message. */
+static int map_basic(sd_bus *bus, const char *member, sd_bus_message *m, unsigned flags, sd_bus_error *error, void *userdata) {
+        char type;
+        int r;
+
+        r = sd_bus_message_peek_type(m, &type, NULL);
+        if (r < 0)
+                return r;
+
+        switch (type) {
+
+        case SD_BUS_TYPE_STRING: {
+                const char *str;
+
+                r = sd_bus_message_read_basic(m, type, &str);
+                if (r < 0)
+                        return r;
+
+                /* Empty strings are normalized to NULL. */
+                if (isempty(str))
+                        str = NULL;
+
+                if (flags & BUS_MAP_STRDUP)
+                        return free_and_strdup((char **) userdata, str);
+
+                /* Without BUS_MAP_STRDUP the pointer read from the message is stored as is. */
+                *(const char **) userdata = str;
+                return 0;
+        }
+
+        case SD_BUS_TYPE_ARRAY: {
+                _cleanup_strv_free_ char **l = NULL;
+                char ***dest = userdata;
+
+                r = bus_message_read_strv_extend(m, &l);
+                if (r < 0)
+                        return r;
+
+                return strv_free_and_replace(*dest, l);
+        }
+
+        case SD_BUS_TYPE_BOOLEAN: {
+                int b;
+
+                r = sd_bus_message_read_basic(m, type, &b);
+                if (r < 0)
+                        return r;
+
+                if (flags & BUS_MAP_BOOLEAN_AS_BOOL) {
+                        bool *dest = userdata;
+                        *dest = b;
+                } else {
+                        int *dest = userdata;
+                        *dest = b;
+                }
+
+                return 0;
+        }
+
+        case SD_BUS_TYPE_INT32:
+        case SD_BUS_TYPE_UINT32: {
+                uint32_t *dest = userdata;
+                uint32_t v;
+
+                r = sd_bus_message_read_basic(m, type, &v);
+                if (r < 0)
+                        return r;
+
+                *dest = v;
+                return 0;
+        }
+
+        case SD_BUS_TYPE_INT64:
+        case SD_BUS_TYPE_UINT64: {
+                uint64_t *dest = userdata;
+                uint64_t v;
+
+                r = sd_bus_message_read_basic(m, type, &v);
+                if (r < 0)
+                        return r;
+
+                *dest = v;
+                return 0;
+        }
+
+        case SD_BUS_TYPE_DOUBLE: {
+                double *dest = userdata;
+                double v;
+
+                r = sd_bus_message_read_basic(m, type, &v);
+                if (r < 0)
+                        return r;
+
+                *dest = v;
+                return 0;
+        }
+
+        default:
+                break;
+        }
+
+        return -EOPNOTSUPP;
+}
+
+/* Walks an a{sv} property dictionary at the current position of "m" and
+ * stores every property listed in "map" into the structure at "userdata".
+ *
+ * "map" is an array terminated by an entry whose member is NULL. For a
+ * matching entry the destination is userdata + offset; the value is written
+ * by the entry's own "set" callback if it has one, otherwise by map_basic().
+ * Properties not listed in the map are skipped.
+ *
+ * Returns the result of leaving the outer container on success, or a
+ * negative errno. */
+int bus_message_map_all_properties(
+                sd_bus_message *m,
+                const struct bus_properties_map *map,
+                unsigned flags,
+                sd_bus_error *error,
+                void *userdata) {
+
+        int r;
+
+        assert(m);
+        assert(map);
+
+        r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+        if (r < 0)
+                return r;
+
+        while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+                const struct bus_properties_map *entry, *prop = NULL;
+                const char *member;
+
+                r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member);
+                if (r < 0)
+                        return r;
+
+                /* Linear search; the first entry with a matching name wins. */
+                for (entry = map; entry->member; entry++)
+                        if (streq(entry->member, member)) {
+                                prop = entry;
+                                break;
+                        }
+
+                if (!prop) {
+                        r = sd_bus_message_skip(m, "v");
+                        if (r < 0)
+                                return r;
+                } else {
+                        const char *contents;
+                        void *field;
+
+                        r = sd_bus_message_peek_type(m, NULL, &contents);
+                        if (r < 0)
+                                return r;
+
+                        r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+                        if (r < 0)
+                                return r;
+
+                        field = (uint8_t *) userdata + prop->offset;
+
+                        r = prop->set
+                                ? prop->set(sd_bus_message_get_bus(m), member, m, error, field)
+                                : map_basic(sd_bus_message_get_bus(m), member, m, flags, error, field);
+                        if (r < 0)
+                                return r;
+
+                        r = sd_bus_message_exit_container(m);
+                        if (r < 0)
+                                return r;
+                }
+
+                /* Leave the dict entry. */
+                r = sd_bus_message_exit_container(m);
+                if (r < 0)
+                        return r;
+        }
+        if (r < 0)
+                return r;
+
+        return sd_bus_message_exit_container(m);
+}
+
+/* Fetches all properties of "path" on "destination" via
+ * org.freedesktop.DBus.Properties.GetAll and maps them into "userdata"
+ * according to "map" (see bus_message_map_all_properties()).
+ *
+ * If "reply" is non-NULL it receives a new reference to the reply message.
+ * The assertion below requires either "reply" or BUS_MAP_STRDUP; presumably
+ * because un-duplicated strings point into that message — TODO confirm.
+ *
+ * Returns the non-negative result of the mapping step, or a negative errno. */
+int bus_map_all_properties(
+                sd_bus *bus,
+                const char *destination,
+                const char *path,
+                const struct bus_properties_map *map,
+                unsigned flags,
+                sd_bus_error *error,
+                sd_bus_message **reply,
+                void *userdata) {
+
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *props = NULL;
+        int r;
+
+        assert(bus);
+        assert(destination);
+        assert(path);
+        assert(map);
+        assert(reply || (flags & BUS_MAP_STRDUP));
+
+        r = sd_bus_call_method(bus,
+                               destination,
+                               path,
+                               "org.freedesktop.DBus.Properties",
+                               "GetAll",
+                               error,
+                               &props,
+                               "s", "");
+        if (r < 0)
+                return r;
+
+        r = bus_message_map_all_properties(props, map, flags, error, userdata);
+        if (r < 0)
+                return r;
+
+        /* Hand the caller its own reference if it asked for the message. */
+        if (reply)
+                *reply = sd_bus_message_ref(props);
+
+        return r;
+}
+
+/* Opens a bus connection for the given transport and enables
+ * exit-on-disconnect on it.
+ *
+ * LOCAL connects to the default user bus ("user" set) or the default system
+ * bus; the latter is refused with -EHOSTDOWN (after logging) when
+ * sd_booted() is not positive. REMOTE and MACHINE connect to the system bus
+ * of "host".
+ *
+ * "host" must be given exactly for the non-local transports (-EINVAL
+ * otherwise) and "user" is only supported locally (-EOPNOTSUPP otherwise).
+ *
+ * Returns 0 and stores the connection in *ret, or a negative errno. */
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **ret) {
+        _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+        int r;
+
+        assert(transport >= 0);
+        assert(transport < _BUS_TRANSPORT_MAX);
+        assert(ret);
+
+        assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+        assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+        switch (transport) {
+
+        case BUS_TRANSPORT_LOCAL:
+                if (!user && sd_booted() <= 0) {
+                        /* Be friendly when PID 1 of the local system is not systemd. */
+                        log_error("System has not been booted with systemd as init system (PID 1). Can't operate.");
+
+                        return -EHOSTDOWN;
+                }
+
+                r = user ? sd_bus_default_user(&bus) : sd_bus_default_system(&bus);
+                break;
+
+        case BUS_TRANSPORT_REMOTE:
+                r = sd_bus_open_system_remote(&bus, host);
+                break;
+
+        case BUS_TRANSPORT_MACHINE:
+                r = sd_bus_open_system_machine(&bus, host);
+                break;
+
+        default:
+                assert_not_reached("Hmm, unknown transport type.");
+        }
+        if (r < 0)
+                return r;
+
+        r = sd_bus_set_exit_on_disconnect(bus, true);
+        if (r < 0)
+                return r;
+
+        *ret = TAKE_PTR(bus);
+
+        return 0;
+}
+
+/* Like bus_connect_transport(), but for the local transport it uses
+ * bus_connect_user_systemd() / bus_connect_system_systemd(), and it does not
+ * enable exit-on-disconnect.
+ *
+ * "host" must be given exactly for the non-local transports (-EINVAL
+ * otherwise) and "user" is only supported locally (-EOPNOTSUPP otherwise).
+ * A local system connection is refused with -EHOSTDOWN (logged) when
+ * sd_booted() is not positive.
+ *
+ * Returns the result of the connect function that was used. */
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus) {
+
+        assert(transport >= 0);
+        assert(transport < _BUS_TRANSPORT_MAX);
+        assert(bus);
+
+        assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+        assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+        switch (transport) {
+
+        case BUS_TRANSPORT_LOCAL:
+                if (user)
+                        return bus_connect_user_systemd(bus);
+
+                if (sd_booted() <= 0)
+                        /* Be friendly when PID 1 of the local system is not systemd. */
+                        return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+                                               "System has not been booted with systemd as init system (PID 1). Can't operate.");
+
+                return bus_connect_system_systemd(bus);
+
+        case BUS_TRANSPORT_REMOTE:
+                return sd_bus_open_system_remote(bus, host);
+
+        case BUS_TRANSPORT_MACHINE:
+                return sd_bus_open_system_machine(bus, host);
+
+        default:
+                assert_not_reached("Hmm, unknown transport type.");
+        }
+}
+
+/* Property getter for a C "bool" variable: the value is widened to an int
+ * before it is appended to "reply" as D-Bus type 'b'. */
+int bus_property_get_bool(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        const bool *flag = userdata;
+        int b;
+
+        b = *flag;
+
+        return sd_bus_message_append_basic(reply, 'b', &b);
+}
+
+/* Property setter for a C "bool" variable: reads a D-Bus boolean from
+ * "value" into an int and stores it in the bool that userdata points to.
+ * Returns 0, or the negative error of the read. */
+int bus_property_set_bool(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *value,
+                void *userdata,
+                sd_bus_error *error) {
+
+        bool *target = userdata;
+        int b, r;
+
+        r = sd_bus_message_read(value, "b", &b);
+        if (r < 0)
+                return r;
+
+        *target = b;
+
+        return 0;
+}
+
+/* Property getter for an sd_id128_t: a null ID is sent as an empty byte
+ * array, any other ID as its 16 raw bytes. */
+int bus_property_get_id128(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        sd_id128_t *id = userdata;
+
+        if (!sd_id128_is_null(*id))
+                return sd_bus_message_append_array(reply, 'y', id->bytes, 16);
+
+        /* The all-zero ID is represented by an empty array */
+        return sd_bus_message_append(reply, "ay", 0);
+}
+
+#if __SIZEOF_SIZE_T__ != 8
+/* Property getter for a size_t variable, only compiled where size_t is not
+ * 8 bytes wide: the value is widened to uint64_t and appended as type 't'. */
+int bus_property_get_size(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        const size_t *src = userdata;
+        uint64_t sz;
+
+        sz = *src;
+
+        return sd_bus_message_append_basic(reply, 't', &sz);
+}
+#endif
+
+#if __SIZEOF_LONG__ != 8
+/* Property getter for a "long" variable, only compiled where long is not
+ * 8 bytes wide: the value is widened to int64_t and appended as type 'x'. */
+int bus_property_get_long(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        const long *src = userdata;
+        int64_t l;
+
+        l = *src;
+
+        return sd_bus_message_append_basic(reply, 'x', &l);
+}
+
+/* Property getter for an "unsigned long" variable, only compiled where long
+ * is not 8 bytes wide: the value is widened to uint64_t and appended as
+ * type 't'. */
+int bus_property_get_ulong(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        const unsigned long *src = userdata;
+        uint64_t ul;
+
+        ul = *src;
+
+        return sd_bus_message_append_basic(reply, 't', &ul);
+}
+#endif
+
+/* Logs "Failed to parse bus message" together with error "r" at error level
+ * and returns whatever log_error_errno() returns for it, so callers can write
+ * "return bus_log_parse_error(r);". */
+int bus_log_parse_error(int r) {
+ return log_error_errno(r, "Failed to parse bus message: %m");
+}
+
+/* Logs "Failed to create bus message" together with error "r" at error level
+ * and returns whatever log_error_errno() returns for it, so callers can write
+ * "return bus_log_create_error(r);". */
+int bus_log_create_error(int r) {
+ return log_error_errno(r, "Failed to create bus message: %m");
+}
+
+/**
+ * bus_path_encode_unique() - build a per-client unique object path
+ * @b: bus connection, may be NULL if both IDs are supplied
+ * @prefix: valid object path used as prefix
+ * @sender_id: unique name of the client, or NULL to use the unique name of @b
+ * @external_id: client-chosen ID, or NULL to derive one from the next cookie of @b
+ * @ret_path: where to store the newly allocated object path
+ *
+ * APIs that let clients create server-side objects need names for them. If the
+ * server picked the name, a client that creates an object asynchronously could
+ * not refer to it (for example to destroy it) before the method reply arrived,
+ * and an extra round-trip would be forced. Letting the client pick the name
+ * avoids this, but requires that:
+ *   1) different clients cannot collide in the (global) object path namespace,
+ *   2) different libraries sharing one connection cannot collide either.
+ * The first is achieved by embedding the unique bus name of the connection
+ * (which the server side must enforce), the second by handing out unique
+ * suffixes from the connection itself.
+ *
+ * The resulting path follows the template '/prefix/sender_id/external_id',
+ * with both trailing labels escaped. The caller must free @ret_path.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path) {
+        _cleanup_free_ char *escaped_sender = NULL, *escaped_external = NULL;
+        char cookie_str[DECIMAL_STR_MAX(uint64_t)];
+        char *joined;
+        int r;
+
+        assert_return(b || (sender_id && external_id), -EINVAL);
+        assert_return(object_path_is_valid(prefix), -EINVAL);
+        assert_return(ret_path, -EINVAL);
+
+        /* Fall back to the connection's own unique name */
+        if (!sender_id) {
+                r = sd_bus_get_unique_name(b, &sender_id);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Fall back to a freshly allocated cookie, formatted as decimal number */
+        if (!external_id) {
+                xsprintf(cookie_str, "%"PRIu64, ++b->cookie);
+                external_id = cookie_str;
+        }
+
+        escaped_sender = bus_label_escape(sender_id);
+        if (!escaped_sender)
+                return -ENOMEM;
+
+        escaped_external = bus_label_escape(external_id);
+        if (!escaped_external)
+                return -ENOMEM;
+
+        joined = strjoin(prefix, "/", escaped_sender, "/", escaped_external);
+        if (!joined)
+                return -ENOMEM;
+
+        *ret_path = joined;
+        return 0;
+}
+
+/**
+ * bus_path_decode_unique() - split a path created by bus_path_encode_unique()
+ * @path: object path to decode
+ * @prefix: object path prefix the path is expected to start with
+ * @ret_sender: where to store the unescaped sender-id label
+ * @ret_external: where to store the unescaped external-id label
+ *
+ * Reverses bus_path_encode_unique(): the part of @path following @prefix is
+ * split at its first '/' and both halves are unescaped. The caller owns the
+ * returned strings and must free them.
+ *
+ * Returns: Negative error code on failure (the output parameters are left
+ *          untouched), 0 if @path does not match the template (both output
+ *          parameters are set to NULL), 1 if both labels were extracted.
+ */
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external) {
+        const char *labels, *slash = NULL;
+        char *sender, *external;
+
+        assert(object_path_is_valid(path));
+        assert(object_path_is_valid(prefix));
+        assert(ret_sender);
+        assert(ret_external);
+
+        /* The path matches only if it starts with the prefix and two labels follow it */
+        labels = object_path_startswith(path, prefix);
+        if (labels)
+                slash = strchr(labels, '/');
+
+        if (!slash) {
+                *ret_sender = NULL;
+                *ret_external = NULL;
+                return 0;
+        }
+
+        sender = bus_label_unescape_n(labels, slash - labels);
+        external = bus_label_unescape(slash + 1);
+        if (sender && external) {
+                *ret_sender = sender;
+                *ret_external = external;
+                return 1;
+        }
+
+        free(sender);
+        free(external);
+        return -ENOMEM;
+}
+
+/* Property getter for resource limits. userdata points to a "struct rlimit*". If that pointer is set,
+ * the limit is read from it. If it is NULL, the resource is derived from the property name (the text
+ * following "Limit", with a trailing "Soft" removed) and queried with getrlimit(). Properties whose
+ * name ends in "Soft" report rlim_cur, all others rlim_max. The value is appended as 't'. */
+int bus_property_get_rlimit(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        struct rlimit queried = {};
+        const struct rlimit *rl;
+        const char *soft_suffix;
+        rlim_t value;
+
+        assert(bus);
+        assert(reply);
+        assert(userdata);
+
+        soft_suffix = endswith(property, "Soft");
+
+        rl = *(struct rlimit**) userdata;
+        if (!rl) {
+                const char *name = property, *limit;
+                int resource;
+
+                /* Chop off "Soft" suffix */
+                if (soft_suffix)
+                        name = strndupa(property, soft_suffix - property);
+
+                /* Skip over any prefix, such as "Default" */
+                limit = strstr(name, "Limit");
+                assert_se(limit);
+
+                resource = rlimit_from_string(limit + 5);
+                assert(resource >= 0);
+
+                /* Best effort: if the query fails the zero-initialized buffer is reported */
+                (void) getrlimit(resource, &queried);
+                rl = &queried;
+        }
+
+        value = soft_suffix ? rl->rlim_cur : rl->rlim_max;
+
+        /* rlim_t might have different sizes, let's map RLIMIT_INFINITY to (uint64_t) -1, so that it is the
+         * same on all archs */
+        return sd_bus_message_append(reply, "t", value == RLIM_INFINITY ? (uint64_t) -1 : (uint64_t) value);
+}
+
+/* Adds every name of the string list @l to the tracking object @t. A failure does not stop the
+ * iteration; the error of the first failing name is returned once all names were tried, 0 if none
+ * failed. */
+int bus_track_add_name_many(sd_bus_track *t, char **l) {
+        int first_error = 0;
+        char **name;
+
+        assert(t);
+
+        STRV_FOREACH(name, l) {
+                int q = sd_bus_track_add_name(t, *name);
+
+                /* Remember only the earliest error */
+                if (first_error >= 0 && q < 0)
+                        first_error = q;
+        }
+
+        return first_error;
+}
+
+/* Creates and starts a system bus connection, much like sd_bus_open_system(), but with the "watch_bind"
+ * feature and the Connected() signal turned on. The address is taken from $DBUS_SYSTEM_BUS_ADDRESS (read
+ * via secure_getenv()), falling back to DEFAULT_SYSTEM_BUS_ADDRESS. If @description is non-NULL it is set
+ * on the connection. On success the connection is stored in *ret and 0 is returned; on failure a negative
+ * error code is returned and the half-set-up connection is released by the cleanup handler. */
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description) {
+        _cleanup_(sd_bus_close_unrefp) sd_bus *conn = NULL;
+        const char *address;
+        int r;
+
+        assert(ret);
+
+        r = sd_bus_new(&conn);
+        if (r < 0)
+                return r;
+
+        if (description) {
+                r = sd_bus_set_description(conn, description);
+                if (r < 0)
+                        return r;
+        }
+
+        address = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS");
+
+        /* Configure the connection; each step only runs if all previous ones succeeded */
+        r = sd_bus_set_address(conn, address ? address : DEFAULT_SYSTEM_BUS_ADDRESS);
+        if (r >= 0)
+                r = sd_bus_set_bus_client(conn, true);
+        if (r >= 0)
+                r = sd_bus_set_trusted(conn, true);
+        if (r >= 0)
+                r = sd_bus_negotiate_creds(conn, true, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS);
+        if (r >= 0)
+                r = sd_bus_set_watch_bind(conn, true);
+        if (r >= 0)
+                r = sd_bus_set_connected_signal(conn, true);
+        if (r >= 0)
+                r = sd_bus_start(conn);
+        if (r < 0)
+                return r;
+
+        *ret = TAKE_PTR(conn);
+
+        return 0;
+}
+
+/* Replies to the method call @m with an "a{ss}" dictionary. The string list @l is consumed in pairs: each
+ * even entry is a key, the entry following it the value. Returns the result of sd_bus_send(), or the
+ * negative error of the first message operation that failed. */
+int bus_reply_pair_array(sd_bus_message *m, char **l) {
+        _cleanup_(sd_bus_message_unrefp) sd_bus_message *answer = NULL;
+        char **key, **value;
+        int r;
+
+        assert(m);
+
+        r = sd_bus_message_new_method_return(m, &answer);
+        if (r >= 0)
+                r = sd_bus_message_open_container(answer, 'a', "{ss}");
+        if (r < 0)
+                return r;
+
+        STRV_FOREACH_PAIR(key, value, l) {
+                r = sd_bus_message_append(answer, "{ss}", *key, *value);
+                if (r < 0)
+                        return r;
+        }
+
+        r = sd_bus_message_close_container(answer);
+        if (r < 0)
+                return r;
+
+        return sd_bus_send(NULL, answer, NULL);
+}
diff --git a/src/shared/bus-util.h b/src/shared/bus-util.h
new file mode 100644
index 0000000..71c248f
--- /dev/null
+++ b/src/shared/bus-util.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "hashmap.h"
+#include "macro.h"
+#include "string-util.h"
+/* Transport kinds accepted by bus_connect_transport() and bus_connect_transport_systemd().
+ * _BUS_TRANSPORT_MAX directly follows the last real value and therefore equals the number of
+ * transports; _BUS_TRANSPORT_INVALID is the negative sentinel (-1). */
+typedef enum BusTransport {
+ BUS_TRANSPORT_LOCAL,
+ BUS_TRANSPORT_REMOTE,
+ BUS_TRANSPORT_MACHINE,
+ _BUS_TRANSPORT_MAX,
+ _BUS_TRANSPORT_INVALID = -1
+} BusTransport;
+
+typedef int (*bus_property_set_t) (sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+/* One entry of the property table ("map") handed to bus_message_map_all_properties() and
+ * bus_map_all_properties(). NOTE(review): the field semantics noted below are inferred from the
+ * field names and the bus_property_set_t signature; confirm against the implementation. */
+struct bus_properties_map {
+ const char *member; /* presumably the name of the property this entry applies to */
+ const char *signature; /* presumably the D-Bus type signature expected for the value */
+ bus_property_set_t set; /* callback receiving the message and userdata; TODO confirm whether NULL selects a default */
+ size_t offset; /* presumably a byte offset into the userdata object — verify */
+};
+/* Bit flags; presumably what the "flags" argument of the *_map_all_properties() calls takes — confirm. */
+enum {
+ BUS_MAP_STRDUP = 1 << 0, /* If set, each "s" message is duplicated. Thus, each pointer needs to be freed. */
+ BUS_MAP_BOOLEAN_AS_BOOL = 1 << 1, /* If set, each "b" message is written to a bool pointer. If not set, "b" is written to an int pointer. */
+};
+
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+
+int bus_message_map_all_properties(sd_bus_message *m, const struct bus_properties_map *map, unsigned flags, sd_bus_error *error, void *userdata);
+int bus_map_all_properties(sd_bus *bus, const char *destination, const char *path, const struct bus_properties_map *map,
+ unsigned flags, sd_bus_error *error, sd_bus_message **reply, void *userdata);
+
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name);
+
+typedef bool (*check_idle_t)(void *userdata);
+
+int bus_event_loop_with_idle(sd_event *e, sd_bus *bus, const char *name, usec_t timeout, check_idle_t check_idle, void *userdata);
+
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error);
+
+int bus_check_peercred(sd_bus *c);
+
+int bus_test_polkit(sd_bus_message *call, int capability, const char *action, const char **details, uid_t good_user, bool *_challenge, sd_bus_error *e);
+
+int bus_verify_polkit_async(sd_bus_message *call, int capability, const char *action, const char **details, bool interactive, uid_t good_user, Hashmap **registry, sd_bus_error *error);
+void bus_verify_polkit_async_registry_free(Hashmap *registry);
+
+int bus_connect_system_systemd(sd_bus **_bus);
+int bus_connect_user_systemd(sd_bus **_bus);
+
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **bus);
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus);
+
+typedef int (*bus_message_print_t) (const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all);
+
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) _printf_(4,5);
+int bus_message_print_all_properties(sd_bus_message *m, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
+int bus_print_all_properties(sd_bus *bus, const char *dest, const char *path, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
+
+int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error);
+int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define bus_property_get_usec ((sd_bus_property_get_t) NULL)
+#define bus_property_set_usec ((sd_bus_property_set_t) NULL)
+
+assert_cc(sizeof(int) == sizeof(int32_t));
+#define bus_property_get_int ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(unsigned) == sizeof(uint32_t));
+#define bus_property_get_unsigned ((sd_bus_property_get_t) NULL)
+
+/* On 64bit machines we can use the default serializer for size_t and
+ * friends, otherwise we need to cast this manually */
+#if __SIZEOF_SIZE_T__ == 8
+#define bus_property_get_size ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_size(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+#if __SIZEOF_LONG__ == 8
+#define bus_property_get_long ((sd_bus_property_get_t) NULL)
+#define bus_property_get_ulong ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_long(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_get_ulong(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+/* uid_t and friends are 32 bit wide on Linux. This means we can just use the
+ * default serializer for 32bit unsigned integers when serializing them, and
+ * map these getters to NULL here */
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+#define bus_property_get_uid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+#define bus_property_get_gid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+#define bus_property_get_pid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(mode_t) == sizeof(uint32_t));
+#define bus_property_get_mode ((sd_bus_property_get_t) NULL)
+
+int bus_log_parse_error(int r);
+int bus_log_create_error(int r);
+/* Defines a property getter called "function" that does not look at userdata: it appends the
+ * expression "val" to the reply, using the type string "bus_type". bus and reply must be non-NULL
+ * (asserted). */
+#define BUS_DEFINE_PROPERTY_GET_GLOBAL(function, bus_type, val) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ assert(bus); \
+ assert(reply); \
+ \
+ return sd_bus_message_append(reply, bus_type, val); \
+ }
+/* Defines a property getter called "function" that interprets userdata as "data_type *" and appends
+ * get2(get1(data)) to the reply, using the type string "bus_type". get1 and get2 may be functions
+ * or function-like macros. bus, reply and userdata must be non-NULL (asserted). */
+#define BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, get2) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ data_type *data = userdata; \
+ \
+ assert(bus); \
+ assert(reply); \
+ assert(data); \
+ \
+ return sd_bus_message_append(reply, bus_type, \
+ get2(get1(data))); \
+ }
+/* ident() is the identity. BUS_DEFINE_PROPERTY_GET() defines a getter that appends get1(data), where
+ * data is the userdata pointer cast to "data_type *", without any further conversion. */
+#define ident(x) (x)
+#define BUS_DEFINE_PROPERTY_GET(function, bus_type, data_type, get1) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, ident)
+/* ref() dereferences its argument. BUS_DEFINE_PROPERTY_GET_REF() defines a getter that appends
+ * get(*data), i.e. "get" is applied to the object userdata points to rather than to the pointer. */
+#define ref(x) (*(x))
+#define BUS_DEFINE_PROPERTY_GET_REF(function, bus_type, data_type, get) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, ref, get)
+/* Defines a getter for a value of type "type": the object userdata points to is passed to
+ * name##_to_string() and the result is appended to the reply as type "s". */
+#define BUS_DEFINE_PROPERTY_GET_ENUM(function, name, type) \
+ BUS_DEFINE_PROPERTY_GET_REF(function, "s", type, name##_to_string)
+/* Expands to two SD_BUS_PROPERTY() entries of type "t" for a struct dual_timestamp located at "offset":
+ * one called "name" for the realtime member, and one called name "Monotonic" for the monotonic member.
+ * "name" has to be a string literal, since it is concatenated with "Monotonic" by the compiler. */
+#define BUS_PROPERTY_DUAL_TIMESTAMP(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, realtime), (flags)), \
+ SD_BUS_PROPERTY(name "Monotonic", "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, monotonic), (flags))
+
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path);
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external);
+
+int bus_property_get_rlimit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+int bus_track_add_name_many(sd_bus_track *t, char **l);
+
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description);
+/* Shortcut for bus_open_system_watch_bind_with_description() that passes no description. */
+static inline int bus_open_system_watch_bind(sd_bus **ret) {
+        const char *no_description = NULL;
+
+        return bus_open_system_watch_bind_with_description(ret, no_description);
+}
+
+int bus_reply_pair_array(sd_bus_message *m, char **l);
diff --git a/src/shared/calendarspec.c b/src/shared/calendarspec.c
new file mode 100644
index 0000000..dafc09e
--- /dev/null
+++ b/src/shared/calendarspec.c
@@ -0,0 +1,1370 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+
+#include "alloc-util.h"
+#include "calendarspec.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+#define BITS_WEEKDAYS 127
+#define MIN_YEAR 1970
+#define MAX_YEAR 2199
+
+/* An arbitrary limit on the length of the chains of components. We don't want to
+ * build a very long linked list, which would be slow to iterate over and might cause
+ * our stack to overflow. It's unlikely that legitimate uses require more than a few
+ * linked components anyway. */
+#define CALENDARSPEC_COMPONENTS_MAX 240
+
+/* Frees every element of the singly linked component list starting at @c. NULL is a no-op. */
+static void free_chain(CalendarComponent *c) {
+        CalendarComponent *following;
+
+        for (; c; c = following) {
+                /* Fetch the successor before the element goes away */
+                following = c->next;
+                free(c);
+        }
+}
+
+/* Releases a calendar specification together with all of its component chains and its timezone
+ * string. Accepts NULL. Always returns NULL, so that callers can write "c = calendar_spec_free(c);". */
+CalendarSpec* calendar_spec_free(CalendarSpec *c) {
+
+        if (c) {
+                free_chain(c->year);
+                free_chain(c->month);
+                free_chain(c->day);
+                free_chain(c->hour);
+                free_chain(c->minute);
+                free_chain(c->microsecond);
+                free(c->timezone);
+
+                free(c);
+        }
+
+        return NULL;
+}
+
+/* Ordering function for pointers to components: compares by start, then by stop, then by repeat.
+ * Returns the result of the first CMP() that is not zero, or zero if all three fields are equal. */
+static int component_compare(CalendarComponent * const *a, CalendarComponent * const *b) {
+        const CalendarComponent *x = *a, *y = *b;
+        int r;
+
+        r = CMP(x->start, y->start);
+        if (r == 0)
+                r = CMP(x->stop, y->stop);
+        if (r == 0)
+                r = CMP(x->repeat, y->repeat);
+
+        return r;
+}
+
+/* Brings a component chain into canonical form: every "stop" is rounded down so that the range length is
+ * a multiple of "repeat", the elements are sorted with component_compare(), and elements that compare
+ * equal are freed so that only one of them remains. *c is updated to the new list head. */
+static void normalize_chain(CalendarComponent **c) {
+        CalendarComponent **sorted, *cur, *head;
+        size_t count = 0, idx;
+
+        assert(c);
+
+        /* First pass: count the elements and trim the ranges */
+        for (cur = *c; cur; cur = cur->next) {
+                count++;
+
+                if (cur->repeat > 0 && cur->stop > cur->start)
+                        cur->stop -= (cur->stop - cur->start) % cur->repeat;
+        }
+
+        /* Nothing to sort or deduplicate */
+        if (count <= 1)
+                return;
+
+        /* Second pass: collect the elements in an array, so that they can be sorted */
+        sorted = newa(CalendarComponent*, count);
+        for (cur = *c, idx = 0; cur; cur = cur->next, idx++)
+                sorted[idx] = cur;
+
+        typesafe_qsort(sorted, count, component_compare);
+
+        /* Relink back to front, starting with the greatest element as tail */
+        head = sorted[count-1];
+        head->next = NULL;
+
+        for (idx = count - 1; idx-- > 0; ) {
+
+                /* Drop non-unique entries */
+                if (component_compare(&sorted[idx], &head) == 0) {
+                        free(sorted[idx]);
+                        continue;
+                }
+
+                sorted[idx]->next = head;
+                head = sorted[idx];
+        }
+
+        *c = head;
+}
+
+/* Expands two-digit years in the start and stop fields of every element of the chain:
+ * 0…69 become 2000…2069 and 70…99 become 1970…1999 (e.g. 12 → 2012, 89 → 1989). Other values,
+ * including negative ones, are left alone. */
+static void fix_year(CalendarComponent *c) {
+        CalendarComponent *i;
+
+        for (i = c; i; i = i->next) {
+                if (i->start >= 0 && i->start < 70)
+                        i->start += 2000;
+                else if (i->start >= 70 && i->start < 100)
+                        i->start += 1900;
+
+                if (i->stop >= 0 && i->stop < 70)
+                        i->stop += 2000;
+                else if (i->stop >= 70 && i->stop < 100)
+                        i->stop += 1900;
+        }
+}
+
+/* Brings a calendar specification into canonical form: a timezone called "UTC" is turned into the utc
+ * flag, a weekday mask that selects no or all days is replaced by -1, end_of_month is dropped if no day
+ * is given, two-digit years are expanded, and all component chains are normalized. Always returns 0. */
+int calendar_spec_normalize(CalendarSpec *c) {
+        CalendarComponent **chains[6];
+        size_t i;
+
+        assert(c);
+
+        if (streq_ptr(c->timezone, "UTC")) {
+                c->utc = true;
+                c->timezone = mfree(c->timezone);
+        }
+
+        /* Only a mask that selects some, but not all, weekdays is meaningful */
+        if (!(c->weekdays_bits > 0 && c->weekdays_bits < BITS_WEEKDAYS))
+                c->weekdays_bits = -1;
+
+        if (!c->day)
+                c->end_of_month = false;
+
+        fix_year(c->year);
+
+        chains[0] = &c->year;
+        chains[1] = &c->month;
+        chains[2] = &c->day;
+        chains[3] = &c->hour;
+        chains[4] = &c->minute;
+        chains[5] = &c->microsecond;
+
+        for (i = 0; i < ELEMENTSOF(chains); i++)
+                normalize_chain(chains[i]);
+
+        return 0;
+}
+
+/* Checks that every element of the component chain @c lies within [@from, @to].
+ *
+ * For each element, "start" (and "stop", if set, i.e. >= 0) must be inside the interval, and "repeat"
+ * must be small enough that at least one repetition fits. If @end_of_month is true the values count
+ * days backwards from the end of the month, and the upper bound is lowered by 3.
+ *
+ * The end-of-month adjustment is applied exactly once for the whole chain. (It used to be re-applied on
+ * every recursion step, which shrank the permitted range by 3 for each further list element and thus
+ * rejected valid lists.) The chain is walked iteratively, so that its length does not affect stack use.
+ *
+ * Returns true if the chain is empty or all elements are valid, false otherwise. */
+_pure_ static bool chain_valid(CalendarComponent *c, int from, int to, bool end_of_month) {
+        assert(to >= from);
+
+        /* Forbid dates more than 28 days from the end of the month */
+        if (end_of_month)
+                to -= 3;
+
+        for (; c; c = c->next) {
+
+                if (c->start < from || c->start > to)
+                        return false;
+
+                /* Avoid overly large values that could cause overflow */
+                if (c->repeat > to - from)
+                        return false;
+
+                /*
+                 * c->repeat must be short enough so at least one repetition may
+                 * occur before the end of the interval. For dates scheduled
+                 * relative to the end of the month, c->start and c->stop
+                 * correspond to the Nth last day of the month.
+                 */
+                if (c->stop >= 0) {
+                        if (c->stop < from || c->stop > to)
+                                return false;
+
+                        if (c->start + c->repeat > c->stop)
+                                return false;
+                } else {
+                        if (end_of_month && c->start - c->repeat < from)
+                                return false;
+
+                        if (!end_of_month && c->start + c->repeat > to)
+                                return false;
+                }
+        }
+
+        return true;
+}
+
+/* Returns true if the weekday mask does not exceed BITS_WEEKDAYS and every component chain lies within
+ * its permitted range: years MIN_YEAR…MAX_YEAR, months 1…12, days 1…31 (subject to the end-of-month
+ * rule), hours 0…23, minutes 0…59, and microseconds below 60 seconds. */
+_pure_ bool calendar_spec_valid(CalendarSpec *c) {
+        assert(c);
+
+        if (c->weekdays_bits > BITS_WEEKDAYS)
+                return false;
+
+        /* Evaluated left to right, stopping at the first chain that is out of range */
+        return chain_valid(c->year, MIN_YEAR, MAX_YEAR, false) &&
+                chain_valid(c->month, 1, 12, false) &&
+                chain_valid(c->day, 1, 31, c->end_of_month) &&
+                chain_valid(c->hour, 0, 23, false) &&
+                chain_valid(c->minute, 0, 59, false) &&
+                chain_valid(c->microsecond, 0, 60*USEC_PER_SEC-1, false);
+}
+/* Writes the weekdays selected in c->weekdays_bits (bit 0 is "Mon", bit 6 is "Sun") to @f. Runs of
+ * consecutive days are compressed: a run of two days is written as "A,B", a longer run as "A..B";
+ * separate runs are joined with ",". The mask must select at least one day (asserted). */
+static void format_weekdays(FILE *f, const CalendarSpec *c) {
+ static const char *const days[] = {
+ "Mon",
+ "Tue",
+ "Wed",
+ "Thu",
+ "Fri",
+ "Sat",
+ "Sun"
+ };
+
+ int l, x; /* l: index of the first day of the run currently open, or -1 if none; x: day being examined */
+ bool need_comma = false; /* becomes true once the first run has been started */
+
+ assert(f);
+ assert(c);
+ assert(c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS);
+
+ for (x = 0, l = -1; x < (int) ELEMENTSOF(days); x++) {
+
+ if (c->weekdays_bits & (1 << x)) {
+
+ if (l < 0) { /* a new run starts here: write its first day right away */
+ if (need_comma)
+ fputc(',', f);
+ else
+ need_comma = true;
+
+ fputs(days[x], f);
+ l = x;
+ }
+
+ } else if (l >= 0) { /* day x is not selected, hence the open run ended at day x-1 */
+
+ if (x > l + 1) { /* the run had more than one day: write its last day, too */
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+
+ l = -1;
+ }
+ }
+
+ if (l >= 0 && x > l + 1) { /* a run reaching up to the last day is still open: finish it */
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+}
+
+/* Writes the component chain @c to @f as a comma-separated list. Each element is written as "start",
+ * followed by "..stop" if a stop value is set and "/repeat" if a repetition is set that is not implied
+ * by the range. Numbers are zero-padded to @space digits. If @usec is true the values are microseconds
+ * and are written as seconds with an optional six-digit fraction. An empty chain is written as "*"; so
+ * is a final microsecond element that starts at 0 and repeats every second. */
+static void format_chain(FILE *f, int space, const CalendarComponent *c, bool usec) {
+        const int unit = usec ? (int) USEC_PER_SEC : 1;
+
+        assert(f);
+
+        if (!c) {
+                fputc('*', f);
+                return;
+        }
+
+        for (;;) {
+                /* "Every second" is the wildcard of the seconds field */
+                if (usec && !c->next && c->start == 0 && c->repeat == USEC_PER_SEC) {
+                        fputc('*', f);
+                        return;
+                }
+
+                assert(c->start >= 0);
+
+                fprintf(f, "%0*i", space, c->start / unit);
+                if (c->start % unit > 0)
+                        fprintf(f, ".%06i", c->start % unit);
+
+                if (c->stop > 0)
+                        fprintf(f, "..%0*i", space, c->stop / unit);
+                if (c->stop % unit > 0)
+                        fprintf(f, ".%06i", c->stop % unit);
+
+                /* A repetition of one unit is implied by a range, hence don't write it out then */
+                if (c->repeat > 0 && !(c->stop > 0 && c->repeat == unit))
+                        fprintf(f, "/%i", c->repeat / unit);
+                if (c->repeat % unit > 0)
+                        fprintf(f, ".%06i", c->repeat % unit);
+
+                c = c->next;
+                if (!c)
+                        return;
+
+                fputc(',', f);
+        }
+}
+
+/* Formats the calendar specification @c as string, in the form
+ * "[WEEKDAYS ]YEAR-MONTH-DAY HOUR:MINUTE:SECOND[ TIMEZONE]" ("~" instead of the second "-" if the day
+ * counts from the end of the month). The timezone suffix is " UTC" if the utc flag is set, otherwise
+ * the explicit timezone if there is one, otherwise tzname[] of the local timezone if c->dst is 0 or 1.
+ *
+ * On success 0 is returned and *p is set to a newly allocated string the caller must free. On failure
+ * a negative errno-style error is returned and *p is left untouched.
+ *
+ * The memory stream is always closed before its buffer is freed or handed out: fclose() still updates
+ * the buffer of a stream created with open_memstream(), so freeing the buffer first (as was done on
+ * the error path before) lets fclose() operate on freed memory. */
+int calendar_spec_to_string(const CalendarSpec *c, char **p) {
+        char *buf = NULL;
+        size_t sz = 0;
+        FILE *f;
+        int r;
+
+        assert(c);
+        assert(p);
+
+        f = open_memstream(&buf, &sz);
+        if (!f)
+                return -ENOMEM;
+
+        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+        if (c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS) {
+                format_weekdays(f, c);
+                fputc(' ', f);
+        }
+
+        format_chain(f, 4, c->year, false);
+        fputc('-', f);
+        format_chain(f, 2, c->month, false);
+        fputc(c->end_of_month ? '~' : '-', f);
+        format_chain(f, 2, c->day, false);
+        fputc(' ', f);
+        format_chain(f, 2, c->hour, false);
+        fputc(':', f);
+        format_chain(f, 2, c->minute, false);
+        fputc(':', f);
+        format_chain(f, 2, c->microsecond, true);
+
+        if (c->utc)
+                fputs(" UTC", f);
+        else if (c->timezone != NULL) {
+                fputc(' ', f);
+                fputs(c->timezone, f);
+        } else if (IN_SET(c->dst, 0, 1)) {
+
+                /* If daylight saving is explicitly on or off, let's show the used timezone. */
+
+                tzset();
+
+                if (!isempty(tzname[c->dst])) {
+                        fputc(' ', f);
+                        fputs(tzname[c->dst], f);
+                }
+        }
+
+        r = fflush_and_check(f);
+
+        /* Close first, in both the success and the error case: only afterwards "buf" is final */
+        fclose(f);
+
+        if (r < 0) {
+                free(buf);
+                return r;
+        }
+
+        *p = buf;
+        return 0;
+}
+/* Parses an optional weekday specification at the beginning of *p and ORs the selected days into
+ * c->weekdays_bits (bit 0 is Monday, bit 6 is Sunday, see the day_nr table). Names are matched
+ * case-insensitively, in full or abbreviated to three letters, and may be combined with "," (list) and
+ * ".." or "-" (range). On success *p is moved behind the weekday part, including the spaces following
+ * it. Returns 0 on success — also if the text does not start with a weekday name at all, in which case
+ * *p is not changed — and -EINVAL if the weekday part is malformed. */
+static int parse_weekdays(const char **p, CalendarSpec *c) {
+ static const struct {
+ const char *name;
+ const int nr;
+ } day_nr[] = { /* full names come before their abbreviations, so that "Monday" is not taken for "Mon" plus junk */
+ { "Monday", 0 },
+ { "Mon", 0 },
+ { "Tuesday", 1 },
+ { "Tue", 1 },
+ { "Wednesday", 2 },
+ { "Wed", 2 },
+ { "Thursday", 3 },
+ { "Thu", 3 },
+ { "Friday", 4 },
+ { "Fri", 4 },
+ { "Saturday", 5 },
+ { "Sat", 5 },
+ { "Sunday", 6 },
+ { "Sun", 6 }
+ };
+
+ int l = -1; /* first day of the range currently open, or -1 if no range is open */
+ bool first = true; /* true until one weekday was consumed and more input follows */
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ for (;;) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(day_nr); i++) {
+ size_t skip;
+
+ if (!startswith_no_case(*p, day_nr[i].name))
+ continue;
+
+ skip = strlen(day_nr[i].name);
+
+ if (!IN_SET((*p)[skip], 0, '-', '.', ',', ' '))
+ return -EINVAL;
+
+ c->weekdays_bits |= 1 << day_nr[i].nr;
+
+ if (l >= 0) { /* this day closes a range: select the days in between as well */
+ int j;
+
+ if (l > day_nr[i].nr) /* ranges must not run backwards */
+ return -EINVAL;
+
+ for (j = l + 1; j < day_nr[i].nr; j++)
+ c->weekdays_bits |= 1 << j;
+ }
+
+ *p += skip;
+ break;
+ }
+
+ /* Couldn't find this prefix, so let's assume the
+ weekday was not specified and let's continue with
+ the date */
+ if (i >= ELEMENTSOF(day_nr))
+ return first ? 0 : -EINVAL;
+
+ /* We reached the end of the string */
+ if (**p == 0)
+ return 0;
+
+ /* We reached the end of the weekday spec part */
+ if (**p == ' ') {
+ *p += strspn(*p, " ");
+ return 0;
+ }
+
+ if (**p == '.') {
+ if (l >= 0) /* a range end cannot start another range */
+ return -EINVAL;
+
+ if ((*p)[1] != '.')
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 2;
+
+ /* Support ranges with "-" for backwards compatibility */
+ } else if (**p == '-') {
+ if (l >= 0)
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 1;
+
+ } else if (**p == ',') {
+ l = -1;
+ *p += 1;
+ }
+
+ /* Allow a trailing comma but not an open range */
+ if (IN_SET(**p, 0, ' ')) {
+ *p += strspn(*p, " ");
+ return l < 0 ? 0 : -EINVAL;
+ }
+
+ first = false;
+ }
+}
+
+/* Converts the decimal number at the beginning of @p with strtoul(). On success 0 is returned, the
+ * value is stored in *ret and *e points to the first character that was not consumed. Returns -errno
+ * if strtoul() reported an error, and -EINVAL if no characters were consumed. The output parameters
+ * are only written on success. */
+static int parse_one_number(const char *p, const char **e, unsigned long *ret) {
+        char *end = NULL;
+        unsigned long parsed;
+
+        /* strtoul() reports errors exclusively via errno, hence reset it first */
+        errno = 0;
+        parsed = strtoul(p, &end, 10);
+
+        if (errno > 0)
+                return -errno;
+
+        if (end == p)
+                return -EINVAL;
+
+        *e = end;
+        *ret = parsed;
+
+        return 0;
+}
+
+/* Parses one non-negative decimal number at *p, stores it in *res and moves *p behind it.
+ *
+ * If @usec is true the number is taken as seconds and returned in microseconds; a single "." following
+ * it introduces a fractional part, which is parsed by parse_fractional_part_u() with a digit count of
+ * 6. Two dots ("..") are a range separator and are left alone.
+ *
+ * Returns 0 on success, -EINVAL if *p does not start with a digit, -ERANGE if the result does not fit
+ * into an int, or the error returned by one of the number parsers. *p and *res are only changed on
+ * success.
+ *
+ * The scaling is done in "unsigned long long", which is at least 64bit wide everywhere. Doing it in
+ * "unsigned long" silently truncated the product where that type is only 32bit wide, so that
+ * out-of-range values could pass the final range check. */
+static int parse_component_decimal(const char **p, bool usec, int *res) {
+        unsigned long long scaled;
+        unsigned long value;
+        const char *e = NULL;
+        int r;
+
+        /* The <ctype.h> functions are only defined for unsigned char values and EOF, hence cast: a
+         * plain char may be negative */
+        if (!isdigit((unsigned char) **p))
+                return -EINVAL;
+
+        r = parse_one_number(*p, &e, &value);
+        if (r < 0)
+                return r;
+
+        scaled = value;
+
+        if (usec) {
+                if (scaled > ULLONG_MAX / USEC_PER_SEC)
+                        return -ERANGE;
+
+                scaled *= USEC_PER_SEC;
+
+                /* One "." is a decimal point, but ".." is a range separator */
+                if (e[0] == '.' && e[1] != '.') {
+                        unsigned add;
+
+                        e++;
+                        r = parse_fractional_part_u(&e, 6, &add);
+                        if (r < 0)
+                                return r;
+
+                        if (scaled + add < scaled)
+                                return -ERANGE;
+                        scaled += add;
+                }
+        }
+
+        if (scaled > INT_MAX)
+                return -ERANGE;
+
+        *p = e;
+        *res = (int) scaled;
+
+        return 0;
+}
+
+/* Allocates a component that matches exactly @value (no stop value, no repetition) and puts it in
+ * front of the chain *c. Returns 0 on success, -ENOMEM if the allocation failed, in which case *c is
+ * not changed. */
+static int const_chain(int value, CalendarComponent **c) {
+        CalendarComponent *node;
+
+        assert(c);
+
+        node = new0(CalendarComponent, 1);
+        if (!node)
+                return -ENOMEM;
+
+        *node = (CalendarComponent) {
+                .start = value,
+                .stop = -1,
+                .repeat = 0,
+                .next = *c,
+        };
+
+        *c = node;
+
+        return 0;
+}
+
+/* Fills in @c so that it matches exactly the point in time @time, interpreted as UTC: the time is
+ * broken down with gmtime_r(), one single-element chain is created per field, and the utc flag is set.
+ *
+ * Returns 0 on success, -ERANGE if gmtime_r() failed, or the error of const_chain(). On failure @c is
+ * not modified, and the chains created so far are freed again (they used to be leaked when a later
+ * allocation failed). */
+static int calendarspec_from_time_t(CalendarSpec *c, time_t time) {
+        CalendarComponent *year = NULL, *month = NULL, *day = NULL, *hour = NULL, *minute = NULL, *us = NULL;
+        struct tm tm;
+        int r;
+
+        if (!gmtime_r(&time, &tm))
+                return -ERANGE;
+
+        r = const_chain(tm.tm_year + 1900, &year);
+        if (r < 0)
+                goto fail;
+
+        r = const_chain(tm.tm_mon + 1, &month);
+        if (r < 0)
+                goto fail;
+
+        r = const_chain(tm.tm_mday, &day);
+        if (r < 0)
+                goto fail;
+
+        r = const_chain(tm.tm_hour, &hour);
+        if (r < 0)
+                goto fail;
+
+        r = const_chain(tm.tm_min, &minute);
+        if (r < 0)
+                goto fail;
+
+        r = const_chain(tm.tm_sec * USEC_PER_SEC, &us);
+        if (r < 0)
+                goto fail;
+
+        /* Everything is allocated, now pass ownership to the specification */
+        c->utc = true;
+        c->year = year;
+        c->month = month;
+        c->day = day;
+        c->hour = hour;
+        c->minute = minute;
+        c->microsecond = us;
+        return 0;
+
+fail:
+        /* free_chain() accepts NULL, hence chains not created yet need no special treatment */
+        free_chain(year);
+        free_chain(month);
+        free_chain(day);
+        free_chain(hour);
+        free_chain(minute);
+        free_chain(us);
+        return r;
+}
+
+/* Parses a comma-separated list of components at *p, each of the form "START[..STOP][/REPEAT]", and
+ * puts every component in front of the chain *c (the chain hence ends up in reverse order of the
+ * input). A range without an explicit repetition repeats every unit (every second if @usec is true).
+ *
+ * @nesting is the number of components already parsed for this chain; once it exceeds
+ * CALENDARSPEC_COMPONENTS_MAX, -ENOBUFS is returned. Other errors: -EINVAL for malformed input,
+ * -ERANGE for a repetition of zero or an out-of-range number, -ENOMEM.
+ *
+ * On success 0 is returned and *p points behind the list. On failure the components parsed before the
+ * failing one remain linked into *c, and have to be freed by the caller.
+ *
+ * The list is processed in a loop rather than by recursion, and *p is only read after it was asserted
+ * to be a valid pointer. */
+static int prepend_component(const char **p, bool usec, unsigned nesting, CalendarComponent **c) {
+        assert(p);
+        assert(c);
+
+        for (;; nesting++) {
+                int r, start, stop = -1, repeat = 0;
+                CalendarComponent *cc;
+                const char *e = *p;
+
+                if (nesting > CALENDARSPEC_COMPONENTS_MAX)
+                        return -ENOBUFS;
+
+                r = parse_component_decimal(&e, usec, &start);
+                if (r < 0)
+                        return r;
+
+                if (e[0] == '.' && e[1] == '.') {
+                        e += 2;
+                        r = parse_component_decimal(&e, usec, &stop);
+                        if (r < 0)
+                                return r;
+
+                        /* Default repetition of a range: every unit */
+                        repeat = usec ? USEC_PER_SEC : 1;
+                }
+
+                if (*e == '/') {
+                        e++;
+                        r = parse_component_decimal(&e, usec, &repeat);
+                        if (r < 0)
+                                return r;
+
+                        if (repeat == 0)
+                                return -ERANGE;
+                }
+
+                /* Only a separator of the surrounding syntax may follow a component */
+                if (!IN_SET(*e, 0, ' ', ',', '-', '~', ':'))
+                        return -EINVAL;
+
+                cc = new0(CalendarComponent, 1);
+                if (!cc)
+                        return -ENOMEM;
+
+                cc->start = start;
+                cc->stop = stop;
+                cc->repeat = repeat;
+                cc->next = *c;
+
+                *p = e;
+                *c = cc;
+
+                if (*e != ',')
+                        return 0;
+
+                /* Another component follows: skip the comma and go on */
+                *p += 1;
+        }
+}
+
+/* Parses one field of a calendar expression at *p: either "*" or a comma-separated list of components.
+ * For "*" the result is an empty chain, except for the seconds field (@usec true), where a component
+ * starting at 0 and repeating every second is put in front of *c. On success 0 is returned, *c is set
+ * and *p is moved behind the field. On failure a negative error is returned and a partially parsed
+ * list is freed again. */
+static int parse_chain(const char **p, bool usec, CalendarComponent **c) {
+        CalendarComponent *head = NULL;
+        const char *cursor;
+        int r;
+
+        assert(p);
+        assert(c);
+
+        cursor = *p;
+
+        if (cursor[0] != '*') {
+                r = prepend_component(&cursor, usec, 0, &head);
+                if (r < 0) {
+                        free_chain(head);
+                        return r;
+                }
+
+                *p = cursor;
+                *c = head;
+                return 0;
+        }
+
+        /* Wildcard */
+        if (!usec)
+                *c = NULL;
+        else {
+                r = const_chain(0, c);
+                if (r < 0)
+                        return r;
+
+                (*c)->repeat = USEC_PER_SEC;
+        }
+
+        *p = cursor + 1;
+        return 0;
+}
+
+/* Parses the date part of a calendar expression at *p. Accepted are "@SECONDS" (UNIX time),
+ * "MONTH-DAY" and "YEAR-MONTH-DAY"; the separator in front of the day may be "~" instead of "-", which
+ * sets c->end_of_month.
+ *
+ * Returns 1 if a "@" timestamp was parsed (nothing else is to be parsed then), 0 otherwise on success,
+ * and a negative error on failure. If the text turns out to be empty, or a time rather than a date
+ * (first field followed by ":" or by the end of the string), 0 is returned without changing *p. All
+ * chains that are not handed over to @c are freed before returning. */
+static int parse_date(const char **p, CalendarSpec *c) {
+        CalendarComponent *first = NULL, *second = NULL, *third = NULL;
+        const char *t;
+        int r;
+
+        assert(p);
+        assert(*p);
+        assert(c);
+
+        t = *p;
+
+        if (*t == 0)
+                return 0;
+
+        /* @TIMESTAMP — UNIX time in seconds since the epoch */
+        if (*t == '@') {
+                unsigned long value;
+                time_t time;
+
+                r = parse_one_number(t + 1, &t, &value);
+                if (r < 0)
+                        return r;
+
+                /* Refuse values that do not survive the conversion to time_t */
+                time = value;
+                if ((unsigned long) time != value)
+                        return -ERANGE;
+
+                r = calendarspec_from_time_t(c, time);
+                if (r < 0)
+                        return r;
+
+                *p = t;
+                return 1; /* finito, don't parse H:M:S after that */
+        }
+
+        r = parse_chain(&t, false, &first);
+        if (r < 0)
+                goto finish;
+
+        /* Already the end? A ':' as separator? In that case this was a time, not a date */
+        if (IN_SET(*t, 0, ':')) {
+                r = 0;
+                goto finish;
+        }
+
+        if (*t == '~')
+                c->end_of_month = true;
+        else if (*t != '-') {
+                r = -EINVAL;
+                goto finish;
+        }
+
+        t++;
+        r = parse_chain(&t, false, &second);
+        if (r < 0)
+                goto finish;
+
+        /* Got two parts, hence it's month and day */
+        if (IN_SET(*t, 0, ' ')) {
+                *p = t + strspn(t, " ");
+                c->month = first;
+                c->day = second;
+                first = second = NULL;
+                r = 0;
+                goto finish;
+        }
+
+        /* "~" is only permitted in front of the day, hence not if a third part follows */
+        if (c->end_of_month) {
+                r = -EINVAL;
+                goto finish;
+        }
+
+        if (*t == '~')
+                c->end_of_month = true;
+        else if (*t != '-') {
+                r = -EINVAL;
+                goto finish;
+        }
+
+        t++;
+        r = parse_chain(&t, false, &third);
+        if (r < 0)
+                goto finish;
+
+        /* Anything but the end or a space after the third part is an error */
+        if (!IN_SET(*t, 0, ' ')) {
+                r = -EINVAL;
+                goto finish;
+        }
+
+        /* Got three parts, hence it is year, month and day */
+        *p = t + strspn(t, " ");
+        c->year = first;
+        c->month = second;
+        c->day = third;
+        first = second = third = NULL;
+        r = 0;
+
+finish:
+        /* Whatever was not handed over to the specification is released here */
+        free_chain(first);
+        free_chain(second);
+        free_chain(third);
+        return r;
+}
+/* Parses the time part of a calendar expression at *p, which is "HOUR:MINUTE" or
+ * "HOUR:MINUTE:SECOND", and stores the resulting chains in c->hour, c->minute and c->microsecond. An
+ * empty string means 00:00:00, and missing seconds mean 0 seconds. The time has to extend to the end
+ * of the string, anything else is refused with -EINVAL. Returns 0 on success and a negative error on
+ * failure, in which case all chains created here are freed and @c is not changed. */
+static int parse_calendar_time(const char **p, CalendarSpec *c) {
+ CalendarComponent *h = NULL, *m = NULL, *s = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ t = *p;
+
+ /* If no time is specified at all, then this means 00:00:00 */
+ if (*t == 0)
+ goto null_hour;
+
+ r = parse_chain(&t, false, &h);
+ if (r < 0)
+ goto fail;
+
+ if (*t != ':') {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ t++;
+ r = parse_chain(&t, false, &m);
+ if (r < 0)
+ goto fail;
+
+ /* Already at the end? Then it's hours and minutes, and seconds are 0 */
+ if (*t == 0)
+ goto null_second;
+
+ if (*t != ':') {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ t++;
+ r = parse_chain(&t, true, &s); /* seconds are the only field parsed with microsecond resolution */
+ if (r < 0)
+ goto fail;
+
+ /* At the end? Then it's hours, minutes and seconds */
+ if (*t == 0)
+ goto finish;
+
+ r = -EINVAL;
+ goto fail;
+
+null_hour: /* no time given: hour and minute are set to 0 here, then control falls through to null_second */
+ r = const_chain(0, &h);
+ if (r < 0)
+ goto fail;
+
+ r = const_chain(0, &m);
+ if (r < 0)
+ goto fail;
+
+null_second: /* no seconds given: set them to 0, then control falls through to finish */
+ r = const_chain(0, &s);
+ if (r < 0)
+ goto fail;
+
+finish: /* success: hand the chains over to the specification */
+ *p = t;
+ c->hour = h;
+ c->minute = m;
+ c->microsecond = s;
+
+ return 0;
+
+fail: /* free_chain() is called on NULL for chains that were never created */
+ free_chain(h);
+ free_chain(m);
+ free_chain(s);
+ return r;
+}
+
+/* Parses the calendar expression 'p' into a newly allocated CalendarSpec.
+ *
+ * A trailing " UTC" (matched case-insensitively) selects UTC. Otherwise a trailing local timezone
+ * abbreviation (tzname[0] or tzname[1], preceded by a space) selects the corresponding 'dst' value, and
+ * failing that a trailing word accepted by timezone_is_valid() is stored as explicit timezone name. What
+ * remains is either one of the shorthand names ("minutely", "hourly", "daily", ...) or a
+ * weekday/date/time expression handled by parse_weekdays(), parse_date() and parse_calendar_time().
+ *
+ * Returns 0 on success and stores the new object in *spec. On failure a negative errno-style value is
+ * returned (-ENOMEM, -EINVAL, or whatever a helper reported) and *spec is not written. */
+int calendar_spec_from_string(const char *p, CalendarSpec **spec) {
+ const char *utc;
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ int r;
+
+ assert(p);
+ assert(spec);
+
+ c = new0(CalendarSpec, 1);
+ if (!c)
+ return -ENOMEM;
+ /* 'dst' is copied into tm_isdst by find_next(); -1 means no explicit DST choice was made */
+ c->dst = -1;
+ c->timezone = NULL;
+
+ utc = endswith_no_case(p, " UTC");
+ if (utc) {
+ c->utc = true;
+ /* Continue with a copy of the string that has the suffix cut off */
+ p = strndupa(p, utc - p);
+ } else {
+ const char *e = NULL;
+ int j;
+
+ tzset();
+
+ /* Check if the local timezone was specified? */
+ for (j = 0; j <= 1; j++) {
+ if (isempty(tzname[j]))
+ continue;
+
+ e = endswith_no_case(p, tzname[j]);
+ if (!e)
+ continue;
+ /* The abbreviation must not be the whole string and must follow a space */
+ if (e == p)
+ continue;
+ if (e[-1] != ' ')
+ continue;
+
+ break;
+ }
+
+ /* Found one of the two timezones specified? */
+ if (IN_SET(j, 0, 1)) {
+ /* The extra -1 also drops the separating space */
+ p = strndupa(p, e - p - 1);
+ c->dst = j;
+ } else {
+ const char *last_space;
+
+ /* Neither abbreviation matched (j ran past 1): try the last word as timezone name */
+ last_space = strrchr(p, ' ');
+ if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) {
+ c->timezone = strdup(last_space + 1);
+ if (!c->timezone)
+ return -ENOMEM;
+
+ p = strndupa(p, last_space - p);
+ }
+ }
+ }
+
+ if (isempty(p))
+ return -EINVAL;
+
+ /* Shorthand names: each one pins the listed fields to constant values and leaves the rest unset */
+ if (strcaseeq(p, "minutely")) {
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "hourly")) {
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "daily")) {
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "monthly")) {
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "annually") ||
+ strcaseeq(p, "yearly") ||
+ strcaseeq(p, "anually") /* backwards compatibility */ ) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "weekly")) {
+
+ /* Only bit 0 set; matches_weekday() maps bit 0 to tm_wday == 1 */
+ c->weekdays_bits = 1;
+
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "quarterly")) {
+
+ /* Months 1, 4, 7 and 10 are all added to the same month chain */
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(4, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(10, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "biannually") ||
+ strcaseeq(p, "bi-annually") ||
+ strcaseeq(p, "semiannually") ||
+ strcaseeq(p, "semi-annually")) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else {
+ /* Generic expression: each parser advances 'p' past what it consumed */
+ r = parse_weekdays(&p, c);
+ if (r < 0)
+ return r;
+
+ r = parse_date(&p, c);
+ if (r < 0)
+ return r;
+
+ /* NOTE(review): a positive return from parse_date() apparently means the time part must not
+ * be parsed separately; confirm against parse_date() */
+ if (r == 0) {
+ r = parse_calendar_time(&p, c);
+ if (r < 0)
+ return r;
+ }
+
+ /* Anything left over is trailing garbage */
+ if (*p != 0)
+ return -EINVAL;
+ }
+
+ r = calendar_spec_normalize(c);
+ if (r < 0)
+ return r;
+
+ if (!calendar_spec_valid(c))
+ return -EINVAL;
+
+ /* Success: hand ownership to the caller */
+ *spec = TAKE_PTR(c);
+ return 0;
+}
+
+/* Translates a "days before the end of the month" offset into a day-of-month number for the month of
+ * 'tm'. The candidate date is expressed as day (1 - day) of the following month and handed to
+ * mktime_or_timegm(), which is expected to normalize it in place. Returns the resulting tm_mday, or -1 if
+ * the conversion fails or the normalized date does not fall into the month of 'tm'. */
+static int find_end_of_month(struct tm *tm, bool utc, int day) {
+        struct tm probe;
+
+        probe = *tm;
+        probe.tm_mday = 1 - day;
+        probe.tm_mon += 1;
+
+        if (mktime_or_timegm(&probe, utc) < 0)
+                return -1;
+
+        if (probe.tm_mon != tm->tm_mon)
+                return -1;
+
+        return probe.tm_mday;
+}
+
+/* Looks for the smallest value >= *val that is permitted by the component chain 'c'.
+ *
+ * Each chain entry contributes either its start value (if that is not below *val) or, when it has a
+ * positive repeat, the first start + n*repeat that reaches *val, provided this does not exceed a
+ * non-negative stop. If the chain is the day chain of a spec with end_of_month set, start and stop are
+ * first translated through find_end_of_month().
+ *
+ * Returns 0 if the chain is empty or *val already matched, 1 if *val was advanced to a later match, and
+ * -ENOENT if no entry yields a candidate. */
+static int find_matching_component(const CalendarSpec *spec, const CalendarComponent *c,
+                                   struct tm *tm, int *val) {
+        const CalendarComponent *item;
+        bool relative_to_month_end, found = false;
+        int best = -1, changed;
+
+        assert(val);
+
+        if (!c)
+                return 0;
+
+        /* Only the day chain is ever interpreted relative to the end of the month */
+        relative_to_month_end = spec->end_of_month && c == spec->day;
+
+        for (item = c; item; item = item->next) {
+                int lo = item->start, hi = item->stop, candidate;
+
+                if (relative_to_month_end) {
+                        lo = find_end_of_month(tm, spec->utc, lo);
+                        hi = find_end_of_month(tm, spec->utc, hi);
+
+                        if (hi > 0)
+                                SWAP_TWO(lo, hi);
+                }
+
+                if (lo >= *val)
+                        candidate = lo;
+                else if (item->repeat > 0) {
+                        candidate = lo + item->repeat * DIV_ROUND_UP(*val - lo, item->repeat);
+
+                        /* A non-negative stop bounds the repetition */
+                        if (hi >= 0 && candidate > hi)
+                                continue;
+                } else
+                        continue;
+
+                if (!found || candidate < best) {
+                        best = candidate;
+                        found = true;
+                }
+        }
+
+        if (!found)
+                return -ENOENT;
+
+        changed = *val != best;
+        *val = best;
+        return changed;
+}
+
+/* Tells whether 'tm' describes a date/time that cannot be used as-is: the conversion via
+ * mktime_or_timegm() fails, the year lies beyond MAX_YEAR, or the conversion had to adjust any of the
+ * year/month/day/hour/minute/second fields. */
+static bool tm_out_of_bounds(const struct tm *tm, bool utc) {
+        struct tm normalized;
+
+        assert(tm);
+
+        normalized = *tm;
+        if (mktime_or_timegm(&normalized, utc) < 0)
+                return true;
+
+        /*
+         * Cap the year, so that impossible dates such as "*-02-31" cannot keep find_next() searching
+         * forever. tm_year counts years since 1900, hence the offset.
+         */
+        if (tm->tm_year + 1900 > MAX_YEAR)
+                return true;
+
+        /* In bounds only if the conversion left every field exactly as it was */
+        return !(normalized.tm_year == tm->tm_year &&
+                 normalized.tm_mon == tm->tm_mon &&
+                 normalized.tm_mday == tm->tm_mday &&
+                 normalized.tm_hour == tm->tm_hour &&
+                 normalized.tm_min == tm->tm_min &&
+                 normalized.tm_sec == tm->tm_sec);
+}
+
+/* Checks whether the date in 'tm' falls on one of the weekdays selected in 'weekdays_bits'. Bit 0
+ * corresponds to tm_wday == 1 and bit 6 to tm_wday == 0. A mask that is negative or >= BITS_WEEKDAYS is
+ * treated as "no restriction". Returns false if the date cannot be converted. */
+static bool matches_weekday(int weekdays_bits, const struct tm *tm, bool utc) {
+        struct tm normalized;
+        int bit;
+
+        if (weekdays_bits < 0 || weekdays_bits >= BITS_WEEKDAYS)
+                return true;
+
+        /* Let the conversion fill in tm_wday for us, on a private copy */
+        normalized = *tm;
+        if (mktime_or_timegm(&normalized, utc) < 0)
+                return false;
+
+        if (normalized.tm_wday == 0)
+                bit = 6;
+        else
+                bit = normalized.tm_wday - 1;
+
+        return (weekdays_bits & (1 << bit));
+}
+
+/* Advances *tm and *usec to the earliest point in time, not before the one passed in, that matches every
+ * component of 'spec'.
+ *
+ * The search goes field by field from the year down to the seconds. Whenever a field had to be moved
+ * forward, all smaller fields are reset to their minimum. Whenever a field has no match (or the result
+ * is out of bounds), the next larger field is bumped, the smaller ones are reset, and the whole search
+ * restarts from the top.
+ *
+ * Returns 0 on success with *tm and *usec updated. Returns a negative errno-style value if the year
+ * chain has no match, or -ENOENT if the date is out of bounds right after the year was matched. */
+static int find_next(const CalendarSpec *spec, struct tm *tm, usec_t *usec) {
+ struct tm c;
+ int tm_usec;
+ int r;
+
+ assert(spec);
+ assert(tm);
+
+ c = *tm;
+ tm_usec = *usec;
+
+ for (;;) {
+ /* Normalize the current date */
+ (void) mktime_or_timegm(&c, spec->utc);
+ c.tm_isdst = spec->dst;
+
+ /* tm_year counts from 1900, the year chain holds full years */
+ c.tm_year += 1900;
+ r = find_matching_component(spec, spec->year, &c, &c.tm_year);
+ c.tm_year -= 1900;
+
+ if (r > 0) {
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ /* There is no larger field to bump, so a failure here ends the search */
+ if (r < 0)
+ return r;
+ if (tm_out_of_bounds(&c, spec->utc))
+ return -ENOENT;
+
+ /* tm_mon is 0-based, the month chain is 1-based */
+ c.tm_mon += 1;
+ r = find_matching_component(spec, spec->month, &c, &c.tm_mon);
+ c.tm_mon -= 1;
+
+ if (r > 0) {
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_year++;
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->day, &c, &c.tm_mday);
+ if (r > 0)
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_mon++;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ /* Wrong weekday: try the next day */
+ if (!matches_weekday(spec->weekdays_bits, &c, spec->utc)) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->hour, &c, &c.tm_hour);
+ if (r > 0)
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->minute, &c, &c.tm_min);
+ if (r > 0)
+ c.tm_sec = tm_usec = 0;
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_hour++;
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ /* Seconds and microseconds are matched as one combined microsecond value, temporarily stored
+ * in tm_sec and split up again right after the lookup */
+ c.tm_sec = c.tm_sec * USEC_PER_SEC + tm_usec;
+ r = find_matching_component(spec, spec->microsecond, &c, &c.tm_sec);
+ tm_usec = c.tm_sec % USEC_PER_SEC;
+ c.tm_sec /= USEC_PER_SEC;
+
+ if (r < 0 || tm_out_of_bounds(&c, spec->utc)) {
+ c.tm_min++;
+ c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ /* Every field matched */
+ *tm = c;
+ *usec = tm_usec;
+ return 0;
+ }
+}
+
+/* Computes the first point in time strictly after 'usec' that matches 'spec', using the timezone
+ * currently in effect (or UTC if spec->utc is set). The input is bumped by one microsecond before it is
+ * handed to find_next(), which accepts its starting point as a match.
+ *
+ * Returns 0 and stores the result in *next on success. Returns -EINVAL if 'usec' exceeds
+ * USEC_TIMESTAMP_FORMATTABLE_MAX or the result cannot be converted back, or the error from find_next(). */
+static int calendar_spec_next_usec_impl(const CalendarSpec *spec, usec_t usec, usec_t *next) {
+        usec_t sub_second;
+        struct tm broken_down;
+        time_t seconds;
+        int r;
+
+        assert(spec);
+        assert(next);
+
+        if (usec > USEC_TIMESTAMP_FORMATTABLE_MAX)
+                return -EINVAL;
+
+        usec++;
+
+        /* Split into whole seconds (as broken-down time) and the microsecond remainder */
+        seconds = (time_t) (usec / USEC_PER_SEC);
+        sub_second = usec % USEC_PER_SEC;
+        assert_se(localtime_or_gmtime_r(&seconds, &broken_down, spec->utc));
+
+        r = find_next(spec, &broken_down, &sub_second);
+        if (r < 0)
+                return r;
+
+        seconds = mktime_or_timegm(&broken_down, spec->utc);
+        if (seconds < 0)
+                return -EINVAL;
+
+        *next = (usec_t) seconds * USEC_PER_SEC + sub_second;
+        return 0;
+}
+
+/* Result record that calendar_spec_next_usec() places in a shared anonymous mapping, so that its forked
+ * child can report back to the parent. */
+typedef struct SpecNextResult {
+ usec_t next; /* computed timestamp; the parent only uses it if return_value is 0 */
+ int return_value; /* result of calendar_spec_next_usec_impl(), or the setenv() error */
+} SpecNextResult;
+
+/* Public entry point: finds the next point in time after 'usec' matching 'spec'.
+ *
+ * Specs without an explicit timezone are evaluated directly. For specs that carry one, the computation
+ * runs in a forked child that sets $TZ to that timezone, so the environment of the calling process is
+ * left alone. The child passes its result back through a small shared anonymous mapping.
+ *
+ * Returns 0 and fills in *next on success, a negative errno-style value otherwise. */
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next) {
+        SpecNextResult *channel, result;
+        int k;
+
+        if (isempty(spec->timezone))
+                return calendar_spec_next_usec_impl(spec, usec, next);
+
+        channel = mmap(NULL, sizeof *channel, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+        if (channel == MAP_FAILED)
+                return negative_errno();
+
+        k = safe_fork("(sd-calendar)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL);
+        if (k < 0) {
+                (void) munmap(channel, sizeof *channel);
+                return k;
+        }
+
+        if (k == 0) {
+                /* Child: switch timezone, compute, report, and exit without returning to the caller */
+                if (setenv("TZ", spec->timezone, 1) == 0) {
+                        tzset();
+
+                        channel->return_value = calendar_spec_next_usec_impl(spec, usec, &channel->next);
+
+                        _exit(EXIT_SUCCESS);
+                }
+
+                channel->return_value = negative_errno();
+                _exit(EXIT_FAILURE);
+        }
+
+        /* Parent: take a private copy of the result before dropping the mapping */
+        result = *channel;
+        if (munmap(channel, sizeof *channel) < 0)
+                return negative_errno();
+
+        if (result.return_value == 0)
+                *next = result.next;
+
+        return result.return_value;
+}
diff --git a/src/shared/calendarspec.h b/src/shared/calendarspec.h
new file mode 100644
index 0000000..3bf8a39
--- /dev/null
+++ b/src/shared/calendarspec.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* A structure for specifying (possibly repetitive) points in calendar
+ * time, a la cron */
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+/* One entry of a singly linked chain of permitted values for a single calendar field. */
+typedef struct CalendarComponent {
+ int start; /* first permitted value */
+ int stop; /* upper bound for repeated values; the matching code treats a negative value as "no bound" */
+ int repeat; /* step between permitted values; only values > 0 make the matching code repeat */
+
+ struct CalendarComponent *next; /* next alternative for the same field, or NULL */
+} CalendarComponent;
+
+/* A parsed calendar expression: a weekday mask, timezone settings, and one component chain per field. */
+typedef struct CalendarSpec {
+ int weekdays_bits; /* bitmask of permitted weekdays */
+ bool end_of_month; /* if set, the 'day' chain is interpreted relative to the end of the month */
+ bool utc; /* evaluate in UTC rather than local time */
+ int dst; /* copied into tm_isdst while searching; the parser sets -1 or a tzname[] index (0 or 1) */
+ char *timezone; /* explicit timezone name, or NULL if none was given */
+
+ CalendarComponent *year;
+ CalendarComponent *month;
+ CalendarComponent *day;
+
+ CalendarComponent *hour;
+ CalendarComponent *minute;
+ CalendarComponent *microsecond; /* seconds and sub-seconds combined, expressed in microseconds */
+} CalendarSpec;
+
+CalendarSpec* calendar_spec_free(CalendarSpec *c);
+
+int calendar_spec_normalize(CalendarSpec *spec);
+bool calendar_spec_valid(CalendarSpec *spec);
+
+int calendar_spec_to_string(const CalendarSpec *spec, char **p);
+int calendar_spec_from_string(const char *p, CalendarSpec **spec);
+
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarSpec*, calendar_spec_free);
diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c
new file mode 100644
index 0000000..61df751
--- /dev/null
+++ b/src/shared/cgroup-show.c
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "output-mode.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+
+/* Prints one line per distinct PID in 'pids': the prefix, a glyph, the right-aligned PID and the command
+ * line of the process. The array is sorted and de-duplicated in place. With 'extra' set every line gets
+ * a bullet glyph; otherwise tree glyphs are used, and the last line gets the "right" (closing) glyph
+ * unless 'more' says further entries follow. Unless OUTPUT_FULL_WIDTH is set, the command line is limited
+ * to the columns left after the PID column. */
+static void show_pid_array(
+                pid_t pids[],
+                unsigned n_pids,
+                const char *prefix,
+                unsigned n_columns,
+                bool extra,
+                bool more,
+                OutputFlags flags) {
+
+        unsigned n_unique, i, pid_width;
+
+        if (n_pids == 0)
+                return;
+
+        typesafe_qsort(pids, n_pids, pid_compare_func);
+
+        /* Squash runs of equal PIDs; the array is sorted, so duplicates are adjacent */
+        n_unique = 1;
+        for (i = 1; i < n_pids; i++)
+                if (pids[i] != pids[n_unique - 1])
+                        pids[n_unique++] = pids[i];
+        n_pids = n_unique;
+
+        /* The last PID is the largest one, so it determines the column width */
+        pid_width = DECIMAL_STR_WIDTH(pids[n_pids - 1]);
+
+        if (flags & OUTPUT_FULL_WIDTH)
+                n_columns = 0;
+        else if (n_columns > pid_width+2)
+                n_columns -= pid_width+2;
+        else
+                n_columns = 20;
+
+        for (i = 0; i < n_pids; i++) {
+                _cleanup_free_ char *cmdline = NULL;
+
+                (void) get_process_cmdline(pids[i], n_columns, true, &cmdline);
+
+                if (extra)
+                        printf("%s%s ", prefix, special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET));
+                else {
+                        bool is_last = !more && i + 1 >= n_pids;
+
+                        printf("%s%s", prefix, special_glyph(is_last ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH));
+                }
+
+                printf("%*"PID_PRI" %s\n", pid_width, pids[i], strna(cmdline));
+        }
+}
+
+/* Lists the processes that are direct members of the cgroup at 'path': reads the PIDs from its
+ * "cgroup.procs" file, skips kernel threads unless OUTPUT_KERNEL_THREADS is set, and prints the rest
+ * via show_pid_array(). Returns 0 on success, a negative errno-style value on failure. */
+static int show_cgroup_one_by_path(
+                const char *path,
+                const char *prefix,
+                unsigned n_columns,
+                bool more,
+                OutputFlags flags) {
+
+        char *procs_path;
+        _cleanup_fclose_ FILE *procs = NULL;
+        size_t n_pids = 0, n_allocated = 0;
+        _cleanup_free_ pid_t *pids = NULL;
+        _cleanup_free_ char *mangled = NULL;
+        int r;
+
+        r = cg_mangle_path(path, &mangled);
+        if (r < 0)
+                return r;
+
+        procs_path = strjoina(mangled, "/cgroup.procs");
+        procs = fopen(procs_path, "re");
+        if (!procs)
+                return -errno;
+
+        for (;;) {
+                pid_t pid;
+
+                r = cg_read_pid(procs, &pid);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                if (!(flags & OUTPUT_KERNEL_THREADS) && is_kernel_thread(pid) > 0)
+                        continue;
+
+                if (!GREEDY_REALLOC(pids, n_allocated, n_pids + 1))
+                        return -ENOMEM;
+
+                assert(n_pids < n_allocated);
+                pids[n_pids++] = pid;
+        }
+
+        show_pid_array(pids, n_pids, prefix, n_columns, false, more, flags);
+
+        return 0;
+}
+
+/* Prints the cgroup at 'path' as a tree: first its own processes, then each child cgroup, recursively.
+ *
+ * Printing of a child is always delayed by one iteration ('last' holds the child seen most recently), so
+ * that the final child can be drawn with the closing tree glyph instead of the branch glyph. Unless
+ * OUTPUT_SHOW_ALL is set, children that cg_is_empty_recursive() reports as empty are skipped. If
+ * 'n_columns' is 0 the width is taken from columns(). Failures of the nested show_* calls are ignored.
+ *
+ * Returns 0 on success, a negative errno-style value on failure. */
+int show_cgroup_by_path(
+ const char *path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ _cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ char *gn = NULL;
+ bool shown_pids = false;
+ int r;
+
+ assert(path);
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ r = cg_mangle_path(path, &fn);
+ if (r < 0)
+ return r;
+
+ d = opendir(fn);
+ if (!d)
+ return -errno;
+
+ while ((r = cg_read_subgroup(d, &gn)) > 0) {
+ _cleanup_free_ char *k = NULL;
+
+ /* 'gn' is freed right after it was used to build the child path */
+ k = strjoin(fn, "/", gn);
+ free(gn);
+ if (!k)
+ return -ENOMEM;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k) > 0)
+ continue;
+
+ /* Show our own processes once, before the first child that gets printed */
+ if (!shown_pids) {
+ show_cgroup_one_by_path(path, prefix, n_columns, true, flags);
+ shown_pids = true;
+ }
+
+ /* Another child follows, so the previously remembered one is not the final entry */
+ if (last) {
+ printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_BRANCH), cg_unescape(basename(last)));
+
+ if (!p1) {
+ p1 = strappend(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
+ if (!p1)
+ return -ENOMEM;
+ }
+
+ /* NOTE(review): n_columns is unsigned, so n_columns-2 wraps around if n_columns is 1 */
+ show_cgroup_by_path(last, p1, n_columns-2, flags);
+ free(last);
+ }
+
+ /* 'last' takes over ownership of the path from 'k' */
+ last = TAKE_PTR(k);
+ }
+
+ if (r < 0)
+ return r;
+
+ /* No child was printed inside the loop, so our own processes are still pending */
+ if (!shown_pids)
+ show_cgroup_one_by_path(path, prefix, n_columns, !!last, flags);
+
+ /* The remembered child is the final one: draw it with the closing glyph */
+ if (last) {
+ printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT), cg_unescape(basename(last)));
+
+ if (!p2) {
+ p2 = strappend(prefix, " ");
+ if (!p2)
+ return -ENOMEM;
+ }
+
+ show_cgroup_by_path(last, p2, n_columns-2, flags);
+ }
+
+ return 0;
+}
+
+/* Resolves 'controller' and 'path' into a path via cg_get_path() and prints the cgroup tree found there
+ * with show_cgroup_by_path(). Returns the result of whichever step failed, or of the printing. */
+int show_cgroup(const char *controller,
+                const char *path,
+                const char *prefix,
+                unsigned n_columns,
+                OutputFlags flags) {
+        _cleanup_free_ char *fs_path = NULL;
+        int r;
+
+        assert(path);
+
+        r = cg_get_path(controller, path, NULL, &fs_path);
+        if (r >= 0)
+                r = show_cgroup_by_path(fs_path, prefix, n_columns, flags);
+
+        return r;
+}
+
+/* Prints those of the given PIDs whose cgroup (as reported by cg_pid_get_path()) does not lie below
+ * 'path', using the "extra" style of show_pid_array(). Returns 0 on success (also when there is nothing
+ * to print), a negative errno-style value on failure. */
+static int show_extra_pids(
+                const char *controller,
+                const char *path,
+                const char *prefix,
+                unsigned n_columns,
+                const pid_t pids[],
+                unsigned n_pids,
+                OutputFlags flags) {
+
+        _cleanup_free_ pid_t *outside = NULL;
+        unsigned n_outside = 0, idx;
+
+        assert(path);
+
+        if (n_pids == 0)
+                return 0;
+
+        if (n_columns == 0)
+                n_columns = columns();
+
+        prefix = strempty(prefix);
+
+        /* Work on a private copy, since show_pid_array() reorders its input */
+        outside = new(pid_t, n_pids);
+        if (!outside)
+                return -ENOMEM;
+
+        for (idx = 0; idx < n_pids; idx++) {
+                _cleanup_free_ char *cgroup = NULL;
+                int r;
+
+                r = cg_pid_get_path(controller, pids[idx], &cgroup);
+                if (r < 0)
+                        return r;
+
+                if (!path_startswith(cgroup, path))
+                        outside[n_outside++] = pids[idx];
+        }
+
+        show_pid_array(outside, n_outside, prefix, n_columns, true, false, flags);
+
+        return 0;
+}
+
+/* Prints the cgroup tree for 'controller'/'path' and afterwards the given extra PIDs that are not part
+ * of it. Stops at the first failing step and returns its error. */
+int show_cgroup_and_extra(
+                const char *controller,
+                const char *path,
+                const char *prefix,
+                unsigned n_columns,
+                const pid_t extra_pids[],
+                unsigned n_extra_pids,
+                OutputFlags flags) {
+
+        int r;
+
+        assert(path);
+
+        r = show_cgroup(controller, path, prefix, n_columns, flags);
+        if (r >= 0)
+                r = show_extra_pids(controller, path, prefix, n_columns, extra_pids, n_extra_pids, flags);
+
+        return r;
+}
+
+/* Queries the "ControlGroup" property of 'unit' from org.freedesktop.systemd1 over 'bus' and stores the
+ * resulting string in *ret. Logs an error and returns a negative errno-style value on failure, returns
+ * 0 on success. */
+int show_cgroup_get_unit_path_and_warn(
+                sd_bus *bus,
+                const char *unit,
+                char **ret) {
+
+        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_free_ char *object_path = NULL;
+        int r;
+
+        object_path = unit_dbus_path_from_name(unit);
+        if (!object_path)
+                return log_oom();
+
+        r = sd_bus_get_property_string(
+                        bus,
+                        "org.freedesktop.systemd1",
+                        object_path,
+                        unit_dbus_interface_from_name(unit),
+                        "ControlGroup",
+                        &error,
+                        ret);
+        if (r >= 0)
+                return 0;
+
+        return log_error_errno(r, "Failed to query unit control group path: %s",
+                               bus_error_message(&error, r));
+}
+
+/* Determines the cgroup path to start showing from and stores it, newly allocated, in *ret.
+ *
+ * Without 'machine' the root is whatever cg_get_root_path() reports. With 'machine', the SCOPE entry of
+ * /run/systemd/machines/<machine> names a unit, whose control group is queried over the bus. If 'prefix'
+ * is given it is appended to the root. Logs and returns a negative errno-style value on failure. */
+int show_cgroup_get_path_and_warn(
+                const char *machine,
+                const char *prefix,
+                char **ret) {
+
+        int r;
+        _cleanup_free_ char *root = NULL;
+        char *joined;
+
+        if (!machine) {
+                r = cg_get_root_path(&root);
+                if (r == -ENOMEDIUM)
+                        return log_error_errno(r, "Failed to get root control group path.\n"
+                                                  "No cgroup filesystem mounted on /sys/fs/cgroup");
+                if (r < 0)
+                        return log_error_errno(r, "Failed to get root control group path: %m");
+        } else {
+                _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+                _cleanup_free_ char *unit = NULL;
+                const char *machine_file;
+
+                machine_file = strjoina("/run/systemd/machines/", machine);
+                r = parse_env_file(NULL, machine_file, "SCOPE", &unit);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to load machine data: %m");
+
+                r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL, false, &bus);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to create bus connection: %m");
+
+                r = show_cgroup_get_unit_path_and_warn(bus, unit, &root);
+                if (r < 0)
+                        return r;
+        }
+
+        if (!prefix) {
+                *ret = TAKE_PTR(root);
+                return 0;
+        }
+
+        joined = strjoin(root, prefix);
+        if (!joined)
+                return log_oom();
+
+        *ret = joined;
+        return 0;
+}
diff --git a/src/shared/cgroup-show.h b/src/shared/cgroup-show.h
new file mode 100644
index 0000000..3593e9d
--- /dev/null
+++ b/src/shared/cgroup-show.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+
+#include "logs-show.h"
+#include "output-mode.h"
+
+int show_cgroup_by_path(const char *path, const char *prefix, unsigned columns, OutputFlags flags);
+int show_cgroup(const char *controller, const char *path, const char *prefix, unsigned columns, OutputFlags flags);
+
+int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, unsigned n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags);
+
+int show_cgroup_get_unit_path_and_warn(
+ sd_bus *bus,
+ const char *unit,
+ char **ret);
+int show_cgroup_get_path_and_warn(
+ const char *machine,
+ const char *prefix,
+ char **ret);
diff --git a/src/shared/clean-ipc.c b/src/shared/clean-ipc.c
new file mode 100644
index 0000000..46fa680
--- /dev/null
+++ b/src/shared/clean-ipc.c
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <mqueue.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "clean-ipc.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/* Returns true if the object owned by subject_uid/subject_gid is selected for deletion: either the
+ * delete UID is valid and equals the subject's UID, or the delete GID is valid and equals the subject's
+ * GID. */
+static bool match_uid_gid(uid_t subject_uid, gid_t subject_gid, uid_t delete_uid, gid_t delete_gid) {
+        return (uid_is_valid(delete_uid) && subject_uid == delete_uid) ||
+               (gid_is_valid(delete_gid) && subject_gid == delete_gid);
+}
+
+/* Walks /proc/sysvipc/shm and handles every SysV shared memory segment that has no attachments and is
+ * owned by 'delete_uid' or 'delete_gid'.
+ *
+ * With 'rm' false this is a pure search: returns 1 as soon as a matching segment is found. With 'rm'
+ * true matching segments are removed; returns 1 if at least one was removed and no removal failed, or
+ * the (negative) result of the last failed removal.
+ *
+ * Returns 0 if nothing matched or /proc/sysvipc/shm does not exist, and a negative errno-style value if
+ * the file cannot be opened or read. */
+static int clean_sysvipc_shm(uid_t delete_uid, gid_t delete_gid, bool rm) {
+        _cleanup_fclose_ FILE *f = NULL;
+        bool first = true;
+        int ret = 0, r;
+
+        f = fopen("/proc/sysvipc/shm", "re");
+        if (!f) {
+                if (errno == ENOENT)
+                        return 0;
+
+                return log_warning_errno(errno, "Failed to open /proc/sysvipc/shm: %m");
+        }
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                unsigned n_attached;
+                pid_t cpid, lpid;
+                uid_t uid, cuid;
+                gid_t gid, cgid;
+                int shmid;
+
+                r = read_line(f, LONG_LINE_MAX, &line);
+                if (r < 0)
+                        /* The error is carried in the return value, not in errno (same as in the
+                         * semaphore and message queue variants) */
+                        return log_warning_errno(r, "Failed to read /proc/sysvipc/shm: %m");
+                if (r == 0)
+                        break;
+
+                /* The first line is the column header */
+                if (first) {
+                        first = false;
+                        continue;
+                }
+
+                /* Lines that do not parse are skipped silently */
+                if (sscanf(line, "%*i %i %*o %*u " PID_FMT " " PID_FMT " %u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+                           &shmid, &cpid, &lpid, &n_attached, &uid, &gid, &cuid, &cgid) != 8)
+                        continue;
+
+                /* Segments that are still attached somewhere are left alone */
+                if (n_attached > 0)
+                        continue;
+
+                if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+                        continue;
+
+                if (!rm)
+                        return 1;
+
+                if (shmctl(shmid, IPC_RMID, NULL) < 0) {
+
+                        /* Ignore entries that are already deleted */
+                        if (IN_SET(errno, EIDRM, EINVAL))
+                                continue;
+
+                        ret = log_warning_errno(errno,
+                                                "Failed to remove SysV shared memory segment %i: %m",
+                                                shmid);
+                } else {
+                        log_debug("Removed SysV shared memory segment %i.", shmid);
+                        if (ret == 0)
+                                ret = 1;
+                }
+        }
+
+        return ret;
+}
+
+/* Walks /proc/sysvipc/sem and handles every SysV semaphore set owned by 'delete_uid' or 'delete_gid'.
+ *
+ * With 'rm' false, returns 1 as soon as a matching set is found. With 'rm' true, matching sets are
+ * removed; returns 1 if at least one was removed and no removal failed, or the result of the last failed
+ * removal. Returns 0 if nothing matched or the proc file is missing, and a negative errno-style value if
+ * it cannot be opened or read. */
+static int clean_sysvipc_sem(uid_t delete_uid, gid_t delete_gid, bool rm) {
+        _cleanup_fclose_ FILE *table = NULL;
+        bool seen_header = false;
+        int result = 0;
+
+        table = fopen("/proc/sysvipc/sem", "re");
+        if (!table) {
+                if (errno != ENOENT)
+                        return log_warning_errno(errno, "Failed to open /proc/sysvipc/sem: %m");
+
+                return 0;
+        }
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                uid_t uid, cuid;
+                gid_t gid, cgid;
+                int semid, k;
+
+                k = read_line(table, LONG_LINE_MAX, &line);
+                if (k < 0)
+                        return log_warning_errno(k, "Failed to read /proc/sysvipc/sem: %m");
+                if (k == 0)
+                        break;
+
+                /* Skip the column header */
+                if (!seen_header) {
+                        seen_header = true;
+                        continue;
+                }
+
+                if (sscanf(line, "%*i %i %*o %*u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+                           &semid, &uid, &gid, &cuid, &cgid) != 5)
+                        continue;
+
+                if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+                        continue;
+
+                if (!rm)
+                        return 1;
+
+                if (semctl(semid, 0, IPC_RMID) >= 0) {
+                        log_debug("Removed SysV semaphore %i.", semid);
+                        if (result == 0)
+                                result = 1;
+                        continue;
+                }
+
+                /* Ignore entries that are already deleted */
+                if (IN_SET(errno, EIDRM, EINVAL))
+                        continue;
+
+                result = log_warning_errno(errno,
+                                           "Failed to remove SysV semaphores object %i: %m",
+                                           semid);
+        }
+
+        return result;
+}
+
+/* Walks /proc/sysvipc/msg and handles every SysV message queue owned by 'delete_uid' or 'delete_gid'.
+ *
+ * With 'rm' false, returns 1 as soon as a matching queue is found. With 'rm' true, matching queues are
+ * removed; returns 1 if at least one was removed and no removal failed, or the result of the last failed
+ * removal. Returns 0 if nothing matched or the proc file is missing, and a negative errno-style value if
+ * it cannot be opened or read. */
+static int clean_sysvipc_msg(uid_t delete_uid, gid_t delete_gid, bool rm) {
+        _cleanup_fclose_ FILE *table = NULL;
+        bool seen_header = false;
+        int result = 0;
+
+        table = fopen("/proc/sysvipc/msg", "re");
+        if (!table) {
+                if (errno != ENOENT)
+                        return log_warning_errno(errno, "Failed to open /proc/sysvipc/msg: %m");
+
+                return 0;
+        }
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                uid_t uid, cuid;
+                gid_t gid, cgid;
+                pid_t cpid, lpid;
+                int msgid, k;
+
+                k = read_line(table, LONG_LINE_MAX, &line);
+                if (k < 0)
+                        return log_warning_errno(k, "Failed to read /proc/sysvipc/msg: %m");
+                if (k == 0)
+                        break;
+
+                /* Skip the column header */
+                if (!seen_header) {
+                        seen_header = true;
+                        continue;
+                }
+
+                if (sscanf(line, "%*i %i %*o %*u %*u " PID_FMT " " PID_FMT " " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+                           &msgid, &cpid, &lpid, &uid, &gid, &cuid, &cgid) != 7)
+                        continue;
+
+                if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+                        continue;
+
+                if (!rm)
+                        return 1;
+
+                if (msgctl(msgid, IPC_RMID, NULL) >= 0) {
+                        log_debug("Removed SysV message queue %i.", msgid);
+                        if (result == 0)
+                                result = 1;
+                        continue;
+                }
+
+                /* Ignore entries that are already deleted */
+                if (IN_SET(errno, EIDRM, EINVAL))
+                        continue;
+
+                result = log_warning_errno(errno,
+                                           "Failed to remove SysV message queue %i: %m",
+                                           msgid);
+        }
+
+        return result;
+}
+
+/* Recursively walks the directory 'dir' (an opened directory in the POSIX shared memory tree) and
+ * handles every entry owned by 'uid' or 'gid'. Subdirectories are descended into first and are then
+ * themselves subject to the same ownership check.
+ *
+ * With 'rm' false this is a pure search: returns 1 as soon as a matching entry is found, on any level.
+ * With 'rm' true matching entries are unlinked; returns > 0 if something was removed (on any level) and
+ * nothing failed, or the negative result of the last failure.
+ *
+ * Returns 0 if nothing matched, and a negative errno-style value if reading the directory fails. */
+static int clean_posix_shm_internal(DIR *dir, uid_t uid, gid_t gid, bool rm) {
+        struct dirent *de;
+        int ret = 0, r;
+
+        assert(dir);
+
+        FOREACH_DIRENT_ALL(de, dir, goto fail) {
+                struct stat st;
+
+                if (dot_or_dot_dot(de->d_name))
+                        continue;
+
+                if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+                        /* Entries that vanished in the meantime are not an error */
+                        if (errno == ENOENT)
+                                continue;
+
+                        ret = log_warning_errno(errno, "Failed to stat() POSIX shared memory segment %s: %m", de->d_name);
+                        continue;
+                }
+
+                if (S_ISDIR(st.st_mode)) {
+                        _cleanup_closedir_ DIR *kid = NULL;
+
+                        kid = xopendirat(dirfd(dir), de->d_name, O_NOFOLLOW|O_NOATIME);
+                        if (!kid) {
+                                if (errno != ENOENT)
+                                        ret = log_warning_errno(errno, "Failed to enter shared memory directory %s: %m", de->d_name);
+                        } else {
+                                r = clean_posix_shm_internal(kid, uid, gid, rm);
+                                if (r < 0)
+                                        ret = r;
+                                else if (r > 0) {
+                                        /* Something matched further down. Report it like a match on
+                                         * this level, otherwise matches in subdirectories would go
+                                         * unnoticed by the caller. */
+                                        if (!rm)
+                                                return r;
+                                        if (ret == 0)
+                                                ret = r;
+                                }
+                        }
+
+                        if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+                                continue;
+
+                        if (!rm)
+                                return 1;
+
+                        if (unlinkat(dirfd(dir), de->d_name, AT_REMOVEDIR) < 0) {
+
+                                if (errno == ENOENT)
+                                        continue;
+
+                                ret = log_warning_errno(errno, "Failed to remove POSIX shared memory directory %s: %m", de->d_name);
+                        } else {
+                                log_debug("Removed POSIX shared memory directory %s", de->d_name);
+                                if (ret == 0)
+                                        ret = 1;
+                        }
+                } else {
+
+                        if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+                                continue;
+
+                        if (!rm)
+                                return 1;
+
+                        if (unlinkat(dirfd(dir), de->d_name, 0) < 0) {
+
+                                if (errno == ENOENT)
+                                        continue;
+
+                                ret = log_warning_errno(errno, "Failed to remove POSIX shared memory segment %s: %m", de->d_name);
+                        } else {
+                                log_debug("Removed POSIX shared memory segment %s", de->d_name);
+                                if (ret == 0)
+                                        ret = 1;
+                        }
+                }
+        }
+
+        return ret;
+
+fail:
+        return log_warning_errno(errno, "Failed to read /dev/shm: %m");
+}
+
+/* Opens /dev/shm and hands it to clean_posix_shm_internal(). A missing /dev/shm counts as "nothing
+ * found" (0); any other failure to open it is logged and returned as negative errno-style value. */
+static int clean_posix_shm(uid_t uid, gid_t gid, bool rm) {
+        _cleanup_closedir_ DIR *shm_dir = NULL;
+
+        shm_dir = opendir("/dev/shm");
+        if (shm_dir)
+                return clean_posix_shm_internal(shm_dir, uid, gid, rm);
+
+        if (errno == ENOENT)
+                return 0;
+
+        return log_warning_errno(errno, "Failed to open /dev/shm: %m");
+}
+
+/* Walks /dev/mqueue and handles every POSIX message queue owned by 'uid' or 'gid'.
+ *
+ * With 'rm' false, returns 1 as soon as a matching queue is found. With 'rm' true, matching queues are
+ * removed with mq_unlink(); returns 1 if at least one was removed and nothing failed, or the result of
+ * the last failure. Returns 0 if nothing matched or /dev/mqueue is missing, and a negative errno-style
+ * value if the directory cannot be opened or read. */
+static int clean_posix_mq(uid_t uid, gid_t gid, bool rm) {
+        _cleanup_closedir_ DIR *mq_dir = NULL;
+        struct dirent *entry;
+        int result = 0;
+
+        mq_dir = opendir("/dev/mqueue");
+        if (!mq_dir) {
+                if (errno != ENOENT)
+                        return log_warning_errno(errno, "Failed to open /dev/mqueue: %m");
+
+                return 0;
+        }
+
+        FOREACH_DIRENT_ALL(entry, mq_dir, goto fail) {
+                struct stat st;
+                char fn[1+strlen(entry->d_name)+1];
+
+                if (dot_or_dot_dot(entry->d_name))
+                        continue;
+
+                if (fstatat(dirfd(mq_dir), entry->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+                        if (errno != ENOENT)
+                                result = log_warning_errno(errno,
+                                                           "Failed to stat() MQ segment %s: %m",
+                                                           entry->d_name);
+                        continue;
+                }
+
+                if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+                        continue;
+
+                if (!rm)
+                        return 1;
+
+                /* mq_unlink() is called with the entry name prefixed by a slash */
+                fn[0] = '/';
+                strcpy(fn+1, entry->d_name);
+
+                if (mq_unlink(fn) >= 0) {
+                        log_debug("Removed POSIX message queue %s", fn);
+                        if (result == 0)
+                                result = 1;
+                        continue;
+                }
+
+                if (errno == ENOENT)
+                        continue;
+
+                result = log_warning_errno(errno,
+                                           "Failed to unlink POSIX message queue %s: %m",
+                                           fn);
+        }
+
+        return result;
+
+fail:
+        return log_warning_errno(errno, "Failed to read /dev/mqueue: %m");
+}
+
+/* Cleans up, or merely searches for, IPC objects owned by the given UID or GID.
+ *
+ * rm == true:  remove all matching objects. Returns the first non-zero result reported by the individual
+ *              cleaners, i.e. < 0 on error, > 0 if matching objects were found and removed, and 0 if
+ *              nothing matched.
+ * rm == false: search only. Returns < 0 on error, > 0 if a matching object exists, 0 otherwise.
+ *
+ * Root is special: a UID or GID of 0 is never cleaned up. When searching we always claim that objects
+ * exist for it; when removing, the ID is simply treated as unset. */
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm) {
+        static int (* const cleaners[])(uid_t, gid_t, bool) = {
+                clean_sysvipc_shm,
+                clean_sysvipc_sem,
+                clean_sysvipc_msg,
+                clean_posix_shm,
+                clean_posix_mq,
+        };
+        int ret = 0;
+        size_t i;
+
+        if (!rm && (uid == 0 || gid == 0))
+                return 1;
+
+        if (uid == 0)
+                uid = UID_INVALID;
+        if (gid == 0)
+                gid = GID_INVALID;
+
+        /* Anything to do? */
+        if (!uid_is_valid(uid) && !gid_is_valid(gid))
+                return 0;
+
+        for (i = 0; i < sizeof(cleaners) / sizeof(cleaners[0]); i++) {
+                int r;
+
+                r = cleaners[i](uid, gid, rm);
+                if (r == 0)
+                        continue;
+
+                /* When searching, the first hit (or error) settles it */
+                if (!rm)
+                        return r;
+
+                if (ret == 0)
+                        ret = r;
+        }
+
+        return ret;
+}
+
+/* Removes all IPC objects owned by 'uid', without any GID criterion. Returns the result of
+ * clean_ipc_internal(). */
+int clean_ipc_by_uid(uid_t uid) {
+        const bool remove_objects = true;
+
+        return clean_ipc_internal(uid, GID_INVALID, remove_objects);
+}
+
+/* Removes all IPC objects owned by 'gid', without any UID criterion. Returns the result of
+ * clean_ipc_internal(). */
+int clean_ipc_by_gid(gid_t gid) {
+        const bool remove_objects = true;
+
+        return clean_ipc_internal(UID_INVALID, gid, remove_objects);
+}
diff --git a/src/shared/clean-ipc.h b/src/shared/clean-ipc.h
new file mode 100644
index 0000000..eaff47d
--- /dev/null
+++ b/src/shared/clean-ipc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+#include "user-util.h"
+
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm);
+
+/* Remove all IPC objects owned by the specified UID or GID */
+int clean_ipc_by_uid(uid_t uid);
+int clean_ipc_by_gid(gid_t gid);
+
+/* Searches for IPC objects owned by the specified UID or GID without removing anything. Returns > 0 if
+ * at least one exists, == 0 if none does; other values are passed through from clean_ipc_internal(). */
+static inline int search_ipc(uid_t uid, gid_t gid) {
+        const bool remove_objects = false;
+
+        return clean_ipc_internal(uid, gid, remove_objects);
+}
diff --git a/src/shared/clock-util.c b/src/shared/clock-util.c
new file mode 100644
index 0000000..1877a81
--- /dev/null
+++ b/src/shared/clock-util.c
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <time.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "alloc-util.h"
+#include "clock-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+/* Reads the current time of /dev/rtc into 'tm' using the RTC_RD_TIME ioctl. Returns 0 on success,
+ * -errno if the device cannot be opened or the ioctl fails. */
+int clock_get_hwclock(struct tm *tm) {
+        _cleanup_close_ int rtc_fd = -1;
+        int k;
+
+        assert(tm);
+
+        rtc_fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+        if (rtc_fd < 0)
+                return -errno;
+
+        /* Note that this does not initialize the timezone fields of struct tm! */
+        k = ioctl(rtc_fd, RTC_RD_TIME, tm);
+        if (k < 0)
+                return -errno;
+
+        /* Daylight saving is unknown here; reset the field so that mktime() does not get confused. */
+        tm->tm_isdst = -1;
+
+        return 0;
+}
+
+/* Writes the time in 'tm' to /dev/rtc using the RTC_SET_TIME ioctl. Returns 0 on success, -errno if the
+ * device cannot be opened or the ioctl fails. */
+int clock_set_hwclock(const struct tm *tm) {
+        _cleanup_close_ int rtc_fd = -1;
+        int k;
+
+        assert(tm);
+
+        rtc_fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+        if (rtc_fd < 0)
+                return -errno;
+
+        k = ioctl(rtc_fd, RTC_SET_TIME, tm);
+        if (k < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* Determines whether the hardware clock is configured to run in local time, by looking at the third
+ * line of the adjtime file ('adjtime_path', or /etc/adjtime if NULL).
+ *
+ * Returns > 0 if that line is exactly "LOCAL", 0 if it is anything else, if the file has fewer than
+ * three lines, or if the file does not exist (UTC is the default), and a negative errno-style value on
+ * other failures. */
+int clock_is_localtime(const char* adjtime_path) {
+        _cleanup_fclose_ FILE *adjtime = NULL;
+        _cleanup_free_ char *mode = NULL;
+        unsigned skipped;
+        int r;
+
+        if (!adjtime_path)
+                adjtime_path = "/etc/adjtime";
+
+        /*
+         * The third line of adjtime is "UTC" or "LOCAL" or nothing.
+         *   # /etc/adjtime
+         *   0.0 0 0
+         *   0
+         *   UTC
+         */
+        adjtime = fopen(adjtime_path, "re");
+        if (!adjtime) {
+                if (errno != ENOENT)
+                        return -errno;
+
+                /* adjtime not present → default to UTC */
+                return false;
+        }
+
+        /* The first two lines are of no interest */
+        for (skipped = 0; skipped < 2; skipped++) {
+                r = read_line(adjtime, LONG_LINE_MAX, NULL);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return false; /* less than three lines → default to UTC */
+        }
+
+        r = read_line(adjtime, LONG_LINE_MAX, &mode);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return false; /* less than three lines → default to UTC */
+
+        return streq(mode, "LOCAL");
+}
+
+/* Tells the kernel the current local timezone offset by calling settimeofday() with a NULL time and a
+ * timezone whose tz_minuteswest is derived from the tm_gmtoff of the current local time. If 'min' is
+ * non-NULL, the offset in minutes is stored there on success. Returns 0 on success, a negative
+ * errno-style value if settimeofday() fails. */
+int clock_set_timezone(int *min) {
+        const struct timeval *tv_null = NULL;
+        struct timespec now_ts;
+        struct timezone tz;
+        struct tm local;
+        int offset_minutes;
+
+        assert_se(clock_gettime(CLOCK_REALTIME, &now_ts) == 0);
+        assert_se(localtime_r(&now_ts.tv_sec, &local));
+        offset_minutes = local.tm_gmtoff / 60;
+
+        tz = (struct timezone) {
+                .tz_minuteswest = -offset_minutes,
+                .tz_dsttime = 0, /* DST_NONE */
+        };
+
+        /*
+         * If the RTC runs in local time rather than UTC, the very first call to settimeofday() sets the
+         * kernel's timezone and warps the system clock, so that it runs in UTC instead of the local time
+         * that was read from the RTC.
+         */
+        if (settimeofday(tv_null, &tz) < 0)
+                return negative_errno();
+
+        if (min)
+                *min = offset_minutes;
+
+        return 0;
+}
+
+/* Calls settimeofday() with a NULL time and an all-zero timezone. Returns 0 on success, -errno on
+ * failure. */
+int clock_reset_timewarp(void) {
+        const struct timeval *tv_null = NULL;
+        struct timezone tz = {
+                .tz_minuteswest = 0,
+                .tz_dsttime = 0, /* DST_NONE */
+        };
+
+        /*
+         * The very first settimeofday() call performs the time warp magic. Issue a dummy call here, so
+         * that the warping is sealed and all later calls behave as expected.
+         */
+        if (settimeofday(tv_null, &tz) < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* TIME_EPOCH (in seconds) converted to microseconds */
+#define TIME_EPOCH_USEC ((usec_t) TIME_EPOCH * USEC_PER_SEC)
+
+/* Makes sure the realtime clock is not set to a point before TIME_EPOCH. Returns 0 if the clock was
+ * already at or past it, 1 if the clock had to be set to TIME_EPOCH, and -errno if setting it failed. */
+int clock_apply_epoch(void) {
+        struct timespec ts;
+
+        if (now(CLOCK_REALTIME) < TIME_EPOCH_USEC) {
+                if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, TIME_EPOCH_USEC)) < 0)
+                        return -errno;
+
+                return 1;
+        }
+
+        return 0;
+}
diff --git a/src/shared/clock-util.h b/src/shared/clock-util.h
new file mode 100644
index 0000000..b9db54e
--- /dev/null
+++ b/src/shared/clock-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <time.h>
+
+/* NOTE(review): implementation only partly visible here; it yields true when the relevant adjtime line
+ * reads "LOCAL" and false (UTC) when the file is missing or too short. */
+int clock_is_localtime(const char* adjtime_path);
+/* Passes the current UTC offset to the kernel via settimeofday(); *min (optional) receives the offset in minutes. */
+int clock_set_timezone(int *min);
+/* Issues a dummy settimeofday() call with a zero timezone so the first-call time warp is sealed. */
+int clock_reset_timewarp(void);
+int clock_get_hwclock(struct tm *tm);
+int clock_set_hwclock(const struct tm *tm);
+/* Advances CLOCK_REALTIME to the build-time epoch if it is earlier; returns 1 if changed, 0 if not. */
+int clock_apply_epoch(void);
diff --git a/src/shared/condition.c b/src/shared/condition.c
new file mode 100644
index 0000000..fb77966
--- /dev/null
+++ b/src/shared/condition.c
@@ -0,0 +1,733 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "architecture.h"
+#include "audit-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "efivars.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "glob-util.h"
+#include "hostname-util.h"
+#include "ima-util.h"
+#include "list.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "env-file.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "tomoyo-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+/* Allocates a zero-initialized Condition, fills in type/trigger/negate and stores a private copy of
+ * parameter. A parameter must be given for every type except CONDITION_NULL, which must get NULL.
+ * Returns NULL if an allocation fails. */
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate) {
+        Condition *cond;
+
+        assert(type >= 0);
+        assert(type < _CONDITION_TYPE_MAX);
+        assert((!parameter) == (type == CONDITION_NULL));
+
+        cond = new0(Condition, 1);
+        if (!cond)
+                return NULL;
+
+        cond->type = type;
+        cond->trigger = trigger;
+        cond->negate = negate;
+
+        if (free_and_strdup(&cond->parameter, parameter) < 0)
+                return mfree(cond);
+
+        return cond;
+}
+
+/* Releases a single condition together with its parameter string. c must not be NULL. */
+void condition_free(Condition *c) {
+        char *param;
+
+        assert(c);
+
+        param = c->parameter;
+        free(c);
+        free(param);
+}
+
+/* Frees every condition on the list starting at first. Always returns NULL, so callers can reset
+ * their list head in the same statement. */
+Condition* condition_free_list(Condition *first) {
+        Condition *cur, *next;
+
+        LIST_FOREACH_SAFE(conditions, cur, next, first)
+                condition_free(cur);
+
+        return NULL;
+}
+
+/* ConditionKernelCommandLine=: looks for the parameter among the words of the kernel command line.
+ * A parameter containing '=' must match a word exactly; otherwise a word matches if it equals the
+ * parameter or starts with "<parameter>=". Returns > 0 on match, 0 if absent, < 0 on error. */
+static int condition_test_kernel_command_line(Condition *c) {
+        _cleanup_free_ char *cmdline = NULL;
+        const char *cursor;
+        bool has_value;
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_KERNEL_COMMAND_LINE);
+
+        r = proc_cmdline(&cmdline);
+        if (r < 0)
+                return r;
+
+        has_value = !!strchr(c->parameter, '=');
+        cursor = cmdline;
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                const char *rest;
+
+                r = extract_first_word(&cursor, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return false; /* all words consumed, nothing matched */
+
+                if (has_value) {
+                        if (streq(word, c->parameter))
+                                return true;
+
+                        continue;
+                }
+
+                rest = startswith(word, c->parameter);
+                if (rest && IN_SET(*rest, 0, '='))
+                        return true;
+        }
+}
+
+/* ConditionKernelVersion=: compares uname()'s release string against the parameter. With one of the
+ * comparison prefixes below, the rest of the parameter is compared via str_verscmp(); without a
+ * prefix the parameter is used as an fnmatch() pattern. */
+static int condition_test_kernel_version(Condition *c) {
+        enum {
+                /* Checked in this order. Two-character operators come first because the
+                 * one-character ones are prefixes of them. */
+                LOWER_OR_EQUAL,
+                GREATER_OR_EQUAL,
+                LOWER,
+                GREATER,
+                EQUAL,
+                _ORDER_MAX,
+        };
+
+        static const char *const prefix[_ORDER_MAX] = {
+                [LOWER_OR_EQUAL]   = "<=",
+                [GREATER_OR_EQUAL] = ">=",
+                [LOWER]            = "<",
+                [GREATER]          = ">",
+                [EQUAL]            = "=",
+        };
+        const char *operand = NULL;
+        struct utsname uts;
+        size_t op;
+        int cmp;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_KERNEL_VERSION);
+
+        assert_se(uname(&uts) >= 0);
+
+        for (op = 0; op < _ORDER_MAX; op++) {
+                operand = startswith(c->parameter, prefix[op]);
+                if (operand)
+                        break;
+        }
+
+        /* No operator prefix: the whole parameter is a glob pattern */
+        if (!operand)
+                return fnmatch(skip_leading_chars(c->parameter, NULL), uts.release, 0) == 0;
+
+        cmp = str_verscmp(uts.release, skip_leading_chars(operand, NULL));
+
+        switch (op) {
+
+        case LOWER_OR_EQUAL:
+                return cmp <= 0;
+
+        case GREATER_OR_EQUAL:
+                return cmp >= 0;
+
+        case LOWER:
+                return cmp < 0;
+
+        case GREATER:
+                return cmp > 0;
+
+        case EQUAL:
+                return cmp == 0;
+
+        default:
+                assert_not_reached("Can't compare");
+        }
+}
+
+/* ConditionUser=: the parameter may be a numeric UID, the special string "@system", or a user name.
+ * Matches against both the real and the effective UID of this process. */
+static int condition_test_user(Condition *c) {
+        _cleanup_free_ char *self_name = NULL;
+        const char *name;
+        uid_t uid;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_USER);
+
+        /* Numeric UID? */
+        if (parse_uid(c->parameter, &uid) >= 0)
+                return uid == getuid() || uid == geteuid();
+
+        if (streq("@system", c->parameter))
+                return uid_is_system(getuid()) || uid_is_system(geteuid());
+
+        self_name = getusername_malloc();
+        if (!self_name)
+                return -ENOMEM;
+
+        if (streq(self_name, c->parameter))
+                return 1;
+
+        /* In PID 1 only "root" is recognized; no user database lookup is attempted */
+        if (getpid_cached() == 1)
+                return streq(c->parameter, "root");
+
+        name = c->parameter;
+        if (get_user_creds(&name, &uid, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING) < 0)
+                return 0;
+
+        return uid == getuid() || uid == geteuid();
+}
+
+/* ConditionControlGroupController=: true if every controller named in the parameter is contained in
+ * the mask reported by cg_mask_supported(). A parameter that cannot be parsed, or that yields an
+ * empty mask, is treated as satisfied. */
+static int condition_test_control_group_controller(Condition *c) {
+        CGroupMask supported, requested = 0;
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_CONTROL_GROUP_CONTROLLER);
+
+        r = cg_mask_supported(&supported);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to determine supported controllers: %m");
+
+        r = cg_mask_from_string(c->parameter, &requested);
+        if (r >= 0 && requested > 0)
+                return FLAGS_SET(supported, requested);
+
+        /* Note: an unknown controller mixed in with valid ones is not caught here; such strings
+         * are judged only by the valid controllers that were found. */
+        log_debug("Failed to parse cgroup string: %s", c->parameter);
+        return 1;
+}
+
+/* ConditionGroup=: the parameter is either a numeric GID (checked with in_gid()) or a group name
+ * (checked with in_group()). */
+static int condition_test_group(Condition *c) {
+        gid_t gid;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_GROUP);
+
+        if (parse_gid(c->parameter, &gid) >= 0)
+                return in_gid(gid);
+
+        /* PID 1 must not trigger NSS lookups, hence only "root" is recognized by name there */
+        if (getpid_cached() == 1)
+                return streq(c->parameter, "root");
+
+        return in_group(c->parameter) > 0;
+}
+
+/* ConditionVirtualization=: accepts "private-users", a boolean, the categories "vm"/"container", or
+ * the name of a specific virtualization technology. */
+static int condition_test_virtualization(Condition *c) {
+        int wanted, detected;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_VIRTUALIZATION);
+
+        if (streq(c->parameter, "private-users"))
+                return running_in_userns();
+
+        detected = detect_virtualization();
+        if (detected < 0)
+                return detected;
+
+        /* Boolean form: "is any virtualization in use?" */
+        wanted = parse_boolean(c->parameter);
+        if (wanted >= 0)
+                return wanted == !!detected;
+
+        /* Category form */
+        if (streq(c->parameter, "vm"))
+                return VIRTUALIZATION_IS_VM(detected);
+
+        if (streq(c->parameter, "container"))
+                return VIRTUALIZATION_IS_CONTAINER(detected);
+
+        /* Otherwise the parameter has to name the detected technology itself */
+        if (detected == VIRTUALIZATION_NONE)
+                return false;
+
+        return streq(c->parameter, virtualization_to_string(detected));
+}
+
+/* ConditionArchitecture=: compares uname_architecture() with the architecture named by the parameter;
+ * "native" stands for native_architecture(). */
+static int condition_test_architecture(Condition *c) {
+        int running, wanted;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_ARCHITECTURE);
+
+        running = uname_architecture();
+        if (running < 0)
+                return running;
+
+        if (streq(c->parameter, "native"))
+                return running == native_architecture();
+
+        wanted = architecture_from_string(c->parameter);
+        if (wanted < 0) /* an architecture we do not know cannot be the one we run on */
+                return false;
+
+        return running == wanted;
+}
+
+/* ConditionHost=: if the parameter parses as a 128-bit ID it is compared with the machine ID;
+ * otherwise it is used as a case-insensitive fnmatch() pattern against the host name. */
+static int condition_test_host(Condition *c) {
+        _cleanup_free_ char *hostname = NULL;
+        sd_id128_t wanted, machine;
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_HOST);
+
+        if (sd_id128_from_string(c->parameter, &wanted) < 0) {
+                hostname = gethostname_malloc();
+                if (!hostname)
+                        return -ENOMEM;
+
+                return fnmatch(c->parameter, hostname, FNM_CASEFOLD) == 0;
+        }
+
+        r = sd_id128_get_machine(&machine);
+        if (r < 0)
+                return r;
+
+        return sd_id128_equal(wanted, machine);
+}
+
+/* ConditionACPower=: the boolean parameter must agree with whether on_ac_power() returns non-zero.
+ * An unparsable parameter yields the negative error from parse_boolean(). */
+static int condition_test_ac_power(Condition *c) {
+        int wanted;
+        bool on_ac;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_AC_POWER);
+
+        wanted = parse_boolean(c->parameter);
+        if (wanted < 0)
+                return wanted;
+
+        on_ac = on_ac_power() != 0;
+        return on_ac == !!wanted;
+}
+
+/* ConditionSecurity=: maps the parameter to the matching "is this security technology in use" probe.
+ * Unknown names evaluate to false. */
+static int condition_test_security(Condition *c) {
+        const char *what;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_SECURITY);
+
+        what = c->parameter;
+
+        if (streq(what, "selinux"))
+                return mac_selinux_use();
+        else if (streq(what, "smack"))
+                return mac_smack_use();
+        else if (streq(what, "apparmor"))
+                return mac_apparmor_use();
+        else if (streq(what, "audit"))
+                return use_audit();
+        else if (streq(what, "ima"))
+                return use_ima();
+        else if (streq(what, "tomoyo"))
+                return mac_tomoyo_use();
+        else if (streq(what, "uefi-secureboot"))
+                return is_efi_secure_boot();
+
+        return false;
+}
+
+/* ConditionCapability=: checks whether the named capability is set in the "CapBnd:" mask listed in
+ * /proc/self/status. Returns -EINVAL for an unknown capability name. */
+static int condition_test_capability(Condition *c) {
+        unsigned long long bounding = (unsigned long long) -1;
+        _cleanup_fclose_ FILE *status = NULL;
+        int cap, r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_CAPABILITY);
+
+        /* Refuse capability names we cannot resolve */
+        cap = capability_from_name(c->parameter);
+        if (cap < 0)
+                return -EINVAL;
+
+        /* For a valid capability the mask starts out as all-ones, i.e. unless a CapBnd: line
+         * tells us otherwise we assume that we have it. */
+
+        status = fopen("/proc/self/status", "re");
+        if (!status)
+                return -errno;
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                const char *value;
+
+                r = read_line(status, LONG_LINE_MAX, &line);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                value = startswith(line, "CapBnd:");
+                if (!value)
+                        continue;
+
+                if (sscanf(value, "%llx", &bounding) != 1)
+                        return -EIO;
+
+                break;
+        }
+
+        return !!(bounding & (1ULL << cap));
+}
+
+/* ConditionNeedsUpdate=: true if /usr/ carries a newer modification time than the ".updated" stamp
+ * file inside the directory named by the parameter.
+ *
+ * Returns false if the directory is on a read-only file system (an update could not be recorded
+ * anyway). Returns true on any other failure, so that we err on the side of running too many update
+ * tools rather than too few. */
+static int condition_test_needs_update(Condition *c) {
+        const char *p;
+        struct stat usr, other;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_NEEDS_UPDATE);
+
+        /* If the file system is read-only we shouldn't suggest an update */
+        if (path_is_read_only_fs(c->parameter) > 0)
+                return false;
+
+        /* Any other failure means we should allow the condition to be true,
+         * so that we rather invoke too many update tools than too
+         * few. */
+
+        if (!path_is_absolute(c->parameter))
+                return true;
+
+        p = strjoina(c->parameter, "/.updated");
+        if (lstat(p, &other) < 0)
+                return true;
+
+        if (lstat("/usr/", &usr) < 0)
+                return true;
+
+        /*
+         * First, compare seconds as they are always accurate...
+         */
+        if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec)
+                return usr.st_mtim.tv_sec > other.st_mtim.tv_sec;
+
+        /*
+         * ...then compare nanoseconds.
+         *
+         * A false positive is only possible when /usr's nanoseconds > 0
+         * (otherwise /usr cannot be strictly newer than the target file)
+         * AND the target file's nanoseconds == 0
+         * (otherwise the filesystem supports nsec timestamps, see stat(2)).
+         */
+        if (usr.st_mtim.tv_nsec > 0 && other.st_mtim.tv_nsec == 0) {
+                const uint64_t nsec_per_sec = 1000000000;
+                _cleanup_free_ char *timestamp_str = NULL;
+                uint64_t timestamp, usr_nsec;
+                int r;
+
+                r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", &timestamp_str);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p);
+                        return true;
+                } else if (r == 0) {
+                        log_debug("No data in timestamp file '%s', using mtime", p);
+                        return true;
+                }
+
+                r = safe_atou64(timestamp_str, &timestamp);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p);
+                        return true;
+                }
+
+                /* TIMESTAMP_NSEC is a nanosecond value. It must not be passed through
+                 * timespec_store(), which interprets its argument as microseconds. Instead
+                 * compare the complete nanosecond timestamps directly. */
+                usr_nsec = (uint64_t) usr.st_mtim.tv_sec * nsec_per_sec + (uint64_t) usr.st_mtim.tv_nsec;
+                return usr_nsec > timestamp;
+        }
+
+        return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
+}
+
+/* ConditionFirstBoot=: the boolean parameter must agree with whether /run/systemd/first-boot exists. */
+static int condition_test_first_boot(Condition *c) {
+        int wanted;
+        bool flagged;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_FIRST_BOOT);
+
+        wanted = parse_boolean(c->parameter);
+        if (wanted < 0)
+                return wanted;
+
+        flagged = access("/run/systemd/first-boot", F_OK) >= 0;
+        return flagged == !!wanted;
+}
+
+/* ConditionPathExists=: true if access(F_OK) succeeds on the parameter. */
+static int condition_test_path_exists(Condition *c) {
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_PATH_EXISTS);
+
+        r = access(c->parameter, F_OK);
+        return r >= 0;
+}
+
+/* ConditionPathExistsGlob=: true if glob_exists() reports a positive result for the parameter;
+ * errors count as "no match". */
+static int condition_test_path_exists_glob(Condition *c) {
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_PATH_EXISTS_GLOB);
+
+        r = glob_exists(c->parameter);
+        return r > 0;
+}
+
+/* ConditionPathIsDirectory=: true if is_dir() (called with its second argument set to true) reports
+ * a positive result; errors count as "not a directory". */
+static int condition_test_path_is_directory(Condition *c) {
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_PATH_IS_DIRECTORY);
+
+        r = is_dir(c->parameter, true);
+        return r > 0;
+}
+
+/* ConditionPathIsSymbolicLink=: true if is_symlink() reports a positive result; errors count as
+ * "not a symlink". */
+static int condition_test_path_is_symbolic_link(Condition *c) {
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_PATH_IS_SYMBOLIC_LINK);
+
+        r = is_symlink(c->parameter);
+        return r > 0;
+}
+
+/* ConditionPathIsMountPoint=: true if path_is_mount_point() (with AT_SYMLINK_FOLLOW) reports a
+ * positive result; errors count as "not a mount point". */
+static int condition_test_path_is_mount_point(Condition *c) {
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_PATH_IS_MOUNT_POINT);
+
+        r = path_is_mount_point(c->parameter, NULL, AT_SYMLINK_FOLLOW);
+        return r > 0;
+}
+
+/* ConditionPathIsReadWrite=: true unless path_is_read_only_fs() positively reports a read-only file
+ * system; note that errors therefore count as "read-write". */
+static int condition_test_path_is_read_write(Condition *c) {
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_PATH_IS_READ_WRITE);
+
+        r = path_is_read_only_fs(c->parameter);
+        return r <= 0;
+}
+
+/* ConditionDirectoryNotEmpty=: false if dir_is_empty() reports "empty" (> 0) or -ENOENT; true for
+ * "not empty" (0) and for any other error. */
+static int condition_test_directory_not_empty(Condition *c) {
+        int r;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_DIRECTORY_NOT_EMPTY);
+
+        r = dir_is_empty(c->parameter);
+        if (r == -ENOENT)
+                return false;
+
+        return r <= 0;
+}
+
+/* ConditionFileNotEmpty=: true if the parameter can be stat()ed, is a regular file and has a size
+ * greater than zero. */
+static int condition_test_file_not_empty(Condition *c) {
+        struct stat st;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_FILE_NOT_EMPTY);
+
+        if (stat(c->parameter, &st) < 0)
+                return false;
+
+        if (!S_ISREG(st.st_mode))
+                return false;
+
+        return st.st_size > 0;
+}
+
+/* ConditionFileIsExecutable=: true if the parameter can be stat()ed, is a regular file and has at
+ * least one of the execute bits (0111) set. */
+static int condition_test_file_is_executable(Condition *c) {
+        struct stat st;
+
+        assert(c);
+        assert(c->parameter);
+        assert(c->type == CONDITION_FILE_IS_EXECUTABLE);
+
+        if (stat(c->parameter, &st) < 0)
+                return false;
+
+        if (!S_ISREG(st.st_mode))
+                return false;
+
+        return (st.st_mode & 0111) != 0;
+}
+
+/* ConditionNull=: always evaluates to true. The configured string was already evaluated at parse
+ * time and is stored in c->negate, which condition_test() applies to this result. */
+static int condition_test_null(Condition *c) {
+        assert(c);
+        assert(c->type == CONDITION_NULL);
+
+        return 1;
+}
+
+/* Evaluates a single condition by dispatching on c->type, applies c->negate, and records the outcome
+ * in c->result. Returns 1 if the (possibly negated) condition holds, 0 if not, and the negative
+ * error of the type-specific test on failure (c->result is then CONDITION_ERROR). */
+int condition_test(Condition *c) {
+
+        static int (*const condition_tests[_CONDITION_TYPE_MAX])(Condition *c) = {
+                [CONDITION_ARCHITECTURE]             = condition_test_architecture,
+                [CONDITION_VIRTUALIZATION]           = condition_test_virtualization,
+                [CONDITION_HOST]                     = condition_test_host,
+                [CONDITION_KERNEL_COMMAND_LINE]      = condition_test_kernel_command_line,
+                [CONDITION_KERNEL_VERSION]           = condition_test_kernel_version,
+                [CONDITION_SECURITY]                 = condition_test_security,
+                [CONDITION_CAPABILITY]               = condition_test_capability,
+                [CONDITION_AC_POWER]                 = condition_test_ac_power,
+                [CONDITION_NEEDS_UPDATE]             = condition_test_needs_update,
+                [CONDITION_FIRST_BOOT]               = condition_test_first_boot,
+                [CONDITION_PATH_EXISTS]              = condition_test_path_exists,
+                [CONDITION_PATH_EXISTS_GLOB]         = condition_test_path_exists_glob,
+                [CONDITION_PATH_IS_DIRECTORY]        = condition_test_path_is_directory,
+                [CONDITION_PATH_IS_SYMBOLIC_LINK]    = condition_test_path_is_symbolic_link,
+                [CONDITION_PATH_IS_MOUNT_POINT]      = condition_test_path_is_mount_point,
+                [CONDITION_PATH_IS_READ_WRITE]       = condition_test_path_is_read_write,
+                [CONDITION_DIRECTORY_NOT_EMPTY]      = condition_test_directory_not_empty,
+                [CONDITION_FILE_NOT_EMPTY]           = condition_test_file_not_empty,
+                [CONDITION_FILE_IS_EXECUTABLE]       = condition_test_file_is_executable,
+                [CONDITION_NULL]                     = condition_test_null,
+                [CONDITION_USER]                     = condition_test_user,
+                [CONDITION_GROUP]                    = condition_test_group,
+                [CONDITION_CONTROL_GROUP_CONTROLLER] = condition_test_control_group_controller,
+        };
+
+        int raw, verdict;
+
+        assert(c);
+        assert(c->type >= 0);
+        assert(c->type < _CONDITION_TYPE_MAX);
+
+        raw = condition_tests[c->type](c);
+        if (raw < 0) {
+                c->result = CONDITION_ERROR;
+                return raw;
+        }
+
+        /* raw is >= 0 here: negation flips "holds" (> 0) and "does not hold" (== 0) */
+        verdict = c->negate ? raw == 0 : raw > 0;
+        c->result = verdict ? CONDITION_SUCCEEDED : CONDITION_FAILED;
+        return verdict;
+}
+
+/* Writes one line describing the condition to f: the optional prefix, a tab, the type name as
+ * rendered by to_string(), then "|" if trigger is set, "!" if negate is set, the parameter and the
+ * textual result.
+ *
+ * CONDITION_NULL conditions carry no parameter (see the assertion in condition_new()), hence the
+ * parameter is passed through strempty() so that a NULL pointer is never handed to "%s". */
+void condition_dump(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)) {
+        assert(c);
+        assert(f);
+
+        prefix = strempty(prefix);
+
+        fprintf(f,
+                "%s\t%s: %s%s%s %s\n",
+                prefix,
+                to_string(c->type),
+                c->trigger ? "|" : "",
+                c->negate ? "!" : "",
+                strempty(c->parameter),
+                condition_result_to_string(c->result));
+}
+
+/* Calls condition_dump() for every condition on the list starting at first. */
+void condition_dump_list(Condition *first, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)) {
+        Condition *cur;
+
+        LIST_FOREACH(conditions, cur, first)
+                condition_dump(cur, f, prefix, to_string);
+}
+
+/* Names of the Condition*= settings, indexed by ConditionType (listed in enum order). */
+static const char* const condition_type_table[_CONDITION_TYPE_MAX] = {
+        [CONDITION_ARCHITECTURE]             = "ConditionArchitecture",
+        [CONDITION_VIRTUALIZATION]           = "ConditionVirtualization",
+        [CONDITION_HOST]                     = "ConditionHost",
+        [CONDITION_KERNEL_COMMAND_LINE]      = "ConditionKernelCommandLine",
+        [CONDITION_KERNEL_VERSION]           = "ConditionKernelVersion",
+        [CONDITION_SECURITY]                 = "ConditionSecurity",
+        [CONDITION_CAPABILITY]               = "ConditionCapability",
+        [CONDITION_AC_POWER]                 = "ConditionACPower",
+        [CONDITION_NEEDS_UPDATE]             = "ConditionNeedsUpdate",
+        [CONDITION_FIRST_BOOT]               = "ConditionFirstBoot",
+        [CONDITION_PATH_EXISTS]              = "ConditionPathExists",
+        [CONDITION_PATH_EXISTS_GLOB]         = "ConditionPathExistsGlob",
+        [CONDITION_PATH_IS_DIRECTORY]        = "ConditionPathIsDirectory",
+        [CONDITION_PATH_IS_SYMBOLIC_LINK]    = "ConditionPathIsSymbolicLink",
+        [CONDITION_PATH_IS_MOUNT_POINT]      = "ConditionPathIsMountPoint",
+        [CONDITION_PATH_IS_READ_WRITE]       = "ConditionPathIsReadWrite",
+        [CONDITION_DIRECTORY_NOT_EMPTY]      = "ConditionDirectoryNotEmpty",
+        [CONDITION_FILE_NOT_EMPTY]           = "ConditionFileNotEmpty",
+        [CONDITION_FILE_IS_EXECUTABLE]       = "ConditionFileIsExecutable",
+        [CONDITION_NULL]                     = "ConditionNull",
+        [CONDITION_USER]                     = "ConditionUser",
+        [CONDITION_GROUP]                    = "ConditionGroup",
+        [CONDITION_CONTROL_GROUP_CONTROLLER] = "ConditionControlGroupController",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_type, ConditionType);
+
+/* Names of the Assert*= settings, indexed by ConditionType (listed in enum order). */
+static const char* const assert_type_table[_CONDITION_TYPE_MAX] = {
+        [CONDITION_ARCHITECTURE]             = "AssertArchitecture",
+        [CONDITION_VIRTUALIZATION]           = "AssertVirtualization",
+        [CONDITION_HOST]                     = "AssertHost",
+        [CONDITION_KERNEL_COMMAND_LINE]      = "AssertKernelCommandLine",
+        [CONDITION_KERNEL_VERSION]           = "AssertKernelVersion",
+        [CONDITION_SECURITY]                 = "AssertSecurity",
+        [CONDITION_CAPABILITY]               = "AssertCapability",
+        [CONDITION_AC_POWER]                 = "AssertACPower",
+        [CONDITION_NEEDS_UPDATE]             = "AssertNeedsUpdate",
+        [CONDITION_FIRST_BOOT]               = "AssertFirstBoot",
+        [CONDITION_PATH_EXISTS]              = "AssertPathExists",
+        [CONDITION_PATH_EXISTS_GLOB]         = "AssertPathExistsGlob",
+        [CONDITION_PATH_IS_DIRECTORY]        = "AssertPathIsDirectory",
+        [CONDITION_PATH_IS_SYMBOLIC_LINK]    = "AssertPathIsSymbolicLink",
+        [CONDITION_PATH_IS_MOUNT_POINT]      = "AssertPathIsMountPoint",
+        [CONDITION_PATH_IS_READ_WRITE]       = "AssertPathIsReadWrite",
+        [CONDITION_DIRECTORY_NOT_EMPTY]      = "AssertDirectoryNotEmpty",
+        [CONDITION_FILE_NOT_EMPTY]           = "AssertFileNotEmpty",
+        [CONDITION_FILE_IS_EXECUTABLE]       = "AssertFileIsExecutable",
+        [CONDITION_NULL]                     = "AssertNull",
+        [CONDITION_USER]                     = "AssertUser",
+        [CONDITION_GROUP]                    = "AssertGroup",
+        [CONDITION_CONTROL_GROUP_CONTROLLER] = "AssertControlGroupController",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(assert_type, ConditionType);
+
+/* Textual form of each ConditionResult, as printed by condition_dump(). */
+static const char* const condition_result_table[_CONDITION_RESULT_MAX] = {
+        [CONDITION_UNTESTED]  = "untested",
+        [CONDITION_SUCCEEDED] = "succeeded",
+        [CONDITION_FAILED]    = "failed",
+        [CONDITION_ERROR]     = "error",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_result, ConditionResult);
diff --git a/src/shared/condition.h b/src/shared/condition.h
new file mode 100644
index 0000000..e69fc36
--- /dev/null
+++ b/src/shared/condition.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "list.h"
+#include "macro.h"
+
+/* Kinds of conditions. The values index the dispatch table in condition_test() and the name tables
+ * in condition.c, so the numbering must stay in sync with those. */
+typedef enum ConditionType {
+ CONDITION_ARCHITECTURE,
+ CONDITION_VIRTUALIZATION,
+ CONDITION_HOST,
+ CONDITION_KERNEL_COMMAND_LINE,
+ CONDITION_KERNEL_VERSION,
+ CONDITION_SECURITY,
+ CONDITION_CAPABILITY,
+ CONDITION_AC_POWER,
+
+ CONDITION_NEEDS_UPDATE,
+ CONDITION_FIRST_BOOT,
+
+ /* File system checks */
+ CONDITION_PATH_EXISTS,
+ CONDITION_PATH_EXISTS_GLOB,
+ CONDITION_PATH_IS_DIRECTORY,
+ CONDITION_PATH_IS_SYMBOLIC_LINK,
+ CONDITION_PATH_IS_MOUNT_POINT,
+ CONDITION_PATH_IS_READ_WRITE,
+ CONDITION_DIRECTORY_NOT_EMPTY,
+ CONDITION_FILE_NOT_EMPTY,
+ CONDITION_FILE_IS_EXECUTABLE,
+
+ /* The only type that carries no parameter, see condition_new() */
+ CONDITION_NULL,
+
+ CONDITION_USER,
+ CONDITION_GROUP,
+
+ CONDITION_CONTROL_GROUP_CONTROLLER,
+
+ /* Number of valid types; also used as the size of the per-type tables */
+ _CONDITION_TYPE_MAX,
+ _CONDITION_TYPE_INVALID = -1
+} ConditionType;
+
+/* Outcome of the last condition_test() call on a condition. */
+typedef enum ConditionResult {
+ CONDITION_UNTESTED, /* zero, i.e. the state of a freshly allocated (zeroed) condition */
+ CONDITION_SUCCEEDED, /* the (possibly negated) test held */
+ CONDITION_FAILED, /* the (possibly negated) test did not hold */
+ CONDITION_ERROR, /* the type-specific test returned a negative error */
+ _CONDITION_RESULT_MAX,
+ _CONDITION_RESULT_INVALID = -1
+} ConditionResult;
+
+/* A single condition/assertion, linked into a list via the "conditions" list fields. */
+typedef struct Condition {
+ ConditionType type:8;
+
+ /* Printed as "|" by condition_dump(); NOTE(review): how triggering conditions are combined is
+ * decided by the callers, not by condition.c — confirm there. */
+ bool trigger:1;
+ /* If set, condition_test() inverts the outcome of the type-specific test */
+ bool negate:1;
+
+ /* Updated by every condition_test() call */
+ ConditionResult result:6;
+
+ /* Private copy made by condition_new(), released by condition_free(); NULL for CONDITION_NULL */
+ char *parameter;
+
+ LIST_FIELDS(struct Condition, conditions);
+} Condition;
+
+/* Allocates a condition with a private copy of parameter; returns NULL on allocation failure. */
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate);
+/* Frees one condition (must not be NULL) including its parameter. */
+void condition_free(Condition *c);
+/* Frees a whole list of conditions; always returns NULL. */
+Condition* condition_free_list(Condition *c);
+
+/* Evaluates the condition and stores the outcome in c->result. Returns 1 if it holds, 0 if it does
+ * not, negative on error. */
+int condition_test(Condition *c);
+
+/* Print one condition / a list of conditions to f; to_string selects how the type is named, e.g.
+ * condition_type_to_string() or assert_type_to_string(). */
+void condition_dump(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t));
+void condition_dump_list(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t));
+
+/* String table lookups generated by DEFINE_STRING_TABLE_LOOKUP() in condition.c */
+const char* condition_type_to_string(ConditionType t) _const_;
+ConditionType condition_type_from_string(const char *s) _pure_;
+
+const char* assert_type_to_string(ConditionType t) _const_;
+ConditionType assert_type_from_string(const char *s) _pure_;
+
+const char* condition_result_to_string(ConditionResult r) _const_;
+ConditionResult condition_result_from_string(const char *s) _pure_;
+
+/* Returns true for the condition types listed below, i.e. those whose parameter is a file system
+ * path. */
+static inline bool condition_takes_path(ConditionType t) {
+        switch (t) {
+
+        case CONDITION_PATH_EXISTS:
+        case CONDITION_PATH_EXISTS_GLOB:
+        case CONDITION_PATH_IS_DIRECTORY:
+        case CONDITION_PATH_IS_SYMBOLIC_LINK:
+        case CONDITION_PATH_IS_MOUNT_POINT:
+        case CONDITION_PATH_IS_READ_WRITE:
+        case CONDITION_DIRECTORY_NOT_EMPTY:
+        case CONDITION_FILE_NOT_EMPTY:
+        case CONDITION_FILE_IS_EXECUTABLE:
+        case CONDITION_NEEDS_UPDATE:
+                return true;
+
+        default:
+                return false;
+        }
+}
diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c
new file mode 100644
index 0000000..b80c147
--- /dev/null
+++ b/src/shared/conf-parser.c
@@ -0,0 +1,1113 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "time-util.h"
+#include "utf8.h"
+
+/* ConfigItemLookup implementation for a plain array of ConfigTableItem, terminated by an entry whose
+ * lvalue is NULL. Returns 1 and fills in func/ltype/data from the first entry matching both lvalue
+ * and section, 0 if there is none. userdata is not used. */
+int config_item_table_lookup(
+                const void *table,
+                const char *section,
+                const char *lvalue,
+                ConfigParserCallback *func,
+                int *ltype,
+                void **data,
+                void *userdata) {
+
+        const ConfigTableItem *item;
+
+        assert(table);
+        assert(lvalue);
+        assert(func);
+        assert(ltype);
+        assert(data);
+
+        for (item = table; item->lvalue; item++)
+                if (streq(lvalue, item->lvalue) && streq_ptr(section, item->section)) {
+                        *func = item->parse;
+                        *ltype = item->ltype;
+                        *data = item->data;
+                        return 1;
+                }
+
+        return 0;
+}
+
+/* ConfigItemLookup implementation where table is a lookup function (ConfigPerfItemLookup). The key
+ * passed to it is "<section>.<lvalue>", or just lvalue when no section is given. On a hit returns 1
+ * and points *data at userdata plus the item's offset; returns 0 otherwise. */
+int config_item_perf_lookup(
+                const void *table,
+                const char *section,
+                const char *lvalue,
+                ConfigParserCallback *func,
+                int *ltype,
+                void **data,
+                void *userdata) {
+
+        ConfigPerfItemLookup lookup = (ConfigPerfItemLookup) table;
+        const ConfigPerfItem *hit;
+
+        assert(table);
+        assert(lvalue);
+        assert(func);
+        assert(ltype);
+        assert(data);
+
+        if (!section)
+                hit = lookup(lvalue, strlen(lvalue));
+        else {
+                const char *key;
+
+                key = strjoina(section, ".", lvalue);
+                hit = lookup(key, strlen(key));
+        }
+        if (!hit)
+                return 0;
+
+        *func = hit->parse;
+        *ltype = hit->ltype;
+        *data = (uint8_t*) userdata + hit->offset;
+        return 1;
+}
+
+/* Resolves lvalue through the lookup callback and, if a parser is registered for it, runs that
+ * parser on rvalue. Unknown keys are ignored; a warning is logged for them unless
+ * CONFIG_PARSE_RELAXED is set or the key starts with "X-". */
+static int next_assignment(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                ConfigItemLookup lookup,
+                const void *table,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                const char *rvalue,
+                ConfigParseFlags flags,
+                void *userdata) {
+
+        ConfigParserCallback parser = NULL;
+        void *target = NULL;
+        int kind = 0;
+        int r;
+
+        assert(filename);
+        assert(line > 0);
+        assert(lookup);
+        assert(lvalue);
+        assert(rvalue);
+
+        r = lookup(table, section, lvalue, &parser, &kind, &target, userdata);
+        if (r < 0)
+                return r;
+
+        if (r == 0) {
+                /* Warn about unknown non-extension fields. */
+                if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(lvalue, "X-"))
+                        log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown lvalue '%s' in section '%s', ignoring", lvalue, section);
+
+                return 0;
+        }
+
+        /* Known key, but nothing to call for it */
+        if (!parser)
+                return 0;
+
+        return parser(unit, filename, line, section, section_line,
+                      lvalue, kind, rvalue, target, userdata);
+}
+
+/* Parse a single logical line (i.e. after continuation lines have been joined by the caller).
+ *
+ * Handles, in this order: empty lines, ".include" directives, "[Section]" headers and
+ * "key=value" assignments. l is modified in place. *section, *section_line and *section_ignored
+ * carry the current section state from one call to the next. Returns 0 on success or when the
+ * line was ignored, negative on error. */
+static int parse_line(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ char **section,
+ unsigned *section_line,
+ bool *section_ignored,
+ char *l,
+ void *userdata) {
+
+ char *e, *include;
+
+ assert(filename);
+ assert(line > 0);
+ assert(lookup);
+ assert(l);
+
+ l = strstrip(l);
+ if (!*l)
+ return 0;
+
+ if (*l == '\n')
+ return 0;
+
+ include = first_word(l, ".include");
+ if (include) {
+ _cleanup_free_ char *fn = NULL;
+
+ /* .includes are a bad idea, we only support them here
+ * for historical reasons. They create cyclic include
+ * problems and make it difficult to detect
+ * configuration file changes with an easy
+ * stat(). Better approaches, such as .d/ drop-in
+ * snippets exist.
+ *
+ * Support for them should be eventually removed. */
+
+ if (!(flags & CONFIG_PARSE_ALLOW_INCLUDE)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, ".include not allowed here. Ignoring.");
+ return 0;
+ }
+
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ ".include directives are deprecated, and support for them will be removed in a future version of systemd. "
+ "Please use drop-in files instead.");
+
+ /* The included path is resolved relative to the directory of the including file */
+ fn = file_in_same_dir(filename, strstrip(include));
+ if (!fn)
+ return -ENOMEM;
+
+ return config_parse(unit, fn, NULL, sections, lookup, table, flags, userdata);
+ }
+
+ if (!utf8_is_valid(l))
+ return log_syntax_invalid_utf8(unit, LOG_WARNING, filename, line, l);
+
+ /* Section header? */
+ if (*l == '[') {
+ size_t k;
+ char *n;
+
+ k = strlen(l);
+ assert(k > 0);
+
+ if (l[k-1] != ']') {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid section header '%s'", l);
+ return -EBADMSG;
+ }
+
+ /* Copy the name between the brackets */
+ n = strndup(l+1, k-2);
+ if (!n)
+ return -ENOMEM;
+
+ if (sections && !nulstr_contains(sections, n)) {
+
+ if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(n, "X-"))
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown section '%s'. Ignoring.", n);
+
+ /* Not a section we were asked to parse: drop the current section so that the
+ * assignments that follow are skipped, and remember that this was deliberate */
+ free(n);
+ *section = mfree(*section);
+ *section_line = 0;
+ *section_ignored = true;
+ } else {
+ free_and_replace(*section, n);
+ *section_line = line;
+ *section_ignored = false;
+ }
+
+ return 0;
+ }
+
+ if (sections && !*section) {
+
+ /* Only warn if we are not inside a section that was ignored on purpose */
+ if (!(flags & CONFIG_PARSE_RELAXED) && !*section_ignored)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Assignment outside of section. Ignoring.");
+
+ return 0;
+ }
+
+ e = strchr(l, '=');
+ if (!e) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Missing '='.");
+ return -EINVAL;
+ }
+
+ /* Split the line in place at the first '=': l becomes the key, e the value */
+ *e = 0;
+ e++;
+
+ return next_assignment(unit,
+ filename,
+ line,
+ lookup,
+ table,
+ *section,
+ *section_line,
+ strstrip(l),
+ strstrip(e),
+ flags,
+ userdata);
+}
+
+/* Go through the file and parse each line.
+ *
+ * If f is NULL, filename is opened (and closed again) here; a missing file is not an error and yields
+ * 0. Otherwise the caller's stream is read and left open. Comment lines and blank lines are skipped,
+ * a leading UTF-8 byte order mark is skipped once, and lines ending in an unescaped backslash are
+ * joined with the following line(s) before being handed to parse_line().
+ *
+ * Messages about I/O and parse failures are only logged when CONFIG_PARSE_WARN is set in flags
+ * (except for the debug message about a missing file).
+ *
+ * Returns 0 on success, a negative errno-style value on failure. */
+int config_parse(const char *unit,
+                 const char *filename,
+                 FILE *f,
+                 const char *sections,
+                 ConfigItemLookup lookup,
+                 const void *table,
+                 ConfigParseFlags flags,
+                 void *userdata) {
+
+        _cleanup_free_ char *section = NULL, *continuation = NULL;
+        _cleanup_fclose_ FILE *ours = NULL;
+        unsigned line = 0, section_line = 0;
+        bool section_ignored = false;
+        int r;
+
+        assert(filename);
+        assert(lookup);
+
+        if (!f) {
+                f = ours = fopen(filename, "re");
+                if (!f) {
+                        /* Only log on request, except for ENOENT,
+                         * since we return 0 to the caller. */
+                        if ((flags & CONFIG_PARSE_WARN) || errno == ENOENT)
+                                log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+                                               "Failed to open configuration file '%s': %m", filename);
+                        return errno == ENOENT ? 0 : -errno;
+                }
+        }
+
+        fd_warn_permissions(filename, fileno(f));
+
+        for (;;) {
+                _cleanup_free_ char *buf = NULL;
+                bool escaped = false;
+                char *l, *p, *e;
+
+                r = read_line(f, LONG_LINE_MAX, &buf);
+                if (r == 0)
+                        break;
+                if (r == -ENOBUFS) {
+                        if (flags & CONFIG_PARSE_WARN)
+                                log_error_errno(r, "%s:%u: Line too long", filename, line);
+
+                        return r;
+                }
+                if (r < 0) {
+                        /* Honour the caller's flags here, too; testing the bare constant would
+                         * always be true */
+                        if (flags & CONFIG_PARSE_WARN)
+                                log_error_errno(r, "%s:%u: Error while reading configuration file: %m", filename, line);
+
+                        return r;
+                }
+
+                /* Skip comment lines. strchr() also matches the terminating NUL of COMMENTS, hence
+                 * lines that are empty or consist only of whitespace are skipped here as well. */
+                if (strchr(COMMENTS, *skip_leading_chars(buf, WHITESPACE)))
+                        continue;
+
+                l = buf;
+                if (!(flags & CONFIG_PARSE_REFUSE_BOM)) {
+                        char *q;
+
+                        q = startswith(buf, UTF8_BYTE_ORDER_MARK);
+                        if (q) {
+                                l = q;
+                                /* A byte order mark is accepted at most once */
+                                flags |= CONFIG_PARSE_REFUSE_BOM;
+                        }
+                }
+
+                if (continuation) {
+                        if (strlen(continuation) + strlen(l) > LONG_LINE_MAX) {
+                                if (flags & CONFIG_PARSE_WARN)
+                                        log_error("%s:%u: Continuation line too long", filename, line);
+                                return -ENOBUFS;
+                        }
+
+                        if (!strextend(&continuation, l, NULL)) {
+                                if (flags & CONFIG_PARSE_WARN)
+                                        log_oom();
+                                return -ENOMEM;
+                        }
+
+                        p = continuation;
+                } else
+                        p = l;
+
+                /* Find out whether the line ends in an unescaped backslash */
+                for (e = p; *e; e++) {
+                        if (escaped)
+                                escaped = false;
+                        else if (*e == '\\')
+                                escaped = true;
+                }
+
+                if (escaped) {
+                        /* Replace the trailing backslash by a space and wait for the next line */
+                        *(e-1) = ' ';
+
+                        if (!continuation) {
+                                continuation = strdup(l);
+                                if (!continuation) {
+                                        if (flags & CONFIG_PARSE_WARN)
+                                                log_oom();
+                                        return -ENOMEM;
+                                }
+                        }
+
+                        continue;
+                }
+
+                r = parse_line(unit,
+                               filename,
+                               ++line,
+                               sections,
+                               lookup,
+                               table,
+                               flags,
+                               &section,
+                               &section_line,
+                               &section_ignored,
+                               p,
+                               userdata);
+                if (r < 0) {
+                        if (flags & CONFIG_PARSE_WARN)
+                                log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+                        return r;
+                }
+
+                continuation = mfree(continuation);
+        }
+
+        /* The file ended in the middle of a continuation: parse what we have collected */
+        if (continuation) {
+                r = parse_line(unit,
+                               filename,
+                               ++line,
+                               sections,
+                               lookup,
+                               table,
+                               flags,
+                               &section,
+                               &section_line,
+                               &section_ignored,
+                               continuation,
+                               userdata);
+                if (r < 0) {
+                        if (flags & CONFIG_PARSE_WARN)
+                                log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+                        return r;
+                }
+        }
+
+        return 0;
+}
+
+/* Parses conf_file (if given) first and then every entry of files, in order, all with the same
+ * lookup configuration. Stops at, and returns, the first negative result. */
+static int config_parse_many_files(
+                const char *conf_file,
+                char **files,
+                const char *sections,
+                ConfigItemLookup lookup,
+                const void *table,
+                ConfigParseFlags flags,
+                void *userdata) {
+
+        char **path;
+        int r;
+
+        if (conf_file) {
+                r = config_parse(NULL, conf_file, NULL, sections, lookup, table, flags, userdata);
+                if (r < 0)
+                        return r;
+        }
+
+        STRV_FOREACH(path, files) {
+                r = config_parse(NULL, *path, NULL, sections, lookup, table, flags, userdata);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Parses conf_file plus every "*.conf" file that conf_files_list_nulstr() finds in the directories
+ * given as a nulstr. */
+int config_parse_many_nulstr(
+                const char *conf_file,
+                const char *conf_file_dirs,
+                const char *sections,
+                ConfigItemLookup lookup,
+                const void *table,
+                ConfigParseFlags flags,
+                void *userdata) {
+
+        _cleanup_strv_free_ char **dropins = NULL;
+        int r;
+
+        r = conf_files_list_nulstr(&dropins, ".conf", NULL, 0, conf_file_dirs);
+        if (r < 0)
+                return r;
+
+        return config_parse_many_files(conf_file, dropins, sections, lookup, table, flags, userdata);
+}
+
+/* Parses conf_file plus every "*.conf" file found in "<dir>/<dropin_dirname>" for each directory
+ * of the conf_file_dirs strv. */
+int config_parse_many(
+                const char *conf_file,
+                const char* const* conf_file_dirs,
+                const char *dropin_dirname,
+                const char *sections,
+                ConfigItemLookup lookup,
+                const void *table,
+                ConfigParseFlags flags,
+                void *userdata) {
+
+        _cleanup_strv_free_ char **dirs = NULL, **dropins = NULL;
+        const char *suffix;
+        int r;
+
+        /* Append "/<dropin_dirname>" to each of the search directories */
+        suffix = strjoina("/", dropin_dirname);
+        r = strv_extend_strv_concat(&dirs, (char**) conf_file_dirs, suffix);
+        if (r < 0)
+                return r;
+
+        r = conf_files_list_strv(&dropins, ".conf", NULL, 0, (const char* const*) dirs);
+        if (r < 0)
+                return r;
+
+        return config_parse_many_files(conf_file, dropins, sections, lookup, table, flags, userdata);
+}
+
+/* Instantiates config_parse_<type>() through DEFINE_CONFIG_PARSE_PTR: the generated parser hands rvalue and
+ * the destination (data, viewed as vartype*) to conv_func and logs "Failed to parse <type> value, ignoring"
+ * when conv_func returns a negative error. The generated parsers always return 0. */
+#define DEFINE_PARSER(type, vartype, conv_func) \
+ DEFINE_CONFIG_PARSE_PTR(config_parse_##type, conv_func, vartype, "Failed to parse " #type " value")
+
+DEFINE_PARSER(int, int, safe_atoi);
+DEFINE_PARSER(long, long, safe_atoli);
+DEFINE_PARSER(uint8, uint8_t, safe_atou8);
+DEFINE_PARSER(uint16, uint16_t, safe_atou16);
+DEFINE_PARSER(uint32, uint32_t, safe_atou32);
+DEFINE_PARSER(uint64, uint64_t, safe_atou64);
+DEFINE_PARSER(unsigned, unsigned, safe_atou);
+DEFINE_PARSER(double, double, safe_atod);
+DEFINE_PARSER(nsec, nsec_t, parse_nsec);
+DEFINE_PARSER(sec, usec_t, parse_sec);
+DEFINE_PARSER(mode, mode_t, parse_mode);
+
+/* Parses a size with base-1024 suffixes into the size_t pointed to by data. Values that fail to parse, or
+ * that do not fit into size_t, are logged and ignored. Always returns 0. */
+int config_parse_iec_size(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint64_t parsed;
+        size_t *dest = data;
+        int ret;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        ret = parse_size(rvalue, 1024, &parsed);
+        if (ret >= 0 && parsed != (uint64_t) (size_t) parsed)
+                ret = -ERANGE; /* would be truncated when stored as size_t */
+
+        if (ret >= 0)
+                *dest = (size_t) parsed;
+        else
+                log_syntax(unit, LOG_ERR, filename, line, ret, "Failed to parse size value '%s', ignoring: %m", rvalue);
+
+        return 0;
+}
+
+/* Parses a size with base-1000 suffixes into the size_t pointed to by data. Values that fail to parse, or
+ * that do not fit into size_t, are logged and ignored. Always returns 0. */
+int config_parse_si_size(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint64_t parsed;
+        size_t *dest = data;
+        int ret;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        ret = parse_size(rvalue, 1000, &parsed);
+        if (ret >= 0 && parsed != (uint64_t) (size_t) parsed)
+                ret = -ERANGE; /* would be truncated when stored as size_t */
+
+        if (ret >= 0)
+                *dest = (size_t) parsed;
+        else
+                log_syntax(unit, LOG_ERR, filename, line, ret, "Failed to parse size value '%s', ignoring: %m", rvalue);
+
+        return 0;
+}
+
+/* Parses a size with base-1024 suffixes; parse_size() is handed the uint64_t pointed to by data as its
+ * output. Parse failures are logged and ignored. Always returns 0. */
+int config_parse_iec_uint64(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint64_t *dest = data;
+        int ret;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        ret = parse_size(rvalue, 1024, dest);
+        if (ret >= 0)
+                return 0;
+
+        log_syntax(unit, LOG_ERR, filename, line, ret, "Failed to parse size value, ignoring: %s", rvalue);
+        return 0;
+}
+
+/* Parses a boolean into the bool pointed to by data. A non-zero ltype makes a parse failure fatal
+ * (-ENOEXEC is returned); otherwise the failure is logged, the assignment ignored and 0 returned. */
+int config_parse_bool(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        bool *dest = data;
+        const bool fatal = ltype;
+        int parsed;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        parsed = parse_boolean(rvalue);
+        if (parsed >= 0) {
+                *dest = parsed;
+                return 0;
+        }
+
+        log_syntax(unit, LOG_ERR, filename, line, parsed,
+                   "Failed to parse boolean value%s: %s",
+                   fatal ? "" : ", ignoring", rvalue);
+
+        if (fatal)
+                return -ENOEXEC;
+
+        return 0;
+}
+
+/* Parses a tristate setting into the int pointed to by data.
+ *
+ * A tristate is pretty much a boolean, except that it can also take the special value -1, indicating
+ * "uninitialized", much like NULL is for a pointer type:
+ *   - an empty rvalue resets the setting to -1,
+ *   - anything parse_boolean() accepts stores 0 or 1,
+ *   - anything else is logged and ignored, leaving the previous value in place.
+ *
+ * Always returns 0. */
+int config_parse_tristate(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        int k, *t = data;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        /* Like the other parsers in this file, treat an empty assignment as a reset. Without this there is
+         * no way to get back to the "uninitialized" state once a value has been assigned. */
+        if (isempty(rvalue)) {
+                *t = -1;
+                return 0;
+        }
+
+        k = parse_boolean(rvalue);
+        if (k < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse boolean value, ignoring: %s", rvalue);
+                return 0;
+        }
+
+        /* Normalize to exactly 0 or 1, so that -1 remains the only "unset" marker. */
+        *t = !!k;
+        return 0;
+}
+
+/* Stores a copy of rvalue in the char* pointed to by data, via free_and_strdup(). The value is passed
+ * through empty_to_null() first (presumably turning an empty assignment into NULL). Returns 0 on success
+ * and the result of log_oom() if the copy cannot be allocated. */
+int config_parse_string(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        char **dest = data;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (free_and_strdup(dest, empty_to_null(rvalue)) >= 0)
+                return 0;
+
+        return log_oom();
+}
+
+/* Parses an absolute path into the char* pointed to by data. An empty rvalue replaces the stored path with
+ * NULL. A path rejected by path_simplify_and_warn() is ignored (0 is returned) unless ltype is non-zero,
+ * in which case -ENOEXEC is returned. */
+int config_parse_path(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_free_ char *copy = NULL;
+        char **dest = data;
+        const bool fatal = ltype;
+        int ret;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (!isempty(rvalue)) {
+                copy = strdup(rvalue);
+                if (!copy)
+                        return log_oom();
+
+                ret = path_simplify_and_warn(copy, PATH_CHECK_ABSOLUTE | (fatal ? PATH_CHECK_FATAL : 0), unit, filename, line, lvalue);
+                if (ret < 0)
+                        return fatal ? -ENOEXEC : 0;
+        }
+
+        /* For an empty rvalue 'copy' is still NULL here, which clears the stored path. */
+        return free_and_replace(*dest, copy);
+}
+
+/* Splits rvalue into words (quotes honoured, escapes retained) and appends them to the strv pointed to by
+ * data. An empty rvalue frees the whole list. On a syntax error the remaining input is logged and ignored;
+ * words extracted before the error stay in the list. Returns 0, or the result of log_oom() on allocation
+ * failure. */
+int config_parse_strv(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        char ***list = data;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (isempty(rvalue)) {
+                *list = strv_free(*list);
+                return 0;
+        }
+
+        for (;;) {
+                char *token = NULL;
+                int ret;
+
+                ret = extract_first_word(&rvalue, &token, NULL, EXTRACT_QUOTES|EXTRACT_RETAIN_ESCAPE);
+                if (ret == -ENOMEM)
+                        return log_oom();
+                if (ret < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, ret, "Invalid syntax, ignoring: %s", rvalue);
+                        break;
+                }
+                if (ret == 0) /* no more words */
+                        break;
+
+                /* strv_consume() is handed the freshly extracted word. */
+                if (strv_consume(list, token) < 0)
+                        return log_oom();
+        }
+
+        return 0;
+}
+
+/* Pseudo-parser for settings that are recognized but not supported: ltype carries a Disabled value that
+ * selects which explanation is logged. Nothing is stored; always returns 0. */
+int config_parse_warn_compat(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        switch ((Disabled) ltype) {
+
+        case DISABLED_CONFIGURATION:
+                log_syntax(unit, LOG_DEBUG, filename, line, 0,
+                           "Support for option %s= has been disabled at compile time and it is ignored", lvalue);
+                break;
+
+        case DISABLED_LEGACY:
+                log_syntax(unit, LOG_INFO, filename, line, 0,
+                           "Support for option %s= has been removed and it is ignored", lvalue);
+                break;
+
+        case DISABLED_EXPERIMENTAL:
+                log_syntax(unit, LOG_INFO, filename, line, 0,
+                           "Support for option %s= has not yet been enabled and it is ignored", lvalue);
+                break;
+        }
+
+        return 0;
+}
+
+/* Parses a syslog facility name and combines it with the priority bits (LOG_PRI()) of the int pointed to
+ * by data. Unknown facilities are logged and ignored. Always returns 0. */
+int config_parse_log_facility(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        int *target = data;
+        int facility;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        facility = log_facility_unshifted_from_string(rvalue);
+        if (facility >= 0) {
+                /* The parsed facility is unshifted: move it above the three priority bits. */
+                *target = (facility << 3) | LOG_PRI(*target);
+                return 0;
+        }
+
+        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse log facility, ignoring: %s", rvalue);
+        return 0;
+}
+
+/* Parses a syslog level name and merges it into the int pointed to by data, keeping the facility bits
+ * already stored there. Unknown levels are logged and ignored. Always returns 0. */
+int config_parse_log_level(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        int *target = data;
+        int level;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        level = log_level_from_string(rvalue);
+        if (level < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse log level, ignoring: %s", rvalue);
+                return 0;
+        }
+
+        /* A negative stored value means "not initialized so far": assume zero facility in that case. */
+        *target = *target < 0 ? level : ((*target & LOG_FACMASK) | level);
+
+        return 0;
+}
+
+/* Parses a signal name into the int pointed to by data. Anything signal_from_string() does not map to a
+ * positive number is logged and ignored. Always returns 0. */
+int config_parse_signal(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        int *sig = data;
+        int signo;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(sig);
+
+        signo = signal_from_string(rvalue);
+        if (signo > 0) {
+                *sig = signo;
+                return 0;
+        }
+
+        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse signal name, ignoring: %s", rvalue);
+        return 0;
+}
+
+/* Parses a personality name into the unsigned long pointed to by data. An empty rvalue stores
+ * PERSONALITY_INVALID; an unknown name is logged and ignored. Always returns 0. */
+int config_parse_personality(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        unsigned long *personality = data;
+        unsigned long value = PERSONALITY_INVALID;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(personality);
+
+        if (!isempty(rvalue)) {
+                value = personality_from_string(rvalue);
+                if (value == PERSONALITY_INVALID) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse personality, ignoring: %s", rvalue);
+                        return 0;
+                }
+        }
+
+        *personality = value;
+        return 0;
+}
+
+/* Stores a copy of an interface name in the char* pointed to by data. An empty rvalue frees the stored
+ * name; a name rejected by ifname_valid() is logged and ignored. Returns 0, or the result of log_oom() on
+ * allocation failure. */
+int config_parse_ifname(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        char **name = data;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (isempty(rvalue)) {
+                *name = mfree(*name);
+                return 0;
+        }
+
+        if (!ifname_valid(rvalue)) {
+                log_syntax(unit, LOG_ERR, filename, line, 0, "Interface name is not valid or too long, ignoring assignment: %s", rvalue);
+                return 0;
+        }
+
+        if (free_and_strdup(name, rvalue) < 0)
+                return log_oom();
+
+        return 0;
+}
+
+/* Parses an IP port into the uint16_t pointed to by data. An empty rvalue stores 0; a value rejected by
+ * parse_ip_port() is logged and the stored port is left untouched. Always returns 0. */
+int config_parse_ip_port(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint16_t *dest = data;
+        uint16_t parsed = 0;
+        int ret;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        if (!isempty(rvalue)) {
+                ret = parse_ip_port(rvalue, &parsed);
+                if (ret < 0) {
+                        log_syntax(unit, LOG_ERR, filename, line, ret, "Failed to parse port '%s'.", rvalue);
+                        return 0;
+                }
+        }
+
+        *dest = parsed;
+
+        return 0;
+}
+
+/* Parses an MTU value; parse_mtu() receives ltype (compared against AF_INET6 below to pick the minimum
+ * shown in the range message) and the uint32_t pointed to by data as its output. Out-of-range and
+ * otherwise unparsable values are logged and ignored. Always returns 0. */
+int config_parse_mtu(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        uint32_t *mtu = data;
+        int ret;
+
+        assert(rvalue);
+        assert(mtu);
+
+        ret = parse_mtu(ltype, rvalue, mtu);
+        if (ret == -ERANGE)
+                log_syntax(unit, LOG_ERR, filename, line, ret,
+                           "Maximum transfer unit (MTU) value out of range. Permitted range is %" PRIu32 "…%" PRIu32 ", ignoring: %s",
+                           (uint32_t) (ltype == AF_INET6 ? IPV6_MIN_MTU : IPV4_MIN_MTU), (uint32_t) UINT32_MAX,
+                           rvalue);
+        else if (ret < 0)
+                log_syntax(unit, LOG_ERR, filename, line, ret,
+                           "Failed to parse MTU value '%s', ignoring: %m", rvalue);
+
+        return 0;
+}
+
+/* Parses a resource limit for the resource selected by ltype. data is an array of struct rlimit pointers
+ * indexed by ltype: an existing entry is overwritten, a missing one is allocated. Values that fail to
+ * parse are logged and ignored. Returns 0, or the result of log_oom() on allocation failure. */
+int config_parse_rlimit(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        struct rlimit **rl = data, **slot;
+        struct rlimit parsed = {};
+        int ret;
+
+        assert(rvalue);
+        assert(rl);
+
+        ret = rlimit_parse(ltype, rvalue, &parsed);
+        if (ret == -EILSEQ) {
+                log_syntax(unit, LOG_WARNING, filename, line, ret, "Soft resource limit chosen higher than hard limit, ignoring: %s", rvalue);
+                return 0;
+        }
+        if (ret < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, ret, "Failed to parse resource value, ignoring: %s", rvalue);
+                return 0;
+        }
+
+        slot = rl + ltype;
+        if (*slot) {
+                **slot = parsed;
+                return 0;
+        }
+
+        *slot = newdup(struct rlimit, &parsed, 1);
+        if (!*slot)
+                return log_oom();
+
+        return 0;
+}
+
+/* Parses a permille value into the unsigned pointed to by data. A negative result of parse_permille() is
+ * logged and the assignment ignored. Always returns 0. */
+int config_parse_permille(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        unsigned *permille = data;
+        int parsed;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(permille);
+
+        parsed = parse_permille(rvalue);
+        if (parsed >= 0) {
+                *permille = (unsigned) parsed;
+                return 0;
+        }
+
+        log_syntax(unit, LOG_ERR, filename, line, parsed,
+                   "Failed to parse permille value, ignoring: %s", rvalue);
+
+        return 0;
+}
diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h
new file mode 100644
index 0000000..865db42
--- /dev/null
+++ b/src/shared/conf-parser.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+
+/* An abstract parser for simple, line based, shallow configuration files consisting of variable assignments only. */
+
+typedef enum ConfigParseFlags {
+ CONFIG_PARSE_RELAXED = 1 << 0, /* NOTE(review): presumably tolerates unknown sections/settings; confirm in conf-parser.c */
+ CONFIG_PARSE_ALLOW_INCLUDE = 1 << 1, /* NOTE(review): presumably permits include directives; confirm in conf-parser.c */
+ CONFIG_PARSE_WARN = 1 << 2, /* Log a warning naming file and line when parsing a file fails */
+ CONFIG_PARSE_REFUSE_BOM = 1 << 3, /* NOTE(review): presumably rejects a leading byte order mark; confirm in conf-parser.c */
+} ConfigParseFlags;
+
+/* Argument list for parsers of specific configuration settings. */
+#define CONFIG_PARSER_ARGUMENTS \
+ const char *unit, \
+ const char *filename, \
+ unsigned line, \
+ const char *section, \
+ unsigned section_line, \
+ const char *lvalue, \
+ int ltype, \
+ const char *rvalue, \
+ void *data, \
+ void *userdata
+
+/* Prototype for a parser for a specific configuration setting */
+typedef int (*ConfigParserCallback)(CONFIG_PARSER_ARGUMENTS);
+
+/* A macro declaring a function prototype, following the typedef above, simply because it's so cumbersomely long
+ * otherwise. (And current emacs gets irritatingly slow when editing files that contain lots of very long function
+ * prototypes on the same screen…) */
+#define CONFIG_PARSER_PROTOTYPE(name) int name(CONFIG_PARSER_ARGUMENTS)
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a simple array */
+typedef struct ConfigTableItem {
+ const char *section; /* Section */
+ const char *lvalue; /* Name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ void *data; /* Where to store the variable's data */
+} ConfigTableItem;
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a gperf perfect hashtable */
+typedef struct ConfigPerfItem {
+ const char *section_and_lvalue; /* Section + "." + name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ size_t offset; /* Offset where to store data, from the beginning of userdata */
+} ConfigPerfItem;
+
+/* Prototype for a low-level gperf lookup function */
+typedef const ConfigPerfItem* (*ConfigPerfItemLookup)(const char *section_and_lvalue, unsigned length);
+
+/* Prototype for a generic high-level lookup function */
+typedef int (*ConfigItemLookup)(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata);
+
+/* Linear table search implementation of ConfigItemLookup, based on
+ * ConfigTableItem arrays */
+int config_item_table_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+/* gperf implementation of ConfigItemLookup, based on gperf
+ * ConfigPerfItem tables */
+int config_item_perf_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+int config_parse(
+ const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata);
+
+int config_parse_many_nulstr(
+ const char *conf_file, /* possibly NULL */
+ const char *conf_file_dirs, /* nulstr */
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata);
+
+int config_parse_many(
+ const char *conf_file, /* possibly NULL */
+ const char* const* conf_file_dirs,
+ const char *dropin_dirname,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_int);
+CONFIG_PARSER_PROTOTYPE(config_parse_unsigned);
+CONFIG_PARSER_PROTOTYPE(config_parse_long);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint8);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint16);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint32);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_double);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_si_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_bool);
+CONFIG_PARSER_PROTOTYPE(config_parse_tristate);
+CONFIG_PARSER_PROTOTYPE(config_parse_string);
+CONFIG_PARSER_PROTOTYPE(config_parse_path);
+CONFIG_PARSER_PROTOTYPE(config_parse_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_nsec);
+CONFIG_PARSER_PROTOTYPE(config_parse_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_warn_compat);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_facility);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_level);
+CONFIG_PARSER_PROTOTYPE(config_parse_signal);
+CONFIG_PARSER_PROTOTYPE(config_parse_personality);
+CONFIG_PARSER_PROTOTYPE(config_parse_permille);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifname);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_mtu);
+CONFIG_PARSER_PROTOTYPE(config_parse_rlimit);
+
+typedef enum Disabled { /* Reason passed as ltype to config_parse_warn_compat() */
+ DISABLED_CONFIGURATION, /* Option was disabled at compile time (logged at debug level) */
+ DISABLED_LEGACY, /* Option has been removed (logged at info level) */
+ DISABLED_EXPERIMENTAL, /* Option has not been enabled yet (logged at info level) */
+} Disabled;
+
+/* Defines a parser callback 'function' that calls parser(rvalue) and stores its non-negative int result in
+ * the int pointed to by data. A negative result is logged as "<msg>, ignoring: <rvalue>" and the stored
+ * value is left untouched. The generated function always returns 0. */
+#define DEFINE_CONFIG_PARSE(function, parser, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ int *i = data, r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue); \
+ if (r < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = r; \
+ return 0; \
+ }
+
+/* Defines a parser callback 'function' that calls parser(rvalue, i), where i is data viewed as 'type *', so
+ * the parser writes the destination itself. A negative result is logged as "<msg>, ignoring: <rvalue>";
+ * whether the destination is modified in that case is up to the parser. The generated function always
+ * returns 0. */
+#define DEFINE_CONFIG_PARSE_PTR(function, parser, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data; \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue, i); \
+ if (r < 0) \
+ log_syntax(unit, LOG_ERR, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ \
+ return 0; \
+ }
+
+/* Defines a parser callback 'function' for an enum: rvalue is translated with <name>_from_string() and the
+ * result stored in the 'type' object pointed to by data. A negative translation result is logged as
+ * "<msg>, ignoring: <rvalue>" (without an errno) and the stored value is left untouched. The generated
+ * function always returns 0. */
+#define DEFINE_CONFIG_PARSE_ENUM(function, name, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ x = name##_from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+/* Like DEFINE_CONFIG_PARSE_ENUM, except that an empty rvalue stores default_value instead of being treated
+ * as a parse error. The generated function always returns 0. */
+#define DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(function, name, type, default_value, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ if (isempty(rvalue)) { \
+ *i = default_value; \
+ return 0; \
+ } \
+ \
+ x = name##_from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+/* Defines a parser callback 'function' for a list of enum values. Each word of rvalue (as split by
+ * FOREACH_WORD) is translated with <name>_from_string(); the results are collected in a newly allocated
+ * array that is terminated by 'invalid'. Words that fail to translate are logged and skipped, as are
+ * duplicates of values already collected. On success the array replaces the one pointed to by data (the
+ * list is rebuilt from scratch, not appended to) and 0 is returned; on allocation failure -ENOMEM is
+ * returned and the previous array is left in place. */
+#define DEFINE_CONFIG_PARSE_ENUMV(function, name, type, invalid, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type **enums = data, x, *ys; \
+ _cleanup_free_ type *xs = NULL; \
+ const char *word, *state; \
+ size_t l, i = 0; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ xs = new0(type, 1); \
+ if (!xs) \
+ return -ENOMEM; \
+ \
+ *xs = invalid; \
+ \
+ FOREACH_WORD(word, l, rvalue, state) { \
+ _cleanup_free_ char *en = NULL; \
+ type *new_xs; \
+ \
+ en = strndup(word, l); \
+ if (!en) \
+ return -ENOMEM; \
+ \
+ if ((x = name##_from_string(en)) < 0) { \
+ log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ", ignoring: %s", en); \
+ continue; \
+ } \
+ \
+ for (ys = xs; x != invalid && *ys != invalid; ys++) { \
+ if (*ys == x) { \
+ log_syntax(unit, LOG_NOTICE, filename, \
+ line, 0, \
+ "Duplicate entry, ignoring: %s", \
+ en); \
+ x = invalid; \
+ } \
+ } \
+ \
+ if (x == invalid) \
+ continue; \
+ \
+ *(xs + i) = x; \
+ new_xs = realloc(xs, (++i + 1) * sizeof(type)); \
+ if (new_xs) \
+ xs = new_xs; \
+ else \
+ return -ENOMEM; \
+ \
+ *(xs + i) = invalid; \
+ } \
+ \
+ free_and_replace(*enums, xs); \
+ return 0; \
+ }
diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c
new file mode 100644
index 0000000..9a789ae
--- /dev/null
+++ b/src/shared/cpu-set-util.c
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "extract-word.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+/* Allocates a zeroed CPU set that is large enough for the kernel's affinity mask.
+ *
+ * The size is probed with sched_getaffinity(): starting at 1024 CPUs, the capacity is doubled for as long
+ * as the call fails with EINVAL. On success the set is cleared, the capacity (in CPUs) is stored in *ncpus
+ * if ncpus is non-NULL, and the set is returned; release it with CPU_FREE(). Returns NULL if the allocation
+ * fails or sched_getaffinity() fails with any other error. */
+cpu_set_t* cpu_set_malloc(unsigned *ncpus) {
+        unsigned n = 1024;
+
+        for (;;) {
+                cpu_set_t *c;
+                int saved_errno;
+
+                c = CPU_ALLOC(n);
+                if (!c)
+                        return NULL;
+
+                if (sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0) {
+                        /* Only the size was of interest, hand back an empty set. */
+                        CPU_ZERO_S(CPU_ALLOC_SIZE(n), c);
+
+                        if (ncpus)
+                                *ncpus = n;
+
+                        return c;
+                }
+
+                /* Remember why sched_getaffinity() failed before releasing the buffer: freeing memory may
+                 * overwrite errno, which would make the retry decision below unreliable. */
+                saved_errno = errno;
+                CPU_FREE(c);
+
+                if (saved_errno != EINVAL)
+                        return NULL;
+
+                n *= 2;
+        }
+}
+
+/* Parses a list of CPU indexes and ranges, separated by whitespace or commas, into a newly allocated CPU
+ * set. If rvalue contains at least one word, *cpu_set receives the set and its capacity in CPUs is
+ * returned; for an empty rvalue *cpu_set is left untouched and 0 is returned. Ranges with lower > upper are
+ * skipped. With warn set, problems are reported through log_syntax()/log_oom() and their result is
+ * returned; otherwise a plain negative errno is returned. */
+int parse_cpu_set_internal(
+                const char *rvalue,
+                cpu_set_t **cpu_set,
+                bool warn,
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *lvalue) {
+
+        _cleanup_cpu_free_ cpu_set_t *set = NULL;
+        const char *cursor = rvalue;
+        unsigned ncpus = 0;
+
+        assert(rvalue);
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                unsigned lower, upper, cpu;
+                int r;
+
+                r = extract_first_word(&cursor, &word, WHITESPACE ",", EXTRACT_QUOTES);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
+                        return warn ? log_oom() : -ENOMEM;
+                if (r < 0)
+                        return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, rvalue) : r;
+
+                /* Allocate lazily, so that an empty rvalue leaves no set behind. */
+                if (!set) {
+                        set = cpu_set_malloc(&ncpus);
+                        if (!set)
+                                return warn ? log_oom() : -ENOMEM;
+                }
+
+                r = parse_range(word, &lower, &upper);
+                if (r < 0)
+                        return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CPU affinity '%s'", word) : r;
+                if (lower >= ncpus || upper >= ncpus)
+                        return warn ? log_syntax(unit, LOG_ERR, filename, line, EINVAL, "CPU out of range '%s' ncpus is %u", word, ncpus) : -EINVAL;
+
+                if (lower > upper) {
+                        if (warn)
+                                log_syntax(unit, LOG_WARNING, filename, line, 0, "Range '%s' is invalid, %u > %u, ignoring", word, lower, upper);
+                        continue;
+                }
+
+                for (cpu = lower; cpu <= upper; cpu++)
+                        CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), set);
+        }
+
+        /* On success, sets *cpu_set and returns ncpus for the system. */
+        if (set)
+                *cpu_set = TAKE_PTR(set);
+
+        return (int) ncpus;
+}
diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h
new file mode 100644
index 0000000..1b6bd35
--- /dev/null
+++ b/src/shared/cpu-set-util.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sched.h>
+
+#include "macro.h"
+
+#ifdef __NCPUBITS
+#define CPU_SIZE_TO_NUM(n) ((n) * __NCPUBITS)
+#else
+#define CPU_SIZE_TO_NUM(n) ((n) * sizeof(cpu_set_t) * 8)
+#endif
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(cpu_set_t*, CPU_FREE);
+#define _cleanup_cpu_free_ _cleanup_(CPU_FREEp)
+
+/* Frees a CPU set (NULL is accepted) and always returns NULL, so the result can be assigned straight back
+ * to the pointer that held the set. */
+static inline cpu_set_t* cpu_set_mfree(cpu_set_t *p) {
+        if (!p)
+                return NULL;
+
+        CPU_FREE(p);
+        return NULL;
+}
+
+cpu_set_t* cpu_set_malloc(unsigned *ncpus);
+
+int parse_cpu_set_internal(const char *rvalue, cpu_set_t **cpu_set, bool warn, const char *unit, const char *filename, unsigned line, const char *lvalue);
+
+/* Variant of parse_cpu_set_internal() with warnings enabled; lvalue must not be NULL. */
+static inline int parse_cpu_set_and_warn(
+                const char *rvalue,
+                cpu_set_t **cpu_set,
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *lvalue) {
+
+        const bool warn = true;
+
+        assert(lvalue);
+
+        return parse_cpu_set_internal(rvalue, cpu_set, warn, unit, filename, line, lvalue);
+}
+
+/* Variant of parse_cpu_set_internal() with warnings disabled: no unit, file, line or setting name is
+ * passed along. */
+static inline int parse_cpu_set(const char *rvalue, cpu_set_t **cpu_set) {
+        const bool warn = false;
+
+        return parse_cpu_set_internal(rvalue, cpu_set, warn, NULL, NULL, 0, NULL);
+}
diff --git a/src/shared/crypt-util.c b/src/shared/crypt-util.c
new file mode 100644
index 0000000..20bdc54
--- /dev/null
+++ b/src/shared/crypt-util.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_LIBCRYPTSETUP
+#include "crypt-util.h"
+#include "log.h"
+
+/* Log callback that maps a libcryptsetup log level to a syslog priority and passes the message on to
+ * log_full(). Unknown levels are reported with log_error() and the message itself is then logged at
+ * LOG_ERR. usrptr is unused. */
+void cryptsetup_log_glue(int level, const char *msg, void *usrptr) {
+        int priority;
+
+        switch (level) {
+        case CRYPT_LOG_NORMAL:
+                priority = LOG_NOTICE;
+                break;
+        case CRYPT_LOG_ERROR:
+                priority = LOG_ERR;
+                break;
+        case CRYPT_LOG_VERBOSE:
+                priority = LOG_INFO;
+                break;
+        case CRYPT_LOG_DEBUG:
+                priority = LOG_DEBUG;
+                break;
+        default:
+                log_error("Unknown libcryptsetup log level: %d", level);
+                priority = LOG_ERR;
+                break;
+        }
+
+        log_full(priority, "%s", msg);
+}
+#endif
diff --git a/src/shared/crypt-util.h b/src/shared/crypt-util.h
new file mode 100644
index 0000000..8c86714
--- /dev/null
+++ b/src/shared/crypt-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if HAVE_LIBCRYPTSETUP
+#include <libcryptsetup.h>
+
+#include "macro.h"
+
+/* libcryptsetup define for any LUKS version, compatible with libcryptsetup 1.x */
+#ifndef CRYPT_LUKS
+#define CRYPT_LUKS NULL
+#endif
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, crypt_free);
+
+void cryptsetup_log_glue(int level, const char *msg, void *usrptr);
+#endif
diff --git a/src/shared/daemon-util.h b/src/shared/daemon-util.h
new file mode 100644
index 0000000..5e9eca1
--- /dev/null
+++ b/src/shared/daemon-util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#define NOTIFY_READY "READY=1\n" "STATUS=Processing requests..."
+#define NOTIFY_STOPPING "STOPPING=1\n" "STATUS=Shutting down..."
+
+/* Sends the 'start' message via sd_notify() if one is given (its result is ignored) and returns 'stop'
+ * unchanged, so the caller can keep it around, e.g. for notify_on_cleanup(). */
+static inline const char *notify_start(const char *start, const char *stop) {
+        if (!start)
+                return stop;
+
+        (void) sd_notify(false, start);
+        return stop;
+}
+
+/* This is intended to be used with _cleanup_ attribute: p is then the address of the annotated variable,
+ * so it is the message it points to (*p), not p itself, that may be NULL, e.g. when notify_start() was
+ * given a NULL stop message. Nothing is sent in that case. The result of sd_notify() is ignored. */
+static inline void notify_on_cleanup(const char **p) {
+        if (p && *p)
+                (void) sd_notify(false, *p);
+}
diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c
new file mode 100644
index 0000000..b545c2a
--- /dev/null
+++ b/src/shared/dev-setup.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dev-setup.h"
+#include "label.h"
+#include "log.h"
+#include "path-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "util.h"
+
+/* Creates the standard /dev symlinks (core, fd, stdin, stdout, stderr), optionally below 'prefix', and
+ * chowns them to uid/gid unless both are invalid. Failures to symlink or chown are only logged at debug
+ * level. Returns 0, or -ENOMEM if a prefixed path cannot be allocated. */
+int dev_setup(const char *prefix, uid_t uid, gid_t gid) {
+        /* Pairs of symlink target and link path. */
+        static const char symlinks[] =
+                "-/proc/kcore\0"     "/dev/core\0"
+                "/proc/self/fd\0"    "/dev/fd\0"
+                "/proc/self/fd/0\0"  "/dev/stdin\0"
+                "/proc/self/fd/1\0"  "/dev/stdout\0"
+                "/proc/self/fd/2\0"  "/dev/stderr\0";
+
+        const char *src, *dst;
+
+        NULSTR_FOREACH_PAIR(src, dst, symlinks) {
+                _cleanup_free_ char *prefixed = NULL;
+                const char *target = src, *link = dst;
+                int r;
+
+                /* A leading '-' marks the link as optional: it is only created if its target exists. */
+                if (target[0] == '-') {
+                        target++;
+
+                        if (access(target, F_OK) < 0)
+                                continue;
+                }
+
+                if (prefix) {
+                        prefixed = prefix_root(prefix, dst);
+                        if (!prefixed)
+                                return -ENOMEM;
+
+                        link = prefixed;
+                }
+
+                r = symlink_label(target, link);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to symlink %s to %s: %m", target, link);
+
+                if ((uid != UID_INVALID || gid != GID_INVALID) && lchown(link, uid, gid) < 0)
+                        log_debug_errno(errno, "Failed to chown %s: %m", link);
+        }
+
+        return 0;
+}
+
+/* Creates the "inaccessible" placeholder nodes below <root>/run/systemd/inaccessible, one per file type,
+ * and chowns each newly created node to uid/gid unless both are invalid. Failures to create or chown a
+ * node are logged at debug level (EEXIST silently) and otherwise ignored. Returns 0, or the result of
+ * log_oom() if a path cannot be allocated. */
+int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid) {
+        static const struct {
+                const char *name;
+                mode_t mode;
+        } table[] = {
+                { "/run/systemd",                   S_IFDIR  | 0755 },
+                { "/run/systemd/inaccessible",      S_IFDIR  | 0000 },
+                { "/run/systemd/inaccessible/reg",  S_IFREG  | 0000 },
+                { "/run/systemd/inaccessible/dir",  S_IFDIR  | 0000 },
+                { "/run/systemd/inaccessible/fifo", S_IFIFO  | 0000 },
+                { "/run/systemd/inaccessible/sock", S_IFSOCK | 0000 },
+
+                /* The following two are likely to fail if we lack the privileges for it (for example in a
+                 * userns environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibits
+                 * device nodes with major/minor of 0 from being created). But that's entirely fine.
+                 * Consumers of these files should fall back to a different node then, for example
+                 * /run/systemd/inaccessible/sock, which is close enough in behaviour and semantics for
+                 * most uses. */
+                { "/run/systemd/inaccessible/chr",  S_IFCHR  | 0000 },
+                { "/run/systemd/inaccessible/blk",  S_IFBLK  | 0000 },
+        };
+
+        _cleanup_umask_ mode_t u;
+        size_t idx;
+
+        u = umask(0000);
+
+        /* Set up inaccessible (and empty) file nodes of all types. These are used as mount sources for
+         * over-mounting ("masking") file nodes that shall become inaccessible and empty for specific
+         * containers or services. We try to lock down these nodes as much as we can, but otherwise try to
+         * match them as closely as possible with the underlying file, i.e. in the best case we offer the
+         * same node type as the underlying node. */
+
+        for (idx = 0; idx < ELEMENTSOF(table); idx++) {
+                _cleanup_free_ char *path = NULL;
+                const mode_t mode = table[idx].mode;
+                int r;
+
+                path = prefix_root(root, table[idx].name);
+                if (!path)
+                        return log_oom();
+
+                r = S_ISDIR(mode) ? mkdir(path, mode & 07777) : mknod(path, mode, makedev(0, 0));
+                if (r < 0) {
+                        if (errno != EEXIST)
+                                log_debug_errno(errno, "Failed to create '%s', ignoring: %m", path);
+                        continue;
+                }
+
+                if ((uid != UID_INVALID || gid != GID_INVALID) && lchown(path, uid, gid) < 0)
+                        log_debug_errno(errno, "Failed to chown '%s': %m", path);
+        }
+
+        return 0;
+}
diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h
new file mode 100644
index 0000000..72b90ec
--- /dev/null
+++ b/src/shared/dev-setup.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <sys/types.h>
+
+int dev_setup(const char *prefix, uid_t uid, gid_t gid);
+
+int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid);
diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c
new file mode 100644
index 0000000..d340487
--- /dev/null
+++ b/src/shared/dissect-image.c
@@ -0,0 +1,1507 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "architecture.h"
+#include "ask-password-api.h"
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "copy.h"
+#include "crypt-util.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "device-util.h"
+#include "dissect-image.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "gpt.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "linux-3.13/dm-ioctl.h"
+#include "missing.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+ /* Probes 'node' with libblkid and reports the detected content type (the blkid "TYPE" value).
+  *
+  * Returns 1 and stores a newly allocated string in *ret_fstype if a type was detected, and 0 with
+  * *ret_fstype set to NULL if nothing was found. Returns -EUCLEAN if the probe result is ambiguous,
+  * -ENOMEM on allocation failure, some other negative errno-style error if probing failed, and
+  * -EOPNOTSUPP when built without libblkid. On error *ret_fstype is left untouched; on success the
+  * caller owns the returned string. */
+ int probe_filesystem(const char *node, char **ret_fstype) {
+ #if HAVE_BLKID
+         _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+         const char *fstype = NULL; /* must start out as NULL: the lookup below may fail without setting it */
+         int r;
+
+         errno = 0;
+         b = blkid_new_probe_from_filename(node);
+         if (!b)
+                 return -errno ?: -ENOMEM;
+
+         /* We are only interested in the superblock type, nothing else */
+         blkid_probe_enable_superblocks(b, 1);
+         blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+
+         errno = 0;
+         r = blkid_do_safeprobe(b);
+         if (r == 1) {
+                 log_debug("No type detected on partition %s", node);
+                 goto not_found;
+         }
+         if (r == -2) {
+                 log_debug("Results ambiguous for partition %s", node);
+                 return -EUCLEAN;
+         }
+         if (r != 0)
+                 return -errno ?: -EIO;
+
+         /* The return value is ignored on purpose: if the lookup fails fstype simply remains NULL and we
+          * report "not found". */
+         (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+         if (fstype) {
+                 char *t;
+
+                 t = strdup(fstype);
+                 if (!t)
+                         return -ENOMEM;
+
+                 *ret_fstype = t;
+                 return 1;
+         }
+
+ not_found:
+         *ret_fstype = NULL;
+         return 0;
+ #else
+         return -EOPNOTSUPP;
+ #endif
+ }
+
+#if HAVE_BLKID
+ /* Recognizes the RPMB and boot partitions of MMC devices by their kernel name, since blkid does not
+  * list them. See https://github.com/systemd/systemd/issues/5806. */
+ static bool device_is_mmc_special_partition(sd_device *d) {
+         const char *name;
+
+         assert(d);
+
+         /* Without a sysname we cannot tell, hence assume it is a regular device */
+         if (sd_device_get_sysname(d, &name) < 0)
+                 return false;
+
+         if (!startswith(name, "mmcblk"))
+                 return false;
+
+         return endswith(name, "rpmb") ||
+                 endswith(name, "boot0") ||
+                 endswith(name, "boot1");
+ }
+
+ /* Returns true if the device's subsystem can be determined and is "block". */
+ static bool device_is_block(sd_device *d) {
+         const char *subsystem;
+
+         assert(d);
+
+         return sd_device_get_subsystem(d, &subsystem) >= 0 &&
+                 streq(subsystem, "block");
+ }
+
+ /* Allocates a device enumerator that also covers uninitialized devices and matches on the parent
+  * device 'd'. On success returns 0 and hands the enumerator to the caller via *ret; on failure returns
+  * the negative error of the step that failed and leaves *ret untouched. */
+ static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
+         _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
+         int r;
+
+         assert(d);
+         assert(ret);
+
+         /* Each step only runs if the previous one did not fail */
+         r = sd_device_enumerator_new(&enumerator);
+         if (r >= 0)
+                 r = sd_device_enumerator_allow_uninitialized(enumerator);
+         if (r >= 0)
+                 r = sd_device_enumerator_add_match_parent(enumerator, d);
+         if (r < 0)
+                 return r;
+
+         *ret = TAKE_PTR(enumerator);
+         return 0;
+ }
+
+/* how many times to wait for the device nodes to appear */
+#define N_DEVICE_NODE_LIST_ATTEMPTS 10
+
+ /* Checks once whether the kernel exposes exactly 'num_partitions' partition block devices (plus the
+  * parent device itself) below 'd'.
+  *
+  * fd is an open file descriptor of the block device, used for the BLKRRPART/LOOP_GET_STATUS64 ioctls.
+  * Unless DISSECT_IMAGE_NO_UDEV is set in 'flags', each counted device is also waited for via
+  * device_wait_for_initialization().
+  *
+  * Returns 0 and hands the enumerator to the caller in *ret_enumerator if the count matches. Returns -EIO
+  * if the kernel lists more devices than expected, -EPROTONOSUPPORT if this is a loop device with
+  * partition scanning turned off, -EBUSY if re-reading the partition table kept failing with EBUSY, and
+  * -EAGAIN if the partition table was re-read successfully and the caller should call us again. */
+ static int wait_for_partitions_to_appear(
+                 int fd,
+                 sd_device *d,
+                 unsigned num_partitions,
+                 DissectImageFlags flags,
+                 sd_device_enumerator **ret_enumerator) {
+
+         _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+         sd_device *q;
+         unsigned n;
+         int r;
+
+         assert(fd >= 0);
+         assert(d);
+         assert(ret_enumerator);
+
+         r = enumerator_for_parent(d, &e);
+         if (r < 0)
+                 return r;
+
+         /* Count the partitions enumerated by the kernel */
+         n = 0;
+         FOREACH_DEVICE(e, q) {
+                 if (sd_device_get_devnum(q, NULL) < 0)
+                         continue;
+                 if (!device_is_block(q))
+                         continue;
+                 if (device_is_mmc_special_partition(q))
+                         continue;
+
+                 if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+                         r = device_wait_for_initialization(q, "block", NULL);
+                         if (r < 0)
+                                 return r;
+                 }
+
+                 n++;
+         }
+
+         /* The "+ 1" accounts for the parent device, which is part of the enumeration too */
+         if (n == num_partitions + 1) {
+                 *ret_enumerator = TAKE_PTR(e);
+                 return 0; /* success! */
+         }
+         if (n > num_partitions + 1)
+                 return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+                                        "blkid and kernel partition lists do not match.");
+
+         /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running or it
+          * got EBUSY because udev already opened the device. Let's reprobe the device, which is a synchronous
+          * call that waits until probing is complete. */
+
+         for (unsigned j = 0; ; j++) {
+                 /* Give up after a bounded number of attempts. Note that j is advanced by the loop header
+                  * only; incrementing it here as well would silently halve the retry budget. */
+                 if (j > 20)
+                         return -EBUSY;
+
+                 if (ioctl(fd, BLKRRPART, 0) >= 0)
+                         break;
+                 r = -errno;
+                 if (r == -EINVAL) {
+                         struct loop_info64 info;
+
+                         /* If we are running on a loop device that has partition scanning off, return
+                          * an explicit recognizable error about this, so that callers can generate a
+                          * proper message explaining the situation. */
+
+                         if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) {
+                                 log_debug("Device is a loop device and partition scanning is off!");
+                                 return -EPROTONOSUPPORT;
+                         }
+                 }
+                 if (r != -EBUSY)
+                         return r;
+
+                 /* If something else has the device open, such as an udev rule, the ioctl will return
+                  * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a bit,
+                  * and try again.
+                  *
+                  * This is really something they should fix in the kernel! */
+                 (void) usleep(50 * USEC_PER_MSEC);
+         }
+
+         return -EAGAIN; /* no success yet, try again */
+ }
+
+ /* Calls wait_for_partitions_to_appear() up to N_DEVICE_NODE_LIST_ATTEMPTS times, until the kernel shows
+  * the expected number of partitions below 'd'.
+  *
+  * Unless DISSECT_IMAGE_NO_UDEV is set, the parent device is first waited for via
+  * device_wait_for_initialization(). Returns 0 with *ret_enumerator set on success, -ENXIO if the
+  * partitions did not show up within the attempt limit, and otherwise whatever error
+  * wait_for_partitions_to_appear() or the initialization wait returned. */
+ static int loop_wait_for_partitions_to_appear(
+                 int fd,
+                 sd_device *d,
+                 unsigned num_partitions,
+                 DissectImageFlags flags,
+                 sd_device_enumerator **ret_enumerator) {
+         _cleanup_(sd_device_unrefp) sd_device *device = NULL;
+         int r;
+
+         assert(fd >= 0);
+         assert(d);
+         assert(ret_enumerator);
+
+         /* num_partitions is unsigned, hence %u */
+         log_debug("Waiting for device (parent + %u partitions) to appear...", num_partitions);
+
+         if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+                 r = device_wait_for_initialization(d, "block", &device);
+                 if (r < 0)
+                         return r;
+         } else
+                 device = sd_device_ref(d);
+
+         for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
+                 r = wait_for_partitions_to_appear(fd, device, num_partitions, flags, ret_enumerator);
+                 if (r != -EAGAIN) /* either success or a definitive failure */
+                         return r;
+         }
+
+         return log_debug_errno(SYNTHETIC_ERRNO(ENXIO),
+                                "Kernel partitions did not appear within %d attempts",
+                                N_DEVICE_NODE_LIST_ATTEMPTS);
+ }
+
+#endif
+
+ /* Dissects the block device referenced by 'fd' and allocates a DissectedImage describing what was found.
+  *
+  * fd             open file descriptor of a block device (anything else yields -ENOTBLK)
+  * root_hash      optional verity root hash; if given, its first and last 128 bits select the root and
+  *                verity partitions by UUID
+  * root_hash_size size of root_hash in bytes, must be 0 if root_hash is NULL
+  * flags          DISSECT_IMAGE_* flags; GPT_ONLY, REQUIRE_ROOT and NO_UDEV are evaluated here
+  * ret            on success receives the new DissectedImage
+  *
+  * Returns 0 on success and a negative errno-style error otherwise, including -EINVAL for an unusable
+  * root hash, -ENOPKG, -EADDRNOTAVAIL, -ENXIO, -ENOTUNIQ and, if built without libblkid, -EOPNOTSUPP. */
+ int dissect_image(
+ int fd,
+ const void *root_hash,
+ size_t root_hash_size,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+ #if HAVE_BLKID
+ sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL;
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *generic_node = NULL;
+ sd_id128_t generic_uuid = SD_ID128_NULL;
+ const char *pttype = NULL;
+ blkid_partlist pl;
+ int r, generic_nr;
+ struct stat st;
+ sd_device *q;
+ unsigned i;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(root_hash || root_hash_size == 0);
+
+ /* Probes a disk image, and returns information about what it found in *ret.
+ *
+ * Returns -ENOPKG if no suitable partition table or file system could be found.
+ * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
+
+ if (root_hash) {
+ /* If a root hash is supplied, then we use the root partition that has a UUID that matches the first
+ * 128bit of the root hash. And we use the verity partition that has a UUID that matches the final
+ * 128bit. */
+
+ if (root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ memcpy(&root_uuid, root_hash, sizeof(sd_id128_t));
+ memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
+
+ if (sd_id128_is_null(root_uuid))
+ return -EINVAL;
+ if (sd_id128_is_null(verity_uuid))
+ return -EINVAL;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ b = blkid_new_probe();
+ if (!b)
+ return -ENOMEM;
+
+ errno = 0;
+ r = blkid_probe_set_device(b, fd, 0, 0);
+ if (r != 0)
+ return -errno ?: -ENOMEM;
+
+ if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
+ /* Look for file system superblocks, unless we only shall look for GPT partition tables */
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
+ }
+
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (IN_SET(r, -2, 1)) {
+ log_debug("Failed to identify any partition table.");
+ return -ENOPKG;
+ }
+ if (r != 0)
+ return -errno ?: -EIO;
+
+ m = new0(DissectedImage, 1);
+ if (!m)
+ return -ENOMEM;
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return r;
+
+ /* Case 1: the device carries a file system (or crypto container) directly, without a partition table */
+ if (!(flags & DISSECT_IMAGE_GPT_ONLY) &&
+ (flags & DISSECT_IMAGE_REQUIRE_ROOT)) {
+ const char *usage = NULL;
+
+ (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
+ if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
+ _cleanup_free_ char *t = NULL, *n = NULL;
+ const char *fstype = NULL;
+
+ /* OK, we have found a file system, that's our root partition then. */
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ r = device_path_make_major_minor(st.st_mode, st.st_rdev, &n);
+ if (r < 0)
+ return r;
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = true,
+ .partno = -1,
+ .architecture = _ARCHITECTURE_INVALID,
+ .fstype = TAKE_PTR(t),
+ .node = TAKE_PTR(n),
+ };
+
+ m->encrypted = streq_ptr(fstype, "crypto_LUKS");
+
+ /* No partitions are expected here, only the parent device itself */
+ r = loop_wait_for_partitions_to_appear(fd, d, 0, flags, &e);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+ }
+ }
+
+ /* Case 2: a partition table. Only GPT and (unless DISSECT_IMAGE_GPT_ONLY is set) MBR are accepted. */
+ (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
+ if (!pttype)
+ return -ENOPKG;
+
+ is_gpt = streq_ptr(pttype, "gpt");
+ is_mbr = streq_ptr(pttype, "dos");
+
+ if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
+ return -ENOPKG;
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(b);
+ if (!pl)
+ return -errno ?: -ENOMEM;
+
+ r = loop_wait_for_partitions_to_appear(fd, d, blkid_partlist_numof_partitions(pl), flags, &e);
+ if (r < 0)
+ return r;
+
+ /* Walk the kernel's partition devices and match each of them against blkid's partition list */
+ FOREACH_DEVICE(e, q) {
+ unsigned long long pflags;
+ blkid_partition pp;
+ const char *node;
+ dev_t qn;
+ int nr;
+
+ r = sd_device_get_devnum(q, &qn);
+ if (r < 0)
+ continue;
+
+ /* Skip the whole-disk device itself */
+ if (st.st_rdev == qn)
+ continue;
+
+ if (!device_is_block(q))
+ continue;
+
+ if (device_is_mmc_special_partition(q))
+ continue;
+
+ r = sd_device_get_devname(q, &node);
+ if (r < 0)
+ continue;
+
+ pp = blkid_partlist_devno_to_partition(pl, qn);
+ if (!pp)
+ continue;
+
+ pflags = blkid_partition_get_flags(pp);
+
+ nr = blkid_partition_get_partno(pp);
+ if (nr < 0)
+ continue;
+
+ if (is_gpt) {
+ int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID;
+ const char *stype, *sid, *fstype = NULL;
+ sd_id128_t type_id, id;
+ bool rw = true;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (!sid)
+ continue;
+ if (sd_id128_from_string(sid, &id) < 0)
+ continue;
+
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
+ if (sd_id128_from_string(stype, &type_id) < 0)
+ continue;
+
+ if (sd_id128_equal(type_id, GPT_HOME)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_HOME;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_SRV)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SRV;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_ESP)) {
+
+ /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined
+ * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the
+ * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */
+
+ if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
+ continue;
+
+ designator = PARTITION_ESP;
+ fstype = "vfat";
+ }
+ #ifdef GPT_ROOT_NATIVE
+ else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT;
+ architecture = native_architecture();
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless a root hash is specified */
+ if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = native_architecture();
+ rw = false;
+ }
+ #endif
+ #ifdef GPT_ROOT_SECONDARY
+ else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY;
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless a root hash is specified */
+ if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = false;
+ }
+ #endif
+ else if (sd_id128_equal(type_id, GPT_SWAP)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SWAP;
+ fstype = "swap";
+ } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* Remember the first generic partition as a possible root fallback, and note if there is
+ * more than one. generic_nr and generic_rw are only valid while generic_node is set. */
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
+ generic_uuid = id;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+ }
+
+ if (designator != _PARTITION_DESIGNATOR_INVALID) {
+ _cleanup_free_ char *t = NULL, *n = NULL;
+
+ /* First one wins */
+ if (m->partitions[designator].found)
+ continue;
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ n = strdup(node);
+ if (!n)
+ return -ENOMEM;
+
+ m->partitions[designator] = (DissectedPartition) {
+ .found = true,
+ .partno = nr,
+ .rw = rw,
+ .architecture = architecture,
+ .node = TAKE_PTR(n),
+ .fstype = TAKE_PTR(t),
+ .uuid = id,
+ };
+ }
+
+ } else if (is_mbr) {
+
+ if (pflags != 0x80) /* Bootable flag */
+ continue;
+
+ if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = true;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (!m->partitions[PARTITION_ROOT].found) {
+ /* No root partition found? Then let's see if there's one for the secondary architecture. And if not
+ * either, then check if there's a single generic one, and use that. */
+
+ if (m->partitions[PARTITION_ROOT_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
+ /* Move the secondary entries over; the strings they own travel along with the struct copy */
+ m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY]);
+
+ m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
+
+ } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
+
+ /* If the root hash was set, then we won't fallback to a generic node, because the root hash
+ * decides */
+ if (root_hash)
+ return -EADDRNOTAVAIL;
+
+ /* If we didn't find a generic node, then we can't fix this up either */
+ if (!generic_node)
+ return -ENXIO;
+
+ /* If we didn't find a properly marked root partition, but we did find a single suitable
+ * generic Linux partition, then use this as root partition, if the caller asked for it. */
+ if (multiple_generic)
+ return -ENOTUNIQ;
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = generic_rw,
+ .partno = generic_nr,
+ .architecture = _ARCHITECTURE_INVALID,
+ .node = TAKE_PTR(generic_node),
+ .uuid = generic_uuid,
+ };
+ }
+ }
+
+ if (root_hash) {
+ if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
+ return -EADDRNOTAVAIL;
+
+ /* If we found the primary root with the hash, then we definitely want to suppress any secondary root
+ * (which would be weird, after all the root hash should only be assigned to one pair of
+ * partitions... */
+ m->partitions[PARTITION_ROOT_SECONDARY].found = false;
+ m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
+
+ /* If we found a verity setup, then the root partition is necessarily read-only. */
+ m->partitions[PARTITION_ROOT].rw = false;
+
+ m->verity = true;
+ }
+
+ /* Release the probe explicitly before the partitions are probed individually below */
+ blkid_free_probe(b);
+ b = NULL;
+
+ /* Fill in file system types if we don't know them yet. */
+ for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+
+ if (!p->found)
+ continue;
+
+ if (!p->fstype && p->node) {
+ /* An ambiguous probe result (-EUCLEAN) is tolerated, the type then simply stays unknown */
+ r = probe_filesystem(p->node, &p->fstype);
+ if (r < 0 && r != -EUCLEAN)
+ return r;
+ }
+
+ if (streq_ptr(p->fstype, "crypto_LUKS"))
+ m->encrypted = true;
+
+ if (p->fstype && fstype_is_ro(p->fstype))
+ p->rw = false;
+ }
+
+ *ret = TAKE_PTR(m);
+
+ return 0;
+ #else
+ return -EOPNOTSUPP;
+ #endif
+ }
+
+ /* Frees a DissectedImage together with all strings owned by its partition entries and its metadata
+  * fields. Accepts NULL. Always returns NULL. */
+ DissectedImage* dissected_image_unref(DissectedImage *m) {
+         if (!m)
+                 return NULL;
+
+         for (unsigned designator = 0; designator < _PARTITION_DESIGNATOR_MAX; designator++) {
+                 DissectedPartition *part = m->partitions + designator;
+
+                 free(part->fstype);
+                 free(part->node);
+                 free(part->decrypted_fstype);
+                 free(part->decrypted_node);
+         }
+
+         free(m->hostname);
+         strv_free(m->machine_info);
+         strv_free(m->os_release);
+
+         return mfree(m);
+ }
+
+ /* Checks whether the block device node at 'path' is a loop device, or a partition of one.
+  *
+  * Returns true (> 0) if so, false (0) if not, -ENOTBLK if 'path' is not a block device node, and another
+  * negative errno-style error if stat() or access() fail unexpectedly. Callers must hence compare the
+  * result against "> 0" rather than use it as a plain boolean. */
+ static int is_loop_device(const char *path) {
+         char s[SYS_BLOCK_PATH_MAX("/../loop/")];
+         struct stat st;
+
+         assert(path);
+
+         if (stat(path, &st) < 0)
+                 return -errno;
+
+         if (!S_ISBLK(st.st_mode))
+                 return -ENOTBLK;
+
+         /* Look the device up by st_rdev, i.e. the device number the node refers to. st_dev would be the
+          * device of the file system the node is stored on, which is not what we are interested in. */
+         xsprintf_sys_block_path(s, "/loop/", st.st_rdev);
+         if (access(s, F_OK) < 0) {
+                 if (errno != ENOENT)
+                         return -errno;
+
+                 /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
+                 xsprintf_sys_block_path(s, "/../loop/", st.st_rdev);
+                 if (access(s, F_OK) < 0)
+                         return errno == ENOENT ? false : -errno;
+         }
+
+         return true;
+ }
+
+ /* Mounts a single dissected partition.
+  *
+  * m          the partition; nothing is done (and 0 returned) if it was not found or has no node or
+  *            file system type
+  * where      root directory of the mount tree
+  * directory  if non-NULL, a path below 'where' to mount onto, resolved via chase_symlinks() with
+  *            CHASE_PREFIX_ROOT; if NULL the partition is mounted on 'where' itself
+  * uid_shift  if valid and non-zero, and the file system supports it, passed as uid=/gid= mount options
+  * flags      DISSECT_IMAGE_READ_ONLY, DISSECT_IMAGE_DISCARD and DISSECT_IMAGE_DISCARD_ON_LOOP are
+  *            evaluated here
+  *
+  * Returns -ELOOP if the (decrypted) file system is itself a LUKS container, -ENOMEM on allocation
+  * failure, an error from chase_symlinks(), and otherwise the result of mount_verbose(). */
+ static int mount_partition(
+                 DissectedPartition *m,
+                 const char *where,
+                 const char *directory,
+                 uid_t uid_shift,
+                 DissectImageFlags flags) {
+
+         _cleanup_free_ char *chased = NULL, *options = NULL;
+         const char *p, *node, *fstype;
+         bool rw;
+         int r;
+
+         assert(m);
+         assert(where);
+
+         /* Prefer the decrypted device and its file system type, if there is one */
+         node = m->decrypted_node ?: m->node;
+         fstype = m->decrypted_fstype ?: m->fstype;
+
+         if (!m->found || !node || !fstype)
+                 return 0;
+
+         /* Stacked encryption? Yuck */
+         if (streq_ptr(fstype, "crypto_LUKS"))
+                 return -ELOOP;
+
+         rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
+
+         if (directory) {
+                 r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased);
+                 if (r < 0)
+                         return r;
+
+                 p = chased;
+         } else
+                 p = where;
+
+         /* If requested, turn on discard support. Note that is_loop_device() returns a negative error on
+          * failure, which must not be mistaken for "yes, this is a loop device", hence the explicit
+          * comparison. */
+         if (fstype_can_discard(fstype) &&
+             ((flags & DISSECT_IMAGE_DISCARD) ||
+              ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) {
+                 options = strdup("discard");
+                 if (!options)
+                         return -ENOMEM;
+         }
+
+         if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
+                 _cleanup_free_ char *uid_option = NULL;
+
+                 if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
+                         return -ENOMEM;
+
+                 if (!strextend_with_separator(&options, ",", uid_option, NULL))
+                         return -ENOMEM;
+         }
+
+         return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+ }
+
+ /* Mounts the partitions of a dissected image below 'where'.
+  *
+  * Unless DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY is set, the root partition is mounted on 'where' first; with
+  * DISSECT_IMAGE_VALIDATE_OS the result is additionally checked with path_is_os_tree() and -EMEDIUMTYPE
+  * is returned if it is not an OS tree. Unless DISSECT_IMAGE_MOUNT_ROOT_ONLY is set, /home, /srv and the
+  * ESP are mounted afterwards.
+  *
+  * Returns 0 on success, -ENXIO if the image has no root partition, and otherwise a negative errno-style
+  * error from mounting or validation. */
+ int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
+         int r;
+
+         assert(m);
+         assert(where);
+
+         if (!m->partitions[PARTITION_ROOT].found)
+                 return -ENXIO;
+
+         if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
+                 r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
+                 if (r < 0)
+                         return r;
+
+                 if (flags & DISSECT_IMAGE_VALIDATE_OS) {
+                         r = path_is_os_tree(where);
+                         if (r < 0)
+                                 return r;
+                         if (r == 0)
+                                 return -EMEDIUMTYPE;
+                 }
+         }
+
+         if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
+                 return 0;
+
+         r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
+         if (r < 0)
+                 return r;
+
+         r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
+         if (r < 0)
+                 return r;
+
+         if (m->partitions[PARTITION_ESP].found) {
+                 const char *mp;
+
+                 /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */
+
+                 FOREACH_STRING(mp, "/efi", "/boot") {
+                         _cleanup_free_ char *p = NULL;
+
+                         r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p);
+                         if (r < 0)
+                                 continue;
+
+                         r = dir_is_empty(p);
+                         if (r > 0) {
+                                 r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags);
+                                 if (r < 0)
+                                         return r;
+
+                                 /* The ESP is mounted now. /boot is only a fallback, hence don't mount it
+                                  * a second time there. */
+                                 break;
+                         }
+                 }
+         }
+
+         return 0;
+ }
+
+#if HAVE_LIBCRYPTSETUP
+ /* One activated device-mapper device (LUKS or verity) belonging to a decrypted image. */
+ typedef struct DecryptedPartition {
+ struct crypt_device *device; /* libcryptsetup handle, released with crypt_free() in decrypted_image_unref() */
+ char *name; /* device-mapper name the device was activated under, owned by this structure */
+ bool relinquished; /* set by decrypted_image_relinquish(); if true the device is not deactivated on unref */
+ } DecryptedPartition;
+
+ /* The list of devices activated for an image by dissected_image_decrypt(). */
+ struct DecryptedImage {
+ DecryptedPartition *decrypted; /* array, grown with GREEDY_REALLOC0() */
+ size_t n_decrypted; /* number of entries in use */
+ size_t n_allocated; /* allocation bookkeeping for GREEDY_REALLOC0() */
+ };
+#endif
+
+ /* Releases a DecryptedImage. Every device that was not relinquished via decrypted_image_relinquish() is
+  * deactivated first (failures are logged at debug level and otherwise ignored), then all resources are
+  * freed. Accepts NULL. Always returns NULL. */
+ DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
+ #if HAVE_LIBCRYPTSETUP
+         size_t i;
+         int r;
+
+         if (!d)
+                 return NULL;
+
+         for (i = 0; i < d->n_decrypted; i++) {
+                 DecryptedPartition *p = d->decrypted + i;
+
+                 if (p->device && p->name && !p->relinquished) {
+                         r = crypt_deactivate(p->device, p->name);
+                         if (r < 0)
+                                 log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
+                 }
+
+                 if (p->device)
+                         crypt_free(p->device);
+                 free(p->name);
+         }
+
+         /* The array itself is heap memory too (allocated via GREEDY_REALLOC0()), don't leak it */
+         free(d->decrypted);
+         free(d);
+ #endif
+         return NULL;
+ }
+
+#if HAVE_LIBCRYPTSETUP
+
+ /* Derives a device-mapper name and device node path from the path of an existing node: the name is the
+  * last path component of 'original_node' with 'suffix' appended, the node is that name below
+  * crypt_get_dir().
+  *
+  * On success returns 0 and passes ownership of both newly allocated strings to the caller. Returns
+  * -EINVAL if 'original_node' contains no slash, ends in one, or the resulting name is not a valid file
+  * name, and -ENOMEM on allocation failure. */
+ static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
+         _cleanup_free_ char *dm_name = NULL, *dm_node = NULL;
+         const char *slash;
+
+         assert(original_node);
+         assert(suffix);
+         assert(ret_name);
+         assert(ret_node);
+
+         /* Everything after the final slash is the base name; it must not be empty */
+         slash = strrchr(original_node, '/');
+         if (!slash || isempty(slash + 1))
+                 return -EINVAL;
+
+         dm_name = strjoin(slash + 1, suffix);
+         if (!dm_name)
+                 return -ENOMEM;
+         if (!filename_is_valid(dm_name))
+                 return -EINVAL;
+
+         dm_node = strjoin(crypt_get_dir(), "/", dm_name);
+         if (!dm_node)
+                 return -ENOMEM;
+
+         *ret_name = TAKE_PTR(dm_name);
+         *ret_node = TAKE_PTR(dm_node);
+
+         return 0;
+ }
+
+ /* Activates the LUKS volume on partition 'm' with the given passphrase and records the new device in 'd'.
+  *
+  * Nothing is done (and 0 returned) if the partition was not found, lacks a node or file system type, or
+  * is not of type "crypto_LUKS". On success m->decrypted_node is set to the node of the activated device
+  * and 0 is returned. Returns -ENOKEY if no passphrase was supplied, -EKEYREJECTED if activation failed
+  * with -EPERM, -ENOMEM on allocation failure, and other negative errors from libcryptsetup or from
+  * generating the device name. */
+ static int decrypt_partition(
+                 DissectedPartition *m,
+                 const char *passphrase,
+                 DissectImageFlags flags,
+                 DecryptedImage *d) {
+
+         _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+         _cleanup_free_ char *dm_node = NULL, *dm_name = NULL;
+         DecryptedPartition *slot;
+         int r;
+
+         assert(m);
+         assert(d);
+
+         /* Only LUKS volumes on partitions we fully know about are of interest here */
+         if (!m->found || !m->node || !m->fstype || !streq(m->fstype, "crypto_LUKS"))
+                 return 0;
+
+         if (!passphrase)
+                 return -ENOKEY;
+
+         r = make_dm_name_and_node(m->node, "-decrypted", &dm_name, &dm_node);
+         if (r < 0)
+                 return r;
+
+         /* Make room for the new entry up-front, so that nothing can fail anymore after activation */
+         if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+                 return -ENOMEM;
+
+         r = crypt_init(&cd, m->node);
+         if (r < 0)
+                 return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
+
+         r = crypt_load(cd, CRYPT_LUKS, NULL);
+         if (r < 0)
+                 return log_debug_errno(r, "Failed to load LUKS metadata: %m");
+
+         r = crypt_activate_by_passphrase(
+                         cd,
+                         dm_name,
+                         CRYPT_ANY_SLOT,
+                         passphrase,
+                         strlen(passphrase),
+                         ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
+                         ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
+         if (r < 0) {
+                 log_debug_errno(r, "Failed to activate LUKS device: %m");
+                 return r == -EPERM ? -EKEYREJECTED : r;
+         }
+
+         slot = d->decrypted + d->n_decrypted;
+         slot->name = TAKE_PTR(dm_name);
+         slot->device = TAKE_PTR(cd);
+         d->n_decrypted++;
+
+         m->decrypted_node = TAKE_PTR(dm_node);
+
+         return 0;
+ }
+
+ /* Activates a verity device for data partition 'm' using hash partition 'v' and the given root hash, and
+  * records the new device in 'd'.
+  *
+  * Nothing is done (and 0 returned) if no root hash was supplied, if either partition was not found or
+  * lacks a node or file system type, or if 'v' is not of type "DM_verity_hash". On success
+  * m->decrypted_node is set to the node of the activated device and 0 is returned; otherwise a negative
+  * errno-style error is returned. The device is always activated read-only. */
+ static int verity_partition(
+                 DissectedPartition *m,
+                 DissectedPartition *v,
+                 const void *root_hash,
+                 size_t root_hash_size,
+                 DissectImageFlags flags,
+                 DecryptedImage *d) {
+
+         _cleanup_(crypt_freep) struct crypt_device *cd = NULL;
+         _cleanup_free_ char *dm_node = NULL, *dm_name = NULL;
+         DecryptedPartition *slot;
+         int r;
+
+         assert(m);
+         assert(v);
+
+         if (!root_hash ||
+             !m->found || !m->node || !m->fstype ||
+             !v->found || !v->node || !v->fstype ||
+             !streq(v->fstype, "DM_verity_hash"))
+                 return 0;
+
+         r = make_dm_name_and_node(m->node, "-verity", &dm_name, &dm_node);
+         if (r < 0)
+                 return r;
+
+         if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+                 return -ENOMEM;
+
+         /* Each step only runs if the previous one did not fail */
+         r = crypt_init(&cd, v->node);
+         if (r >= 0)
+                 r = crypt_load(cd, CRYPT_VERITY, NULL);
+         if (r >= 0)
+                 r = crypt_set_data_device(cd, m->node);
+         if (r >= 0)
+                 r = crypt_activate_by_volume_key(cd, dm_name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY);
+         if (r < 0)
+                 return r;
+
+         slot = d->decrypted + d->n_decrypted;
+         slot->name = TAKE_PTR(dm_name);
+         slot->device = TAKE_PTR(cd);
+         d->n_decrypted++;
+
+         m->decrypted_node = TAKE_PTR(dm_node);
+
+         return 0;
+ }
+#endif
+
+ /* Sets up the LUKS and verity devices of a dissected image.
+  *
+  * Return values:
+  *
+  *     = 0           → there was nothing to decrypt, *ret is set to NULL
+  *     > 0           → devices were set up, *ret receives the new DecryptedImage
+  *     -ENOKEY       → there is something to decrypt, but no key was supplied
+  *     -EKEYREJECTED → the passed key was not correct
+  *     -EINVAL       → the root hash is shorter than 128 bit
+  *     -EOPNOTSUPP   → something needs to be decrypted, but we were built without libcryptsetup
+  *
+  * Other negative errno-style errors are passed through from the individual steps. */
+ int dissected_image_decrypt(
+                 DissectedImage *m,
+                 const char *passphrase,
+                 const void *root_hash,
+                 size_t root_hash_size,
+                 DissectImageFlags flags,
+                 DecryptedImage **ret) {
+
+ #if HAVE_LIBCRYPTSETUP
+         _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted = NULL;
+         unsigned designator;
+         int r;
+ #endif
+
+         assert(m);
+         assert(root_hash || root_hash_size == 0);
+
+         if (root_hash && root_hash_size < sizeof(sd_id128_t))
+                 return -EINVAL;
+
+         /* Neither encryption nor verity in use? Then there is nothing for us to do. */
+         if (!(m->encrypted || m->verity)) {
+                 *ret = NULL;
+                 return 0;
+         }
+
+ #if HAVE_LIBCRYPTSETUP
+         decrypted = new0(DecryptedImage, 1);
+         if (!decrypted)
+                 return -ENOMEM;
+
+         for (designator = 0; designator < _PARTITION_DESIGNATOR_MAX; designator++) {
+                 DissectedPartition *part = m->partitions + designator;
+                 int verity_designator;
+
+                 if (!part->found)
+                         continue;
+
+                 r = decrypt_partition(part, passphrase, flags, decrypted);
+                 if (r < 0)
+                         return r;
+
+                 /* Does this partition have a verity sibling? If so, set that up too. */
+                 verity_designator = PARTITION_VERITY_OF(designator);
+                 if (verity_designator >= 0) {
+                         r = verity_partition(part, m->partitions + verity_designator, root_hash, root_hash_size, flags, decrypted);
+                         if (r < 0)
+                                 return r;
+                 }
+
+                 /* Determine what is inside the freshly activated device, tolerating ambiguous results */
+                 if (!part->decrypted_fstype && part->decrypted_node) {
+                         r = probe_filesystem(part->decrypted_node, &part->decrypted_fstype);
+                         if (r < 0 && r != -EUCLEAN)
+                                 return r;
+                 }
+         }
+
+         *ret = TAKE_PTR(decrypted);
+
+         return 1;
+ #else
+         return -EOPNOTSUPP;
+ #endif
+ }
+
+ /* Like dissected_image_decrypt(), but queries the user for a passphrase via ask_password_auto() if none
+  * was supplied or the supplied one was rejected. Up to three passphrases are tried in total, counting
+  * the one passed in. Failures are logged at error level.
+  *
+  * Returns the result of dissected_image_decrypt() on success, -EKEYREJECTED after too many attempts,
+  * and other negative errno-style errors if decrypting or querying fails. */
+ int dissected_image_decrypt_interactively(
+                 DissectedImage *m,
+                 const char *passphrase,
+                 const void *root_hash,
+                 size_t root_hash_size,
+                 DissectImageFlags flags,
+                 DecryptedImage **ret) {
+
+         _cleanup_strv_free_erase_ char **answers = NULL;
+         int attempts_left, r;
+
+         /* A passphrase passed in by the caller uses up one of the attempts */
+         attempts_left = passphrase ? 2 : 3;
+
+         for (;;) {
+                 r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret);
+                 if (r >= 0)
+                         return r;
+
+                 switch (r) {
+
+                 case -EKEYREJECTED:
+                         log_error_errno(r, "Incorrect passphrase, try again!");
+                         break;
+
+                 case -ENOKEY:
+                         /* No passphrase yet, ask for one below */
+                         break;
+
+                 default:
+                         return log_error_errno(r, "Failed to decrypt image: %m");
+                 }
+
+                 attempts_left--;
+                 if (attempts_left < 0)
+                         return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
+                                                "Too many retries.");
+
+                 /* Drop the previous answer before asking again */
+                 answers = strv_free(answers);
+
+                 r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", USEC_INFINITY, 0, &answers);
+                 if (r < 0)
+                         return log_error_errno(r, "Failed to query for passphrase: %m");
+
+                 passphrase = answers[0];
+         }
+ }
+
+#if HAVE_LIBCRYPTSETUP
+ /* Issues a DM_DEV_REMOVE ioctl with the DM_DEFERRED_REMOVE flag for the device-mapper device named
+  * p->name.
+  *
+  * Returns 0 on success, -ENAMETOOLONG if the name (including its trailing NUL) does not fit into the
+  * ioctl structure, and -errno if opening the control device or the ioctl fails. */
+ static int deferred_remove(DecryptedPartition *p) {
+         struct dm_ioctl dm = {
+                 .version = {
+                         DM_VERSION_MAJOR,
+                         DM_VERSION_MINOR,
+                         DM_VERSION_PATCHLEVEL
+                 },
+                 .data_size = sizeof(dm),
+                 .flags = DM_DEFERRED_REMOVE,
+         };
+
+         _cleanup_close_ int fd = -1;
+
+         assert(p);
+
+         /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */
+
+         /* Validate the name before doing anything else. Use ">=" so that there is always room for the
+          * trailing NUL: strncpy() does not add one if the source fills the buffer entirely. */
+         if (strlen(p->name) >= sizeof(dm.name))
+                 return -ENAMETOOLONG;
+
+         fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
+         if (fd < 0)
+                 return -errno;
+
+         strncpy(dm.name, p->name, sizeof(dm.name));
+
+         if (ioctl(fd, DM_DEV_REMOVE, &dm))
+                 return -errno;
+
+         return 0;
+ }
+#endif
+
+ /* Requests deferred removal for every device of this image that has not been relinquished yet, and
+  * marks each of them so that decrypted_image_unref() will not deactivate it anymore.
+  *
+  * Returns 0 on success (always so when built without libcryptsetup), or the negative error of the first
+  * device whose removal request failed; devices after that one are left untouched. */
+ int decrypted_image_relinquish(DecryptedImage *d) {
+         assert(d);
+
+ #if HAVE_LIBCRYPTSETUP
+         for (size_t idx = 0; idx < d->n_decrypted; idx++) {
+                 DecryptedPartition *part = &d->decrypted[idx];
+                 int r;
+
+                 if (!part->relinquished) {
+                         r = deferred_remove(part);
+                         if (r < 0)
+                                 return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", part->name);
+
+                         part->relinquished = true;
+                 }
+         }
+ #endif
+
+         return 0;
+ }
+
+ /* Loads the verity root hash belonging to the image file 'image'.
+  *
+  * The hash is read as hexadecimal text from the "user.verity.roothash" extended attribute of the image.
+  * If the attribute cannot be read because of -ENODATA, -EOPNOTSUPP or -ENOENT, a file next to the image
+  * is consulted instead, whose name is the image path with a trailing ".raw" (if any) replaced by
+  * ".roothash".
+  *
+  * Returns 1 and the decoded hash in *ret / *ret_size (to be freed by the caller) if a hash was found,
+  * and 0 with *ret = NULL and *ret_size = 0 if 'image' is a device path or no such file exists. Returns
+  * -EINVAL if the hash is shorter than 128 bit, and other negative errno-style errors on failure. */
+ int root_hash_load(const char *image, void **ret, size_t *ret_size) {
+         _cleanup_free_ void *decoded = NULL;
+         _cleanup_free_ char *hex = NULL;
+         size_t decoded_size;
+         int r;
+
+         assert(image);
+         assert(ret);
+         assert(ret_size);
+
+         /* If we are asked to load the root hash for a device node, exit early */
+         if (is_device_path(image)) {
+                 *ret = NULL;
+                 *ret_size = 0;
+                 return 0;
+         }
+
+         r = getxattr_malloc(image, "user.verity.roothash", &hex, true);
+         if (r < 0) {
+                 char *sidecar, *tail, *raw_suffix;
+
+                 if (r != -ENODATA && r != -EOPNOTSUPP && r != -ENOENT)
+                         return r;
+
+                 /* Build the name of the side-car file: the image path, minus ".raw", plus ".roothash" */
+                 sidecar = newa(char, strlen(image) + STRLEN(".roothash") + 1);
+                 tail = stpcpy(sidecar, image);
+                 raw_suffix = endswith(sidecar, ".raw");
+                 strcpy(raw_suffix ?: tail, ".roothash");
+
+                 r = read_one_line_file(sidecar, &hex);
+                 if (r == -ENOENT) {
+                         *ret = NULL;
+                         *ret_size = 0;
+                         return 0;
+                 }
+                 if (r < 0)
+                         return r;
+         }
+
+         r = unhexmem(hex, strlen(hex), &decoded, &decoded_size);
+         if (r < 0)
+                 return r;
+         if (decoded_size < sizeof(sd_id128_t))
+                 return -EINVAL;
+
+         *ret = TAKE_PTR(decoded);
+         *ret_size = decoded_size;
+
+         return 1;
+ }
+
+ /* Reads hostname, machine ID, machine-info and os-release from the OS tree inside a dissected image and
+  * stores them in 'm'.
+  *
+  * A child process is forked off (with the FORK_NEW_MOUNTNS and FORK_MOUNTNS_SLAVE flags) which mounts the
+  * root partition read-only on a temporary directory and copies each metadata file into a pipe, one pipe
+  * per file. The parent reads and parses the pipes. Files that cannot be opened or parsed are logged at
+  * debug level and skipped.
+  *
+  * Returns the exit status check result of the child, i.e. EXIT_SUCCESS on success, -EPROTO if the child
+  * exited with a different status, and a negative errno-style error if setting things up failed. 'm' is
+  * only updated if the child exited successfully. */
+ int dissected_image_acquire_metadata(DissectedImage *m) {
+
+ enum {
+ META_HOSTNAME,
+ META_MACHINE_ID,
+ META_MACHINE_INFO,
+ META_OS_RELEASE,
+ _META_MAX,
+ };
+
+ /* For each metadata item a NUL-separated list of candidate paths, tried in order */
+ static const char *const paths[_META_MAX] = {
+ [META_HOSTNAME] = "/etc/hostname\0",
+ [META_MACHINE_ID] = "/etc/machine-id\0",
+ [META_MACHINE_INFO] = "/etc/machine-info\0",
+ [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0",
+ };
+
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+ _cleanup_(rmdir_and_freep) char *t = NULL;
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+ sd_id128_t machine_id = SD_ID128_NULL;
+ _cleanup_free_ char *hostname = NULL;
+ unsigned n_meta_initialized = 0, k;
+ int fds[2 * _META_MAX], r;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ /* One pipe per metadata item: fds[2*k] is the read end, fds[2*k+1] the write end. n_meta_initialized
+ * counts how many pairs are valid, so that the cleanup code never touches uninitialized entries. */
+ for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
+ if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
+ if (r < 0)
+ goto finish;
+
+ r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ /* Child: mount the image and stream the files into the write ends of the pipes */
+ r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to mount dissected image: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_close_ int fd = -1;
+ const char *p;
+
+ /* The child has no use for the read end */
+ fds[2*k] = safe_close(fds[2*k]);
+
+ NULSTR_FOREACH(p, paths[k]) {
+ fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd >= 0)
+ break;
+ }
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
+ continue;
+ }
+
+ r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ /* Close the write end right away, so that the parent sees EOF for this item */
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Parent: read the items back, in the same order the child writes them */
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ /* The parent has no use for the write end */
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+
+ f = fdopen(fds[2*k], "r");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* The FILE object owns the read end now */
+ fds[2*k] = -1;
+
+ switch (k) {
+
+ case META_HOSTNAME:
+ r = read_etc_hostname_stream(f, &hostname);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/hostname: %m");
+
+ break;
+
+ case META_MACHINE_ID: {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-id: %m");
+ else if (r == 33) {
+ r = sd_id128_from_string(line, &machine_id);
+ if (r < 0)
+ log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
+ } else if (r == 0)
+ log_debug("/etc/machine-id file is empty.");
+ else
+ log_debug("/etc/machine-id has unexpected length %i.", r);
+
+ break;
+ }
+
+ case META_MACHINE_INFO:
+ r = load_env_file_pairs(f, "machine-info", &machine_info);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-info: %m");
+
+ break;
+
+ case META_OS_RELEASE:
+ r = load_env_file_pairs(f, "os-release", &os_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read OS release file: %m");
+
+ break;
+ }
+ }
+
+ r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
+ /* Reset the PID so that the sigkill_waitp cleanup handler leaves the already reaped child alone */
+ child = 0;
+ if (r < 0)
+ goto finish;
+ /* All pipe ends have been closed or handed to a FILE object by now, hence returning directly is safe */
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+
+ free_and_replace(m->hostname, hostname);
+ m->machine_id = machine_id;
+ strv_free_and_replace(m->machine_info, machine_info);
+ strv_free_and_replace(m->os_release, os_release);
+
+ finish:
+ for (k = 0; k < n_meta_initialized; k++)
+ safe_close_pair(fds + 2*k);
+
+ return r;
+ }
+
+int dissect_image_and_warn(
+ int fd,
+ const char *name,
+ const void *root_hash,
+ size_t root_hash_size,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+ _cleanup_free_ char *buffer = NULL;
+ int r;
+
+ if (!name) {
+ r = fd_get_path(fd, &buffer);
+ if (r < 0)
+ return r;
+
+ name = buffer;
+ }
+
+ r = dissect_image(fd, root_hash, root_hash_size, flags, ret);
+
+ switch (r) {
+
+ case -EOPNOTSUPP:
+ return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
+
+ case -ENOPKG:
+ return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
+
+ case -EADDRNOTAVAIL:
+ return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
+
+ case -ENOTUNIQ:
+ return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
+
+ case -ENXIO:
+ return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
+
+ case -EPROTONOSUPPORT:
+ return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
+
+ default:
+ if (r < 0)
+ return log_error_errno(r, "Failed to dissect image '%s': %m", name);
+
+ return r;
+ }
+}
+
+ /* Names of the partition designators, indexed by the PARTITION_* constants from dissect-image.h. */
+ static const char *const partition_designator_table[] = {
+ [PARTITION_ROOT] = "root",
+ [PARTITION_ROOT_SECONDARY] = "root-secondary",
+ [PARTITION_HOME] = "home",
+ [PARTITION_SRV] = "srv",
+ [PARTITION_ESP] = "esp",
+ [PARTITION_SWAP] = "swap",
+ [PARTITION_ROOT_VERITY] = "root-verity",
+ [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
+ };
+
+ /* NOTE(review): presumably expands to the partition_designator string conversion helpers based on the
+  * table above; confirm against string-table.h. */
+ DEFINE_STRING_TABLE_LOOKUP(partition_designator, int);
diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h
new file mode 100644
index 0000000..f50b40e
--- /dev/null
+++ b/src/shared/dissect-image.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "macro.h"
+
+typedef struct DissectedImage DissectedImage;
+typedef struct DissectedPartition DissectedPartition;
+typedef struct DecryptedImage DecryptedImage;
+
+struct DissectedPartition { /* Description of one partition of an image; DissectedImage keeps one of these per partition designator */
+ bool found:1;
+ bool rw:1;
+ int partno; /* -1 if there was no partition and the image contains a file system directly */
+ int architecture; /* Intended architecture: either native, secondary or unset (-1). */
+ sd_id128_t uuid; /* Partition entry UUID as reported by the GPT */
+ char *fstype;
+ char *node; /* NOTE(review): presumably the device node path of the partition — confirm in dissect-image.c */
+ char *decrypted_node; /* NOTE(review): presumably set once the partition has been unlocked — confirm */
+ char *decrypted_fstype;
+};
+
+enum { /* Partition designators; they index DissectedImage.partitions[] */
+ PARTITION_ROOT,
+ PARTITION_ROOT_SECONDARY, /* Secondary architecture */
+ PARTITION_HOME,
+ PARTITION_SRV,
+ PARTITION_ESP,
+ PARTITION_SWAP,
+ PARTITION_ROOT_VERITY, /* verity data for the PARTITION_ROOT partition */
+ PARTITION_ROOT_SECONDARY_VERITY, /* verity data for the PARTITION_ROOT_SECONDARY partition */
+ _PARTITION_DESIGNATOR_MAX, /* number of designators; sizes DissectedImage.partitions[] */
+ _PARTITION_DESIGNATOR_INVALID = -1 /* what PARTITION_VERITY_OF() returns for designators without verity counterpart */
+};
+
+/* Maps a root partition designator to the designator of the partition carrying its verity data. Every
+ * designator without a verity counterpart yields _PARTITION_DESIGNATOR_INVALID. */
+static inline int PARTITION_VERITY_OF(int p) {
+        switch (p) {
+
+        case PARTITION_ROOT:
+                return PARTITION_ROOT_VERITY;
+
+        case PARTITION_ROOT_SECONDARY:
+                return PARTITION_ROOT_SECONDARY_VERITY;
+
+        default:
+                return _PARTITION_DESIGNATOR_INVALID;
+        }
+}
+
+typedef enum DissectImageFlags { /* Bit flags taken by the dissect, decrypt and mount functions declared in this header */
+ DISSECT_IMAGE_READ_ONLY = 1 << 0,
+ DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
+ DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
+ DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
+ DISSECT_IMAGE_DISCARD |
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO,
+ DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
+ DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition */
+ DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root partition */
+ DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only non-root partitions */
+ DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifiable as OS images */
+ DISSECT_IMAGE_NO_UDEV = 1 << 9, /* Don't wait for udev initializing things */
+} DissectImageFlags;
+
+struct DissectedImage { /* What dissect_image() hands back through its "ret" parameter */
+ bool encrypted:1;
+ bool verity:1; /* verity available and usable */
+ bool can_verity:1; /* verity available, but not necessarily used */
+
+ DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX]; /* one slot per partition designator */
+
+ char *hostname; /* NOTE(review): this and the three fields below look like OS metadata, presumably filled in by dissected_image_acquire_metadata() — confirm */
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+};
+
+int probe_filesystem(const char *node, char **ret_fstype);
+int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret);
+int dissect_image_and_warn(int fd, const char *name, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret);
+
+DissectedImage* dissected_image_unref(DissectedImage *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
+
+int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags);
+
+int dissected_image_acquire_metadata(DissectedImage *m);
+
+DecryptedImage* decrypted_image_unref(DecryptedImage *p);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DecryptedImage*, decrypted_image_unref);
+int decrypted_image_relinquish(DecryptedImage *d);
+
+const char* partition_designator_to_string(int i) _const_;
+int partition_designator_from_string(const char *name) _pure_;
+
+int root_hash_load(const char *image, void **ret, size_t *ret_size);
diff --git a/src/shared/dns-domain.c b/src/shared/dns-domain.c
new file mode 100644
index 0000000..4b31cb3
--- /dev/null
+++ b/src/shared/dns-domain.c
@@ -0,0 +1,1375 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#if HAVE_LIBIDN2
+# include <idn2.h>
+#elif HAVE_LIBIDN
+# include <idna.h>
+# include <stringprep.h>
+#endif
+
+#include <endian.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags) { /* Parses the first label of the
+ * escaped domain name at *name and stores its unescaped bytes in dest. dest may be NULL, in which
+ * case the label is only validated and skipped; sz is counted down either way, so it must still
+ * be large enough for the label. On success returns the unescaped length of the label (0 once
+ * the end of the name is reached) and advances *name past the label and the dot following it.
+ * Returns -EINVAL on malformed input or if the label is longer than DNS_LABEL_MAX, -ENOBUFS
+ * if sz runs out. *name is only updated on success. */
+ const char *n;
+ char *d, last_char = 0;
+ int r = 0;
+
+ assert(name);
+ assert(*name);
+
+ n = *name;
+ d = dest;
+
+ for (;;) {
+ if (*n == 0 || *n == '.') {
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) && last_char == '-')
+ /* Trailing dash */
+ return -EINVAL;
+
+ if (*n == '.')
+ n++;
+ break;
+ }
+
+ if (r >= DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (sz <= 0)
+ return -ENOBUFS;
+
+ if (*n == '\\') {
+ /* Escaped character */
+ if (FLAGS_SET(flags, DNS_LABEL_NO_ESCAPES))
+ return -EINVAL;
+
+ n++;
+
+ if (*n == 0)
+ /* Ending NUL */
+ return -EINVAL;
+
+ else if (IN_SET(*n, '\\', '.')) {
+ /* Escaped backslash or dot */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH))
+ return -EINVAL;
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+
+ } else if (n[0] >= '0' && n[0] <= '9') {
+ unsigned k;
+
+ /* Escaped literal ASCII character */
+
+ if (!(n[1] >= '0' && n[1] <= '9') ||
+ !(n[2] >= '0' && n[2] <= '9'))
+ return -EINVAL;
+
+ k = ((unsigned) (n[0] - '0') * 100) +
+ ((unsigned) (n[1] - '0') * 10) +
+ ((unsigned) (n[2] - '0'));
+
+ /* Don't allow anything that doesn't
+ * fit in 8bit. Note that we do allow
+ * control characters, as some servers
+ * (e.g. cloudflare) are happy to
+ * generate labels with them
+ * inside. */
+ if (k > 255)
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) &&
+ !valid_ldh_char((char) k))
+ return -EINVAL;
+
+ last_char = (char) k;
+ if (d)
+ *(d++) = (char) k;
+ sz--;
+ r++;
+
+ n += 3;
+ } else
+ return -EINVAL;
+
+ } else if ((uint8_t) *n >= (uint8_t) ' ' && *n != 127) {
+
+ /* Normal character */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH)) {
+ if (!valid_ldh_char(*n))
+ return -EINVAL;
+ if (r == 0 && *n == '-')
+ /* Leading dash */
+ return -EINVAL;
+ }
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+ } else
+ return -EINVAL;
+ }
+
+ /* Empty label that is not at the end? */
+ if (r == 0 && *n)
+ return -EINVAL;
+
+ /* More than one trailing dot? */
+ if (*n == '.')
+ return -EINVAL;
+
+ if (sz >= 1 && d) /* NUL-terminate only if a byte is left; a label that fills dest completely stays unterminated */
+ *d = 0;
+
+ *name = n;
+ return r;
+}
+
+/* Unescapes the label of "name" that ends at *label_terminal, i.e. walks the name from back to front.
+ * @label_terminal: terminal character of a label, updated to point to the terminal character of
+ * the previous label (always skipping one dot) or to NULL if there are no more
+ * labels.
+ * Returns what dns_label_unescape() returns for that label. Once *label_terminal is NULL, returns 0
+ * and stores an empty string in dest (if sz permits). *label_terminal is only updated on success. */
+int dns_label_unescape_suffix(const char *name, const char **label_terminal, char *dest, size_t sz) {
+ const char *terminal;
+ int r;
+
+ assert(name);
+ assert(label_terminal);
+ assert(dest);
+
+ /* no more labels */
+ if (!*label_terminal) {
+ if (sz >= 1)
+ *dest = 0;
+
+ return 0;
+ }
+
+ terminal = *label_terminal;
+ assert(IN_SET(*terminal, 0, '.'));
+
+ /* Skip current terminal character (and accept domain names ending in ".") */
+ if (*terminal == 0)
+ terminal--;
+ if (terminal >= name && *terminal == '.')
+ terminal--;
+
+ /* Point name to the last label, and terminal to the preceding terminal symbol (or make it a NULL pointer) */
+ for (;;) {
+ if (terminal < name) {
+ /* Reached the first label, so indicate that there are no more */
+ terminal = NULL;
+ break;
+ }
+
+ /* Find the start of the last label */
+ if (*terminal == '.') {
+ const char *y;
+ unsigned slashes = 0;
+
+ for (y = terminal - 1; y >= name && *y == '\\'; y--) /* count the backslashes directly in front of the dot */
+ slashes++;
+
+ if (slashes % 2 == 0) {
+ /* The '.' was not escaped */
+ name = terminal + 1;
+ break;
+ } else {
+ terminal = y; /* escaped dot: continue scanning in front of the backslash run */
+ continue;
+ }
+ }
+
+ terminal--;
+ }
+
+ r = dns_label_unescape(&name, dest, sz, 0);
+ if (r < 0)
+ return r;
+
+ *label_terminal = terminal;
+
+ return r;
+}
+
+/* Escapes the l bytes at p as one DNS label and writes the NUL-terminated result to dest (sz bytes).
+ * Dots and backslashes get a backslash prefix, letters, digits, '_' and '-' are copied verbatim, and
+ * every other byte becomes a backslash followed by three decimal digits. Returns the length of the
+ * escaped string (without the NUL), -EINVAL if l is not within 1…DNS_LABEL_MAX, or -ENOBUFS if dest is
+ * too small. */
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz) {
+        char *out;
+        size_t i;
+
+        /* DNS labels must be between 1 and 63 characters long. A zero-length label does not exist. See
+         * RFC 2182, Section 11. */
+        if (l <= 0 || l > DNS_LABEL_MAX)
+                return -EINVAL;
+        if (sz < 1)
+                return -ENOBUFS;
+
+        assert(p);
+        assert(dest);
+
+        out = dest;
+        for (i = 0; i < l; i++) {
+                char c = p[i];
+
+                if (IN_SET(c, '.', '\\')) {
+                        /* Dot or backslash: two bytes plus room for the final NUL */
+                        if (sz < 3)
+                                return -ENOBUFS;
+
+                        *(out++) = '\\';
+                        *(out++) = c;
+                        sz -= 2;
+                        continue;
+                }
+
+                if (IN_SET(c, '_', '-') ||
+                    (c >= '0' && c <= '9') ||
+                    (c >= 'a' && c <= 'z') ||
+                    (c >= 'A' && c <= 'Z')) {
+                        /* Proper character: one byte plus room for the final NUL */
+                        if (sz < 2)
+                                return -ENOBUFS;
+
+                        *(out++) = c;
+                        sz -= 1;
+                        continue;
+                }
+
+                /* Everything else: backslash and three decimal digits, plus room for the final NUL */
+                if (sz < 5)
+                        return -ENOBUFS;
+
+                *(out++) = '\\';
+                *(out++) = '0' + (char) ((uint8_t) c / 100);
+                *(out++) = '0' + (char) (((uint8_t) c / 10) % 10);
+                *(out++) = '0' + (char) ((uint8_t) c % 10);
+                sz -= 4;
+        }
+
+        *out = 0;
+        return (int) (out - dest);
+}
+
+/* Same as dns_label_escape(), but places the escaped label in a newly allocated buffer that is handed to
+ * the caller via *ret. Returns the escaped length on success, -EINVAL if l is not within
+ * 1…DNS_LABEL_MAX, -ENOMEM if the allocation fails, or the error of dns_label_escape(). */
+int dns_label_escape_new(const char *p, size_t l, char **ret) {
+        _cleanup_free_ char *escaped = NULL;
+        int len;
+
+        assert(p);
+        assert(ret);
+
+        if (l <= 0 || l > DNS_LABEL_MAX)
+                return -EINVAL;
+
+        /* Worst-case size, so that escaping cannot run out of space */
+        escaped = new(char, DNS_LABEL_ESCAPED_MAX);
+        if (!escaped)
+                return -ENOMEM;
+
+        len = dns_label_escape(p, l, escaped, DNS_LABEL_ESCAPED_MAX);
+        if (len < 0)
+                return len;
+
+        *ret = TAKE_PTR(escaped);
+        return len;
+}
+
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) { /* libidn variant.
+ * Takes one unescaped label (encoded_size bytes at encoded) and, if it contains bytes above 127,
+ * writes its IDNA ASCII form to decoded. Returns the number of bytes written, or 0 if the label
+ * is plain ASCII already (decoded is left untouched then). Returns -EINVAL for empty or overlong
+ * labels and if the conversion fails, -ENOBUFS if decoded_max is too small, -ENOMEM if
+ * stringprep_utf8_to_ucs4() returns NULL. */
+ _cleanup_free_ uint32_t *input = NULL;
+ size_t input_size, l;
+ const char *p;
+ bool contains_8bit = false;
+ char buffer[DNS_LABEL_MAX+1];
+
+ assert(encoded);
+ assert(decoded);
+
+ /* Converts an U-label into an A-label */
+
+ if (encoded_size <= 0)
+ return -EINVAL;
+
+ for (p = encoded; p < encoded + encoded_size; p++)
+ if ((uint8_t) *p > 127)
+ contains_8bit = true;
+
+ if (!contains_8bit) {
+ if (encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ return 0;
+ }
+
+ input = stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ if (idna_to_ascii_4i(input, input_size, buffer, 0) != 0) /* NOTE(review): relies on idna_to_ascii_4i() writing at most DNS_LABEL_MAX+1 bytes including NUL — confirm against the libidn documentation */
+ return -EINVAL;
+
+ l = strlen(buffer);
+
+ /* Verify that the result is not longer than one DNS label. */
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+ if (l > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, buffer, l);
+
+ /* If there's room, append a trailing NUL byte, but only then */
+ if (decoded_max > l)
+ decoded[l] = 0;
+
+ return (int) l;
+}
+
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) { /* libidn variant.
+ * Counterpart of dns_label_apply_idna(). Returns the number of bytes written to decoded, or 0 if
+ * the label does not start with IDNA_ACE_PREFIX (decoded is left untouched then). Returns
+ * -EINVAL for empty or overlong labels and for an empty result, -ENOBUFS if decoded_max is
+ * too small, -ENOMEM if one of the stringprep conversions returns NULL. */
+ size_t input_size, output_size;
+ _cleanup_free_ uint32_t *input = NULL;
+ _cleanup_free_ char *result = NULL;
+ uint32_t *output = NULL;
+ size_t w;
+
+ /* To be invoked after unescaping. Converts an A-label into an U-label. */
+
+ assert(encoded);
+ assert(decoded);
+
+ if (encoded_size <= 0 || encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (!memory_startswith(encoded, encoded_size, IDNA_ACE_PREFIX))
+ return 0;
+
+ input = stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ output_size = input_size;
+ output = newa(uint32_t, output_size);
+
+ idna_to_unicode_44i(input, input_size, output, &output_size, 0); /* NOTE(review): return value is not checked here; presumably the output is usable even on error — confirm against the libidn documentation */
+
+ result = stringprep_ucs4_to_utf8(output, output_size, NULL, &w);
+ if (!result)
+ return -ENOMEM;
+ if (w <= 0)
+ return -EINVAL;
+ if (w > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, result, w);
+
+ /* Append trailing NUL byte if there's space, but only then. */
+ if (decoded_max > w)
+ decoded[w] = 0;
+
+ return w;
+}
+#endif
+
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **_ret) { /* Joins the domain names a and b
+ * (either may be NULL) label by label: each label is unescaped with the given flags and
+ * escaped again, labels are separated by single dots. If _ret is non-NULL the newly allocated
+ * result is stored there; if it is NULL the names are only validated. A result without any
+ * label is returned as ".". Returns 0 on success, -EINVAL if a label is malformed or the
+ * result is longer than DNS_HOSTNAME_MAX, -ENOMEM on allocation failure. */
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ const char *p;
+ bool first = true;
+ int r;
+
+ if (a)
+ p = a;
+ else if (b)
+ p = TAKE_PTR(b); /* NOTE(review): presumably TAKE_PTR() resets b to NULL, so that the "if (b)" below fires only once — macro not visible here */
+ else
+ goto finish;
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&p, label, sizeof label, flags);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (*p != 0)
+ return -EINVAL;
+
+ if (b) {
+ /* Now continue with the second string, if there is one */
+ p = TAKE_PTR(b);
+ continue;
+ }
+
+ break;
+ }
+
+ if (_ret) {
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX)) /* "!first" reserves the byte for the separating dot */
+ return -ENOMEM;
+
+ r = dns_label_escape(label, r, ret + n + !first, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ if (!first)
+ ret[n] = '.';
+ } else {
+ char escaped[DNS_LABEL_ESCAPED_MAX];
+
+ r = dns_label_escape(label, r, escaped, sizeof(escaped)); /* only needed for the escaped length, the text is discarded */
+ if (r < 0)
+ return r;
+ }
+
+ if (!first)
+ n++;
+ else
+ first = false;
+
+ n += r;
+ }
+
+finish:
+ if (n > DNS_HOSTNAME_MAX)
+ return -EINVAL;
+
+ if (_ret) {
+ if (n == 0) {
+ /* Nothing appended? If so, generate at least a single dot, to indicate the DNS root domain */
+ if (!GREEDY_REALLOC(ret, allocated, 2))
+ return -ENOMEM;
+
+ ret[n++] = '.';
+ } else {
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+ }
+
+ ret[n] = 0;
+ *_ret = TAKE_PTR(ret);
+ }
+
+ return 0;
+}
+
+/* Feeds the domain name p into the siphash state, one label at a time. Labels are lower-cased (ASCII)
+ * before hashing. Hashing of labels stops silently at the end of the name or at the first label that
+ * cannot be unescaped. */
+void dns_name_hash_func(const char *p, struct siphash *state) {
+        char label[DNS_LABEL_MAX+1];
+        int len;
+
+        assert(p);
+
+        /* Both an error (negative) and the end of the name (zero) terminate the loop */
+        while ((len = dns_label_unescape(&p, label, sizeof(label), 0)) > 0) {
+                ascii_strlower_n(label, len);
+                siphash24_compress(label, len, state);
+                siphash24_compress_byte(0, state); /* make sure foobar and foo.bar result in different hashes */
+        }
+
+        /* enforce that all names are terminated by the empty label */
+        string_hash_func("", state);
+}
+
+/* Orders two domain names by comparing their labels from the last one to the first one, ASCII
+ * case-insensitively. Returns 0 once both names ran out of labels without a difference. If a label of
+ * either name cannot be unescaped, the two results of the unescaping are compared instead. */
+int dns_name_compare_func(const char *a, const char *b) {
+        const char *end_a, *end_b;
+
+        assert(a);
+        assert(b);
+
+        /* Start at the terminating NUL of each name and walk towards the front */
+        end_a = a + strlen(a);
+        end_b = b + strlen(b);
+
+        while (end_a || end_b) {
+                char label_a[DNS_LABEL_MAX], label_b[DNS_LABEL_MAX];
+                int len_a, len_b, c;
+
+                len_a = dns_label_unescape_suffix(a, &end_a, label_a, sizeof(label_a));
+                len_b = dns_label_unescape_suffix(b, &end_b, label_b, sizeof(label_b));
+                if (len_a < 0 || len_b < 0)
+                        return CMP(len_a, len_b);
+
+                c = ascii_strcasecmp_nn(label_a, len_a, label_b, len_b);
+                if (c != 0)
+                        return c;
+        }
+
+        return 0;
+}
+
+DEFINE_HASH_OPS(dns_name_hash_ops, char, dns_name_hash_func, dns_name_compare_func);
+
+/* Checks whether two domain names consist of the same labels, comparing ASCII case-insensitively.
+ * Returns > 0 if they are equal, 0 if not, and a negative errno if a label cannot be unescaped. */
+int dns_name_equal(const char *x, const char *y) {
+        assert(x);
+        assert(y);
+
+        for (;;) {
+                char label_x[DNS_LABEL_MAX], label_y[DNS_LABEL_MAX];
+                int len_x, len_y;
+
+                len_x = dns_label_unescape(&x, label_x, sizeof(label_x), 0);
+                if (len_x < 0)
+                        return len_x;
+
+                len_y = dns_label_unescape(&y, label_y, sizeof(label_y), 0);
+                if (len_y < 0)
+                        return len_y;
+
+                if (len_x != len_y)
+                        return false;
+
+                /* Both names ended at the same time, without any difference before */
+                if (len_x == 0)
+                        return true;
+
+                if (ascii_strcasecmp_n(label_x, label_y, len_x) != 0)
+                        return false;
+        }
+}
+
+int dns_name_endswith(const char *name, const char *suffix) { /* Checks whether the labels of "suffix" are the trailing
+ * labels of "name" (ASCII case-insensitive, whole labels only). Returns > 0 if so, 0 if not,
+ * negative errno if a label cannot be unescaped. Works by trying to match suffix against name
+ * starting at each label of name in turn. */
+ const char *n, *s, *saved_n = NULL;
+ int r, q;
+
+ assert(name);
+ assert(suffix);
+
+ n = name;
+ s = suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_n) /* first label of this attempt: remember where the next label starts, to resume there on mismatch */
+ saved_n = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0) /* name and suffix ended together: match */
+ return true;
+ if (r == 0 && saved_n == n) /* this attempt started at the very end of name: nothing left to try */
+ return false;
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = suffix;
+ n = TAKE_PTR(saved_n);
+ }
+ }
+}
+
+/* Checks whether the labels of "prefix" are the leading labels of "name" (ASCII case-insensitive, whole
+ * labels only). Returns > 0 if so, 0 if not, negative errno if a label cannot be unescaped. */
+int dns_name_startswith(const char *name, const char *prefix) {
+        const char *name_cursor, *prefix_cursor;
+
+        assert(name);
+        assert(prefix);
+
+        name_cursor = name;
+        prefix_cursor = prefix;
+
+        for (;;) {
+                char name_label[DNS_LABEL_MAX], prefix_label[DNS_LABEL_MAX];
+                int name_len, prefix_len;
+
+                prefix_len = dns_label_unescape(&prefix_cursor, prefix_label, sizeof(prefix_label), 0);
+                if (prefix_len < 0)
+                        return prefix_len;
+
+                /* Prefix exhausted without a mismatch */
+                if (prefix_len == 0)
+                        return true;
+
+                name_len = dns_label_unescape(&name_cursor, name_label, sizeof(name_label), 0);
+                if (name_len < 0)
+                        return name_len;
+
+                if (prefix_len != name_len)
+                        return false;
+                if (ascii_strcasecmp_n(name_label, prefix_label, prefix_len) != 0)
+                        return false;
+        }
+}
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret) { /* If "name" ends
+ * in the labels of old_suffix, stores a newly allocated copy of name with that suffix replaced
+ * by new_suffix in *ret and returns 1. If it does not, sets *ret to NULL and returns 0.
+ * Returns a negative errno on failure. The matching works like in dns_name_endswith(). */
+ const char *n, *s, *saved_before = NULL, *saved_after = NULL, *prefix;
+ int r, q;
+
+ assert(name);
+ assert(old_suffix);
+ assert(new_suffix);
+ assert(ret);
+
+ n = name;
+ s = old_suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ if (!saved_before) /* start of the current match attempt, i.e. where the prefix to keep ends */
+ saved_before = n;
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_after) /* start of the label after the attempt's first one, to resume there on mismatch */
+ saved_after = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0)
+ break;
+ if (r == 0 && saved_after == n) {
+ *ret = NULL; /* doesn't match */
+ return 0;
+ }
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = old_suffix;
+ n = TAKE_PTR(saved_after);
+ saved_before = NULL;
+ }
+ }
+
+ /* Found it! Now generate the new name */
+ prefix = strndupa(name, saved_before - name);
+
+ r = dns_name_concat(prefix, new_suffix, 0, ret);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+/* Determine if b is strictly greater than a and strictly smaller than c. We consider the order of names
+ * to be circular, so that if a is strictly greater than c, we consider b to be between them if it is
+ * either greater than a or smaller than c. This is how the canonical DNS name order used in NSEC records
+ * work. Returns non-zero if b is between a and c, zero otherwise. */
+int dns_name_between(const char *a, const char *b, const char *c) {
+        return dns_name_compare_func(a, c) < 0
+                /* a and c are properly ordered:
+                 *         a<---b--->c
+                 */
+                ? (dns_name_compare_func(a, b) < 0 &&
+                   dns_name_compare_func(b, c) < 0)
+                /* a and c are equal or 'reversed':
+                 *         <--b--c         a----->
+                 * or:
+                 *         <-----c         a--b-->
+                 */
+                : (dns_name_compare_func(b, c) < 0 ||
+                   dns_name_compare_func(a, b) < 0);
+}
+
+/* Formats the reverse lookup name for an address: "d.c.b.a.in-addr.arpa" for AF_INET, and the 32
+ * dot-separated nibbles (lowest first) followed by "ip6.arpa" for AF_INET6. The newly allocated string
+ * is stored in *ret. Returns 0 on success, -EAFNOSUPPORT for any other family, -ENOMEM if formatting
+ * fails. */
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret) {
+        const uint8_t *p;
+        int r;
+
+        assert(a);
+        assert(ret);
+
+        p = (const uint8_t*) a;
+
+        switch (family) {
+
+        case AF_INET:
+                r = asprintf(ret, "%u.%u.%u.%u.in-addr.arpa", p[3], p[2], p[1], p[0]);
+                break;
+
+        case AF_INET6:
+                r = asprintf(ret, "%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.ip6.arpa",
+                             hexchar(p[15] & 0xF), hexchar(p[15] >> 4), hexchar(p[14] & 0xF), hexchar(p[14] >> 4),
+                             hexchar(p[13] & 0xF), hexchar(p[13] >> 4), hexchar(p[12] & 0xF), hexchar(p[12] >> 4),
+                             hexchar(p[11] & 0xF), hexchar(p[11] >> 4), hexchar(p[10] & 0xF), hexchar(p[10] >> 4),
+                             hexchar(p[ 9] & 0xF), hexchar(p[ 9] >> 4), hexchar(p[ 8] & 0xF), hexchar(p[ 8] >> 4),
+                             hexchar(p[ 7] & 0xF), hexchar(p[ 7] >> 4), hexchar(p[ 6] & 0xF), hexchar(p[ 6] >> 4),
+                             hexchar(p[ 5] & 0xF), hexchar(p[ 5] >> 4), hexchar(p[ 4] & 0xF), hexchar(p[ 4] >> 4),
+                             hexchar(p[ 3] & 0xF), hexchar(p[ 3] >> 4), hexchar(p[ 2] & 0xF), hexchar(p[ 2] >> 4),
+                             hexchar(p[ 1] & 0xF), hexchar(p[ 1] >> 4), hexchar(p[ 0] & 0xF), hexchar(p[ 0] >> 4));
+                break;
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+
+        return r < 0 ? -ENOMEM : 0;
+}
+
+int dns_name_address(const char *p, int *family, union in_addr_union *address) { /* Inverse of dns_name_reverse(): if p is
+ * a well-formed reverse lookup name below in-addr.arpa or ip6.arpa, stores the address it
+ * encodes in *family and *address and returns 1. Returns 0 if p is not such a name, and a
+ * negative errno if a label is malformed or is not a valid address component. */
+ int r;
+
+ assert(p);
+ assert(family);
+ assert(address);
+
+ r = dns_name_endswith(p, "in-addr.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ uint8_t a[4];
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a); i++) {
+ char label[DNS_LABEL_MAX+1];
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (r > 3) /* an octet has at most three decimal digits */
+ return -EINVAL;
+
+ r = safe_atou8(label, &a[i]);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_name_equal(p, "in-addr.arpa"); /* exactly four labels may precede the suffix */
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET;
+ address->in.s_addr = htobe32(((uint32_t) a[3] << 24) | /* the name lists the octets in reverse order, so the last parsed one is the most significant */
+ ((uint32_t) a[2] << 16) |
+ ((uint32_t) a[1] << 8) |
+ (uint32_t) a[0]);
+
+ return 1;
+ }
+
+ r = dns_name_endswith(p, "ip6.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ struct in6_addr a;
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a.s6_addr); i++) { /* two single-hex-digit labels per byte, low nibble first */
+ char label[DNS_LABEL_MAX+1];
+ int x, y;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ x = unhexchar(label[0]);
+ if (x < 0)
+ return -EINVAL;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ y = unhexchar(label[0]);
+ if (y < 0)
+ return -EINVAL;
+
+ a.s6_addr[ELEMENTSOF(a.s6_addr) - i - 1] = (uint8_t) y << 4 | (uint8_t) x;
+ }
+
+ r = dns_name_equal(p, "ip6.arpa"); /* exactly 32 nibble labels may precede the suffix */
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET6;
+ address->in6 = a;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Tells whether "name" denotes the DNS root domain. */
+bool dns_name_is_root(const char *name) {
+
+        assert(name);
+
+        /* There are exactly two ways to encode the root domain name: as empty string, or with a single
+         * dot. */
+        if (name[0] == 0)
+                return true;
+
+        return name[0] == '.' && name[1] == 0;
+}
+
+/* Tells whether "name" consists of exactly one label, i.e. whether stripping its first label leaves the
+ * root domain. Names whose first label is missing or malformed yield false. */
+bool dns_name_is_single_label(const char *name) {
+        assert(name);
+
+        return dns_name_parent(&name) > 0 && dns_name_is_root(name);
+}
+
+/* Encode a domain name according to RFC 1035 Section 3.1, without compression. The encoding is written
+ * to buffer, which has room for len bytes; if canonical is true the labels are lower-cased. Returns the
+ * number of bytes written (including the final zero-length label), -ENOBUFS if the buffer is too small,
+ * or -EINVAL if the name is malformed or its encoding is longer than DNS_HOSTNAME_MAX + 2 bytes. */
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical) {
+ uint8_t *label_length, *out;
+ int r;
+
+ assert(domain);
+ assert(buffer);
+
+ out = buffer;
+
+ do {
+ /* Reserve a byte for label length */
+ if (len <= 0)
+ return -ENOBUFS;
+ len--;
+ label_length = out;
+ out++;
+
+ /* Convert and copy a single label. Note that
+ * dns_label_unescape() returns 0 when it hits the end
+ * of the domain name, which we rely on here to encode
+ * the trailing NUL byte. */
+ r = dns_label_unescape(&domain, (char *) out, len, 0);
+ if (r < 0)
+ return r;
+
+ /* Optionally, output the name in DNSSEC canonical
+ * format, as described in RFC 4034, section 6.2. Or
+ * in other words: in lower-case. */
+ if (canonical)
+ ascii_strlower_n((char*) out, (size_t) r);
+
+ /* Fill label length, move forward */
+ *label_length = r;
+ out += r;
+ len -= r;
+
+ } while (r != 0);
+
+ /* Verify the maximum size of the encoded name. The trailing
+ * dot + NUL byte account are included this time, hence
+ * compare against DNS_HOSTNAME_MAX + 2 (which is 255) this
+ * time. */
+ if (out - buffer > DNS_HOSTNAME_MAX + 2)
+ return -EINVAL;
+
+ return out - buffer;
+}
+
+/* Validates one label of an SRV service type: an underscore, then a letter, then any number of letters,
+ * digits and hyphens. "label" does not need to be NUL-terminated, only its first n bytes are looked at. */
+static bool srv_type_label_is_valid(const char *label, size_t n) {
+        size_t i;
+
+        assert(label);
+
+        /* Label needs to be at least 2 chars long, and the first char needs to be an underscore */
+        if (n < 2 || label[0] != '_')
+                return false;
+
+        for (i = 1; i < n; i++) {
+                char c = label[i];
+
+                /* Letters are fine everywhere after the underscore */
+                if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
+                        continue;
+
+                /* Digits and hyphens only from the third char on, as the second char must be a letter */
+                if (i >= 2 && ((c >= '0' && c <= '9') || c == '-'))
+                        continue;
+
+                return false;
+        }
+
+        return true;
+}
+
+/* Checks that "name" is an SRV service type: exactly two labels, each of which passes
+ * srv_type_label_is_valid(). NULL and names that fail to parse are invalid. */
+bool dns_srv_type_is_valid(const char *name) {
+        char label[DNS_LABEL_MAX];
+        unsigned n_labels = 0;
+        int r;
+
+        if (!name)
+                return false;
+
+        /* This more or less implements RFC 6335, Section 5.1 */
+        while ((r = dns_label_unescape(&name, label, sizeof(label), 0)) > 0) {
+                if (n_labels >= 2 || !srv_type_label_is_valid(label, r))
+                        return false;
+
+                n_labels++;
+        }
+
+        /* r < 0 means the loop was left because of a parse error */
+        return r == 0 && n_labels == 2; /* exactly two labels */
+}
+
+/* Like dns_srv_type_is_valid(), but additionally requires the type to end in "_tcp" or "_udp". */
+bool dnssd_srv_type_is_valid(const char *name) {
+        if (!dns_srv_type_is_valid(name))
+                return false;
+
+        /* Specific to DNS-SD. RFC 6763, Section 7 */
+        if (dns_name_endswith(name, "_tcp") > 0)
+                return true;
+
+        return dns_name_endswith(name, "_udp") > 0;
+}
+
+/* Checks that "name" is usable as a DNS-SD service instance name: non-NULL, accepted by
+ * utf8_is_valid(), not flagged by string_has_cc(), and between 1 and 63 bytes long. */
+bool dns_service_name_is_valid(const char *name) {
+        size_t l;
+
+        /* This more or less implements RFC 6763, Section 4.1.1 */
+
+        if (!name || !utf8_is_valid(name) || string_has_cc(name, NULL))
+                return false;
+
+        l = strlen(name);
+        return l >= 1 && l <= 63;
+}
+
+/* Builds the name <name>.<type>.<domain> (or just <type>.<domain> if "name" is NULL) and stores the
+ * newly allocated result in *ret. The instance name is escaped as a single label. Returns -EINVAL if
+ * the type or the instance name is invalid, otherwise the result of dns_name_concat(). */
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret) {
+        _cleanup_free_ char *type_and_domain = NULL;
+        char escaped_name[DNS_LABEL_ESCAPED_MAX];
+        int r;
+
+        assert(type);
+        assert(domain);
+        assert(ret);
+
+        if (!dns_srv_type_is_valid(type))
+                return -EINVAL;
+
+        /* Without an instance name there is nothing to escape */
+        if (!name)
+                return dns_name_concat(type, domain, 0, ret);
+
+        if (!dns_service_name_is_valid(name))
+                return -EINVAL;
+
+        r = dns_label_escape(name, strlen(name), escaped_name, sizeof(escaped_name));
+        if (r < 0)
+                return r;
+
+        r = dns_name_concat(type, domain, 0, &type_and_domain);
+        if (r < 0)
+                return r;
+
+        return dns_name_concat(escaped_name, type_and_domain, 0, ret);
+}
+
+/* Applies dns_service_name_is_valid() to the first n bytes of "label", which does not need to be
+ * NUL-terminated. Labels with an embedded NUL byte are rejected. */
+static bool dns_service_name_label_is_valid(const char *label, size_t n) {
+        const char *copy;
+
+        assert(label);
+
+        if (memchr(label, 0, n) != NULL)
+                return false;
+
+        /* Make a NUL-terminated copy on the stack for the string-based check */
+        copy = strndupa(label, n);
+
+        return dns_service_name_is_valid(copy);
+}
+
+/* Splits a joined DNS-SD name into instance name, service type and domain. Two forms are recognized:
+ * <name>.<type>.<type2>.<domain> and <type>.<type2>.<domain> (name is returned as NULL then). If neither
+ * matches, the whole input is treated as the domain and both name and type are returned as NULL. Each of
+ * _name, _type and _domain may be NULL if the caller is not interested in it; the strings stored are
+ * newly allocated. Returns 0 on success, a negative errno on failure. */
+int dns_service_split(const char *joined, char **_name, char **_type, char **_domain) {
+        _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL;
+        const char *p = joined, *q = NULL, *d = NULL;
+        /* One byte more than the longest label: dns_label_unescape() only NUL-terminates if there is room
+         * left, and the labels are passed to strjoin() as C strings below. */
+        char a[DNS_LABEL_MAX+1], b[DNS_LABEL_MAX+1], c[DNS_LABEL_MAX+1];
+        int an, bn = 0, cn = 0, r;
+        unsigned x = 0; /* number of leading labels successfully read, at most 3 */
+
+        assert(joined);
+
+        /* Get first label from the full name */
+        an = dns_label_unescape(&p, a, sizeof(a), 0);
+        if (an < 0)
+                return an;
+
+        if (an > 0) {
+                x++;
+
+                /* If there was a first label, try to get the second one */
+                bn = dns_label_unescape(&p, b, sizeof(b), 0);
+                if (bn < 0)
+                        return bn;
+
+                if (bn > 0) {
+                        x++;
+
+                        /* If there was a second label, try to get the third one */
+                        q = p; /* start of the domain in the <type>.<type2>.<domain> case */
+                        cn = dns_label_unescape(&p, c, sizeof(c), 0);
+                        if (cn < 0)
+                                return cn;
+
+                        if (cn > 0)
+                                x++;
+                }
+        }
+
+        if (x >= 2 && srv_type_label_is_valid(b, bn)) {
+
+                if (x >= 3 && srv_type_label_is_valid(c, cn)) {
+
+                        if (dns_service_name_label_is_valid(a, an)) {
+                                /* OK, got <name> . <type> . <type2> . <domain> */
+
+                                name = strndup(a, an);
+                                if (!name)
+                                        return -ENOMEM;
+
+                                type = strjoin(b, ".", c);
+                                if (!type)
+                                        return -ENOMEM;
+
+                                d = p;
+                                goto finish;
+                        }
+
+                } else if (srv_type_label_is_valid(a, an)) {
+
+                        /* OK, got <type> . <type2> . <domain> */
+
+                        name = NULL;
+
+                        type = strjoin(a, ".", b);
+                        if (!type)
+                                return -ENOMEM;
+
+                        d = q;
+                        goto finish;
+                }
+        }
+
+        /* Neither form matched: everything is the domain */
+        name = NULL;
+        type = NULL;
+        d = joined;
+
+finish:
+        r = dns_name_normalize(d, 0, &domain);
+        if (r < 0)
+                return r;
+
+        if (_domain)
+                *_domain = TAKE_PTR(domain);
+
+        if (_type)
+                *_type = TAKE_PTR(type);
+
+        if (_name)
+                *_name = TAKE_PTR(name);
+
+        return 0;
+}
+
+/* Fills table[] with pointers into "name": table[0] is the name itself, table[i] the name with its first
+ * i labels removed. The table must have room for DNS_N_LABELS_MAX+1 entries. Returns the number of
+ * labels of the name, -EINVAL if there are more than DNS_N_LABELS_MAX, or the error of
+ * dns_name_parent(). */
+static int dns_name_build_suffix_table(const char *name, const char *table[]) {
+        const char *cursor;
+        unsigned n;
+        int r;
+
+        assert(name);
+        assert(table);
+
+        cursor = name;
+        for (n = 0; n <= DNS_N_LABELS_MAX; n++) {
+                table[n] = cursor;
+
+                r = dns_name_parent(&cursor);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return (int) n;
+        }
+
+        /* Still not at the end of the name after the maximum number of labels */
+        return -EINVAL;
+}
+
+/* Stores in *ret a pointer into "name" at which its last n_labels labels begin. Returns the number of
+ * labels in front of that position, -EINVAL if the name has fewer than n_labels labels, or the error
+ * hit while parsing the name. */
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret) {
+        const char *starts[DNS_N_LABELS_MAX+1];
+        unsigned skipped;
+        int total;
+
+        assert(name);
+        assert(ret);
+
+        total = dns_name_build_suffix_table(name, starts);
+        if (total < 0)
+                return total;
+
+        if ((unsigned) total < n_labels)
+                return -EINVAL;
+
+        skipped = (unsigned) total - n_labels;
+
+        *ret = starts[skipped];
+        return (int) skipped;
+}
+
+/* Removes the first n_labels labels from "a". If the name has that many labels, *ret is set to the
+ * remainder (a pointer into a) and 1 is returned. If the name runs out earlier, *ret is set to "" and 0
+ * is returned. Returns a negative errno if a label cannot be parsed. */
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret) {
+        const char *rest;
+        unsigned i;
+
+        assert(a);
+        assert(ret);
+
+        rest = a;
+        for (i = 0; i < n_labels; i++) {
+                int r;
+
+                r = dns_name_parent(&rest);
+                if (r < 0)
+                        return r;
+                if (r == 0) {
+                        *ret = "";
+                        return 0;
+                }
+        }
+
+        *ret = rest;
+        return 1;
+}
+
+/* Counts the labels of "name". Returns the count, -EINVAL if there are more than DNS_N_LABELS_MAX
+ * labels, or the error of dns_name_parent() if a label cannot be parsed. */
+int dns_name_count_labels(const char *name) {
+        const char *cursor;
+        unsigned count = 0;
+        int r;
+
+        assert(name);
+
+        cursor = name;
+        while ((r = dns_name_parent(&cursor)) > 0) {
+                if (count >= DNS_N_LABELS_MAX)
+                        return -EINVAL;
+
+                count++;
+        }
+
+        if (r < 0)
+                return r;
+
+        return (int) count;
+}
+
+/* Compares "a" with its first n_labels labels removed against "b". Returns the result of
+ * dns_name_equal() for the two, or the result of dns_name_skip() if that is zero (a has too few labels)
+ * or negative (error). */
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b) {
+        const char *rest;
+        int r;
+
+        assert(a);
+        assert(b);
+
+        r = dns_name_skip(a, n_labels, &rest);
+        if (r <= 0)
+                return r;
+
+        return dns_name_equal(rest, b);
+}
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret) { /* Stores in *ret a pointer into "a" at which
+ * the longest run of trailing labels shared with "b" begins (labels are compared ASCII
+ * case-insensitively). Returns 0 on success, a negative errno if one of the names cannot be
+ * parsed. */
+ const char *a_labels[DNS_N_LABELS_MAX+1], *b_labels[DNS_N_LABELS_MAX+1];
+ int n = 0, m = 0, k = 0, r, q;
+
+ assert(a);
+ assert(b);
+ assert(ret);
+
+ /* Determines the common suffix of domain names a and b */
+
+ n = dns_name_build_suffix_table(a, a_labels);
+ if (n < 0)
+ return n;
+
+ m = dns_name_build_suffix_table(b, b_labels);
+ if (m < 0)
+ return m;
+
+ for (;;) { /* k counts the trailing labels found equal so far */
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+ const char *x, *y;
+
+ if (k >= n || k >= m) { /* one of the names is used up: its whole length is common */
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ x = a_labels[n - 1 - k];
+ r = dns_label_unescape(&x, la, sizeof la, 0);
+ if (r < 0)
+ return r;
+
+ y = b_labels[m - 1 - k];
+ q = dns_label_unescape(&y, lb, sizeof lb, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q || ascii_strcasecmp_n(la, lb, r) != 0) {
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ k++;
+ }
+}
+
+/* Converts the domain name "name" to its IDNA ASCII form and stores the newly allocated result in *ret.
+ * Return negative on error, 0 if not implemented, positive on success. With libidn2, 0 is also returned
+ * (and *ret left untouched) when the name cannot be converted in a way that round-trips, or when the
+ * lookup fails with IDN2_2HYPHEN. */
+int dns_name_apply_idna(const char *name, char **ret) {
+#if HAVE_LIBIDN2
+        int r;
+        _cleanup_free_ char *t = NULL;
+
+        assert(name);
+        assert(ret);
+
+        r = idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t,
+                           IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL);
+        if (r == IDN2_OK) {
+                /* Log the result only on success: on failure t may still be NULL, and passing a NULL
+                 * pointer for "%s" is undefined behaviour. Failures are logged further down. */
+                log_debug("idn2_lookup_u8: %s → %s", name, t);
+
+                if (!startswith(name, "xn--")) {
+                        _cleanup_free_ char *s = NULL;
+
+                        /* Convert back and make sure we end up with what we started from */
+                        r = idn2_to_unicode_8z8z(t, &s, 0);
+                        if (r != IDN2_OK) {
+                                log_debug("idn2_to_unicode_8z8z(\"%s\") failed: %d/%s",
+                                          t, r, idn2_strerror(r));
+                                return 0;
+                        }
+
+                        if (!streq_ptr(name, s)) {
+                                log_debug("idn2 roundtrip failed: \"%s\" → \"%s\" → \"%s\", ignoring.",
+                                          name, t, s);
+                                return 0;
+                        }
+                }
+
+                *ret = TAKE_PTR(t);
+
+                return 1; /* *ret has been written */
+        }
+
+        log_debug("idn2_lookup_u8(\"%s\") failed: %d/%s", name, r, idn2_strerror(r));
+        if (r == IDN2_2HYPHEN)
+                /* The name has two hyphens — forbidden by IDNA2008 in some cases */
+                return 0;
+        if (IN_SET(r, IDN2_TOO_BIG_DOMAIN, IDN2_TOO_BIG_LABEL))
+                return -ENOSPC;
+        return -EINVAL;
+#elif HAVE_LIBIDN
+        _cleanup_free_ char *buf = NULL;
+        size_t n = 0, allocated = 0;
+        bool first = true;
+        int r, q;
+
+        assert(name);
+        assert(ret);
+
+        /* Convert label by label, re-escaping each result and joining them with dots */
+        for (;;) {
+                char label[DNS_LABEL_MAX];
+
+                r = dns_label_unescape(&name, label, sizeof label, 0);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                /* q == 0 means the label was plain ASCII and has been left as it is */
+                q = dns_label_apply_idna(label, r, label, sizeof label);
+                if (q < 0)
+                        return q;
+                if (q > 0)
+                        r = q;
+
+                if (!GREEDY_REALLOC(buf, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+                        return -ENOMEM;
+
+                r = dns_label_escape(label, r, buf + n + !first, DNS_LABEL_ESCAPED_MAX);
+                if (r < 0)
+                        return r;
+
+                if (first)
+                        first = false;
+                else
+                        buf[n++] = '.';
+
+                n += r;
+        }
+
+        if (n > DNS_HOSTNAME_MAX)
+                return -EINVAL;
+
+        if (!GREEDY_REALLOC(buf, allocated, n + 1))
+                return -ENOMEM;
+
+        buf[n] = 0;
+        *ret = TAKE_PTR(buf);
+
+        return 1;
+#else
+        return 0;
+#endif
+}
+
+/* Returns > 0 if the specified name is either a valid IP address formatted as string or a valid DNS
+ * name. Names for which isempty() is true yield 0; otherwise the verdict on a non-address is that of
+ * dns_name_is_valid(). */
+int dns_name_is_valid_or_address(const char *name) {
+        if (isempty(name))
+                return 0;
+
+        return in_addr_from_string_auto(name, NULL, NULL) >= 0 ? 1 : dns_name_is_valid(name);
+}
diff --git a/src/shared/dns-domain.h b/src/shared/dns-domain.h
new file mode 100644
index 0000000..6ed512c
--- /dev/null
+++ b/src/shared/dns-domain.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hashmap.h"
+#include "in-addr-util.h"
+
+/* Length of a single label, with all escaping removed, excluding any trailing dot or NUL byte */
+#define DNS_LABEL_MAX 63
+
+/* Worst case length of a single label, with all escaping applied and room for a trailing NUL byte. */
+/* (The arithmetic budgets four output characters per label byte, plus one byte for the NUL.) */
+#define DNS_LABEL_ESCAPED_MAX (DNS_LABEL_MAX*4+1)
+
+/* Maximum length of a full hostname, consisting of a series of unescaped labels, and no trailing dot or NUL byte */
+#define DNS_HOSTNAME_MAX 253
+
+/* Maximum length of a full hostname, on the wire, including the final NUL byte */
+#define DNS_WIRE_FORMAT_HOSTNAME_MAX 255
+
+/* Maximum number of labels per valid hostname */
+#define DNS_N_LABELS_MAX 127
+
+/* Bit flags taken by dns_label_unescape(), dns_name_concat() and dns_name_normalize(). The values are distinct
+ * bits and may be OR-ed together, as dns_name_is_valid_ldh() does. Passing 0 selects the default behaviour. */
+typedef enum DNSLabelFlags {
+ DNS_LABEL_LDH = 1 << 0, /* Follow the "LDH" rule — only letters, digits, and internal hyphens. */
+ DNS_LABEL_NO_ESCAPES = 1 << 1, /* Do not treat backslashes specially */
+} DNSLabelFlags;
+
+/* Per-label primitives, implemented in dns-domain.c. As used by dns_name_apply_idna() there:
+ * dns_label_unescape() advances *name, returns a negative errno-style value on error, 0 when no further label is
+ * left and otherwise the length of the label stored in dest; dns_label_escape() returns a negative value on error
+ * and otherwise the number of characters it wrote to dest.
+ * NOTE(review): the contracts of the _suffix and _new variants are not visible from this header — see the
+ * implementation before relying on details. */
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags);
+int dns_label_unescape_suffix(const char *name, const char **label_end, char *dest, size_t sz);
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz);
+int dns_label_escape_new(const char *p, size_t l, char **ret);
+
+static inline int dns_name_parent(const char **name) {
+        int r;
+
+        /* Consume the first label of *name without storing it anywhere (no destination buffer is passed), and
+         * hand dns_label_unescape()'s result back to the caller unchanged. */
+        r = dns_label_unescape(name, NULL, DNS_LABEL_MAX, 0);
+
+        return r;
+}
+
+/* Per-label IDNA conversion, only declared when building against libidn (HAVE_LIBIDN). In its visible caller,
+ * dns_name_apply_idna(), the same buffer is passed as input and output, a negative return is propagated as an
+ * error and a positive return replaces the label length.
+ * NOTE(review): dns_label_undo_idna() presumably mirrors this for the reverse direction — confirm in
+ * dns-domain.c. */
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+#endif
+
+/* Joins two names. The inline helpers below rely on three properties of this call: it normalizes and verifies
+ * the name as a side effect, it accepts NULL for both b and ret, and it reports an invalid name as -EINVAL. */
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **ret);
+
+static inline int dns_name_normalize(const char *s, DNSLabelFlags flags, char **ret) {
+        const char *no_suffix = NULL;
+
+        /* There is no dedicated normalization routine: concatenating with "nothing" makes dns_name_concat()
+         * normalize the first name as a side-effect. */
+        return dns_name_concat(s, no_suffix, flags, ret);
+}
+
+static inline int dns_name_is_valid(const char *s) {
+        /* Returns 1 if the name is valid, 0 if it is not, and a negative errno-style value if the check itself
+         * failed for any reason other than -EINVAL. Normalizing without an output pointer is enough, since
+         * dns_name_normalize() verifies the name as a side effect. */
+        int r = dns_name_normalize(s, 0, NULL);
+
+        if (r >= 0)
+                return 1;
+
+        return r == -EINVAL ? 0 : r;
+}
+
+static inline int dns_name_is_valid_ldh(const char *s) {
+        /* Same tri-state result as dns_name_is_valid() (1 valid, 0 invalid, < 0 other failure), but checked with
+         * the LDH rule enforced and with backslashes not treated specially. */
+        const DNSLabelFlags strict = DNS_LABEL_LDH|DNS_LABEL_NO_ESCAPES;
+        int r = dns_name_concat(s, NULL, strict, NULL);
+
+        if (r >= 0)
+                return 1;
+
+        return r == -EINVAL ? 0 : r;
+}
+
+/* Hash and comparison callbacks for DNS names, plus the hash_ops object that bundles them.
+ * NOTE(review): presumably meant for use with the containers from hashmap.h — confirm in dns-domain.c. */
+void dns_name_hash_func(const char *s, struct siphash *state);
+int dns_name_compare_func(const char *a, const char *b);
+extern const struct hash_ops dns_name_hash_ops;
+
+int dns_name_between(const char *a, const char *b, const char *c);
+int dns_name_equal(const char *x, const char *y);
+int dns_name_endswith(const char *name, const char *suffix);
+int dns_name_startswith(const char *name, const char *prefix);
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret);
+
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret);
+int dns_name_address(const char *p, int *family, union in_addr_union *a);
+
+bool dns_name_is_root(const char *name);
+bool dns_name_is_single_label(const char *name);
+
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical);
+
+bool dns_srv_type_is_valid(const char *name);
+bool dnssd_srv_type_is_valid(const char *name);
+bool dns_service_name_is_valid(const char *name);
+
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret);
+int dns_service_split(const char *joined, char **name, char **type, char **domain);
+
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret);
+int dns_name_count_labels(const char *name);
+
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret);
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b);
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret);
+
+/* Returns 1 if *ret has been written, 0 if no converted name was produced (which is always the case when
+ * built without any IDN library), and a negative errno-style value on failure. */
+int dns_name_apply_idna(const char *name, char **ret);
+
+/* Returns > 0 if the string is a valid IP address in string form or a valid DNS name; 0 for an empty string. */
+int dns_name_is_valid_or_address(const char *name);
diff --git a/src/shared/dropin.c b/src/shared/dropin.c
new file mode 100644
index 0000000..409eef2
--- /dev/null
+++ b/src/shared/dropin.c
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "dirent-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+/* Computes where a drop-in for a unit lives, without touching the file system.
+ *
+ * On success *_p is set to "<dir>/<unit>.d" and *_q to "<dir>/<unit>.d/<level>-<name>.conf", where <name> has
+ * been passed through xescape() for the characters "/" and ".". Both strings are newly allocated and owned by
+ * the caller. Returns 0 on success, -EINVAL if the escaped name is not a valid file name, -ENOMEM on
+ * allocation failure. */
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+                 const char *name, char **_p, char **_q) {
+
+        char level_str[DECIMAL_STR_MAX(unsigned)];
+        _cleanup_free_ char *escaped = NULL;
+        char *dropin_dir, *dropin_file;
+
+        assert(unit);
+        assert(name);
+        assert(_p);
+        assert(_q);
+
+        sprintf(level_str, "%u", level);
+
+        escaped = xescape(name, "/.");
+        if (!escaped)
+                return -ENOMEM;
+
+        if (!filename_is_valid(escaped))
+                return -EINVAL;
+
+        dropin_dir = strjoin(dir, "/", unit, ".d");
+        if (!dropin_dir)
+                return -ENOMEM;
+
+        dropin_file = strjoin(dropin_dir, "/", level_str, "-", escaped, ".conf");
+        if (dropin_file) {
+                *_p = dropin_dir;
+                *_q = dropin_file;
+                return 0;
+        }
+
+        /* The second allocation failed: do not leak the directory string. */
+        free(dropin_dir);
+        return -ENOMEM;
+}
+
+/* Writes "data" into the drop-in file that drop_in_file() computes for the given unit, level and name. The
+ * drop-in directory is created first with mode 0755; a failure to create it is deliberately ignored, so that the
+ * subsequent atomic write reports the effective error. Returns the result of the write, or the error from
+ * drop_in_file(). */
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+                  const char *name, const char *data) {
+
+        _cleanup_free_ char *dropin_dir = NULL, *dropin_file = NULL;
+        int r;
+
+        assert(dir);
+        assert(unit);
+        assert(name);
+        assert(data);
+
+        r = drop_in_file(dir, unit, level, name, &dropin_dir, &dropin_file);
+        if (r >= 0) {
+                (void) mkdir_p(dropin_dir, 0755);
+                r = write_string_file_atomic_label(dropin_file, data);
+        }
+
+        return r;
+}
+
+/* printf()-style front end for write_drop_in(): formats the drop-in contents into a temporary heap string and
+ * writes that. Returns -ENOMEM if formatting fails, otherwise whatever write_drop_in() returns. */
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+                         const char *name, const char *format, ...) {
+        _cleanup_free_ char *contents = NULL;
+        va_list args;
+        int k;
+
+        assert(dir);
+        assert(unit);
+        assert(name);
+        assert(format);
+
+        va_start(args, format);
+        k = vasprintf(&contents, format, args);
+        va_end(args);
+
+        if (k < 0)
+                return -ENOMEM;
+
+        return write_drop_in(dir, unit, level, name, contents);
+}
+
+/* Resolves "path" (relative to original_root) with chase_symlinks() and, if it resolves, appends the resolved
+ * path to the string list *dirs. A path that does not exist or is too long is not an error: 0 is returned and
+ * nothing is appended. Returns 0 on success or when skipped, negative errno-style value otherwise. */
+static int unit_file_find_dir(
+                const char *original_root,
+                const char *path,
+                char ***dirs) {
+
+        _cleanup_free_ char *chased = NULL;
+        int r;
+
+        assert(path);
+
+        r = chase_symlinks(path, original_root, 0, &chased);
+        switch (r) {
+
+        case -ENOENT:
+                /* Ignore -ENOENT, after all most units won't have a drop-in dir. */
+                return 0;
+
+        case -ENAMETOOLONG:
+                /* Also, ignore -ENAMETOOLONG but log about it. After all, users are not even able to create the
+                 * drop-in dir in such case. This mostly happens for device units with an overly long /sys path. */
+                log_debug_errno(r, "Path '%s' too long, couldn't canonicalize, ignoring.", path);
+                return 0;
+
+        default:
+                break;
+        }
+        if (r < 0)
+                return log_warning_errno(r, "Failed to canonicalize path '%s': %m", path);
+
+        if (strv_push(dirs, chased) < 0)
+                return log_oom();
+
+        /* The list owns the string now, keep the cleanup handler away from it. */
+        chased = NULL;
+        return 0;
+}
+
+/* Collects, for one unit name and one lookup directory, every drop-in directory that applies to the unit and
+ * appends the resolved paths to *dirs.
+ *
+ * The candidates are tried in this order: "<unit_path>/<name><suffix>" itself, then (for instances) the
+ * directory of the template, then recursively the directories of the "-" truncated prefixes of the name.
+ * If unit_path_cache is given, the directory for "name" is only looked at when its path is in the cache.
+ *
+ * Returns 0 on success (including when nothing was found), negative errno-style value on failure. */
+static int unit_file_find_dirs(
+                const char *original_root,
+                Set *unit_path_cache,
+                const char *unit_path,
+                const char *name,
+                const char *suffix,
+                char ***dirs) {
+
+        _cleanup_free_ char *prefix = NULL, *instance = NULL, *built = NULL;
+        bool is_instance, chopped;
+        const char *dash;
+        UnitType type;
+        char *path;
+        size_t n;
+        int r;
+
+        assert(unit_path);
+        assert(name);
+        assert(suffix);
+
+        path = strjoina(unit_path, "/", name, suffix);
+        if (!unit_path_cache || set_get(unit_path_cache, path)) {
+                r = unit_file_find_dir(original_root, path, dirs);
+                if (r < 0)
+                        return r;
+        }
+
+        is_instance = unit_name_is_valid(name, UNIT_NAME_INSTANCE);
+        if (is_instance) { /* Also try the template dir */
+                _cleanup_free_ char *template = NULL;
+
+                r = unit_name_template(name, &template);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to generate template from unit name: %m");
+
+                r = unit_file_find_dirs(original_root, unit_path_cache, unit_path, template, suffix, dirs);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Let's see if there's a "-" prefix for this unit name. If so, let's invoke ourselves for it. This will then
+         * recursively do the same for all our prefixes. i.e. this means given "foo-bar-waldo.service" we'll also
+         * search "foo-bar-.service" and "foo-.service".
+         *
+         * Note the order in which we do it: we traverse up adding drop-ins on each step. This means the more specific
+         * drop-ins may override the more generic drop-ins, which is the intended behaviour. */
+
+        r = unit_name_to_prefix(name, &prefix);
+        if (r < 0)
+                return log_error_errno(r, "Failed to derive unit name prefix from unit name: %m");
+
+        chopped = false;
+        for (;;) {
+                dash = strrchr(prefix, '-');
+                if (!dash) /* No dash? if so we are done */
+                        return 0;
+
+                n = (size_t) (dash - prefix);
+                if (n == 0) /* Leading dash? If so, we are done */
+                        return 0;
+
+                /* The dash is followed by more text (or we already removed a trailing dash once): cut right
+                 * after the dash, so that the prefix keeps its trailing "-". */
+                if (prefix[n+1] != 0 || chopped) {
+                        prefix[n+1] = 0;
+                        break;
+                }
+
+                /* Trailing dash? If so, chop it off and try again, but not more than once. */
+                prefix[n] = 0;
+                chopped = true;
+        }
+
+        if (!unit_prefix_is_valid(prefix))
+                return 0;
+
+        type = unit_name_to_type(name);
+        if (type < 0)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Failed to derive unit type from unit name: %s",
+                                       name);
+
+        if (is_instance) {
+                r = unit_name_to_instance(name, &instance);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to derive unit name instance from unit name: %m");
+        }
+
+        r = unit_name_build_from_type(prefix, instance, type, &built);
+        if (r < 0)
+                return log_error_errno(r, "Failed to build prefix unit name: %m");
+
+        return unit_file_find_dirs(original_root, unit_path_cache, unit_path, built, suffix, dirs);
+}
+
+/* Builds the list of drop-in files for a set of unit names.
+ *
+ * Every name in "names" is combined with every directory in "lookup_path" to collect the applicable drop-in
+ * directories (errors for individual combinations are ignored). The files in those directories that carry
+ * file_suffix are then enumerated with conf_files_list_strv().
+ *
+ * Returns 1 and stores the file list in *ret if at least one directory was found, 0 with *ret set to NULL if
+ * none was found, and a negative errno-style value if the enumeration failed. */
+int unit_file_find_dropin_paths(
+                const char *original_root,
+                char **lookup_path,
+                Set *unit_path_cache,
+                const char *dir_suffix,
+                const char *file_suffix,
+                Set *names,
+                char ***ret) {
+
+        _cleanup_strv_free_ char **dirs = NULL;
+        char *unit_name, **search_dir;
+        Iterator it;
+        int r;
+
+        assert(ret);
+
+        SET_FOREACH(unit_name, names, it)
+                STRV_FOREACH(search_dir, lookup_path)
+                        (void) unit_file_find_dirs(original_root, unit_path_cache, *search_dir, unit_name, dir_suffix, &dirs);
+
+        if (!strv_isempty(dirs)) {
+                r = conf_files_list_strv(ret, file_suffix, NULL, 0, (const char**) dirs);
+                if (r < 0)
+                        return log_warning_errno(r, "Failed to create the list of configuration files: %m");
+
+                return 1;
+        }
+
+        *ret = NULL;
+        return 0;
+}
diff --git a/src/shared/dropin.h b/src/shared/dropin.h
new file mode 100644
index 0000000..ae7379b
--- /dev/null
+++ b/src/shared/dropin.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+#include "unit-name.h"
+
+/* Computes the drop-in directory ("<dir>/<unit>.d", returned in *_p) and drop-in file
+ * ("<dir>/<unit>.d/<level>-<escaped name>.conf", returned in *_q). Both strings are allocated for the caller. */
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+ const char *name, char **_p, char **_q);
+
+/* Creates the drop-in directory (mode 0755) and writes "data" to the drop-in file. */
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *data);
+
+/* Same as write_drop_in(), with the contents given as a printf()-style format string plus arguments. */
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *format, ...) _printf_(5, 6);
+
+/* Lists the files ending in file_suffix found in the "<name><dir_suffix>" directories of the given units below
+ * the lookup paths. Returns 1 with the list in *paths, 0 with *paths set to NULL if no directory was found,
+ * negative errno-style value on failure. */
+int unit_file_find_dropin_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ const char *dir_suffix,
+ const char *file_suffix,
+ Set *names,
+ char ***paths);
+
+/* Convenience wrapper around unit_file_find_dropin_paths() for the common case: ".conf" files located in
+ * ".d" directories. Arguments and return value are passed through unchanged. */
+static inline int unit_file_find_dropin_conf_paths(
+                const char *original_root,
+                char **lookup_path,
+                Set *unit_path_cache,
+                Set *names,
+                char ***paths) {
+
+        const char *dir_suffix = ".d", *file_suffix = ".conf";
+
+        return unit_file_find_dropin_paths(
+                        original_root, lookup_path, unit_path_cache,
+                        dir_suffix, file_suffix,
+                        names, paths);
+}
diff --git a/src/shared/efivars.c b/src/shared/efivars.c
new file mode 100644
index 0000000..26f905b
--- /dev/null
+++ b/src/shared/efivars.c
@@ -0,0 +1,914 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "chattr-util.h"
+#include "dirent-util.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "virt.h"
+
+#if ENABLE_EFI
+
+/* Bit in the "attr" field of a boot option that marks the entry as active */
+#define LOAD_OPTION_ACTIVE 0x00000001
+/* Device path node type "Media Device Path", and the two sub-types handled here: hard drive and file path */
+#define MEDIA_DEVICE_PATH 0x04
+#define MEDIA_HARDDRIVE_DP 0x01
+#define MEDIA_FILEPATH_DP 0x04
+/* Values of the hard drive node's signature_type (GUID signature) and mbr_type (GUID partition table) fields */
+#define SIGNATURE_TYPE_GUID 0x02
+#define MBR_TYPE_EFI_PARTITION_TABLE_HEADER 0x02
+/* Node type and sub-type that together terminate an entire device path */
+#define END_DEVICE_PATH_TYPE 0x7f
+#define END_ENTIRE_DEVICE_PATH_SUBTYPE 0xff
+/* Bit in the OsIndications/OsIndicationsSupported variables used for "reboot into the firmware UI" */
+#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001
+
+/* Leading part of a "Boot%04X" variable's payload: the attribute bits (see LOAD_OPTION_ACTIVE), the size in
+ * bytes of the device path list, and the NUL-terminated UTF-16 title. The device path list starts right after
+ * the title's terminator. The body is kept in a macro so that it can be instantiated twice below, once with
+ * and once without the packed attribute, and the two layouts compared at compile time. */
+#define boot_option__contents { \
+ uint32_t attr; \
+ uint16_t path_len; \
+ uint16_t title[]; \
+ }
+
+struct boot_option boot_option__contents;
+struct boot_option__packed boot_option__contents _packed_;
+assert_cc(offsetof(struct boot_option, title) == offsetof(struct boot_option__packed, title));
+/* sizeof(struct boot_option) != sizeof(struct boot_option__packed), so
+ * the *size* of the structure should not be used anywhere below. */
+
+/* Payload of a hard drive device path node (type MEDIA_DEVICE_PATH, sub-type MEDIA_HARDDRIVE_DP). */
+struct drive_path {
+ uint32_t part_nr; /* partition number */
+ uint64_t part_start; /* start of the partition, as passed to efi_add_boot_option() */
+ uint64_t part_size; /* size of the partition, as passed to efi_add_boot_option() */
+ char signature[16]; /* partition GUID in EFI byte order when signature_type is SIGNATURE_TYPE_GUID */
+ uint8_t mbr_type; /* MBR_TYPE_EFI_PARTITION_TABLE_HEADER for GPT */
+ uint8_t signature_type; /* SIGNATURE_TYPE_GUID for a GUID signature */
+} _packed_;
+
+/* One node of a device path list: a four byte header (type, sub-type, total node length in bytes including
+ * the header) followed by a type specific payload, here either a UTF-16 file path or a hard drive
+ * description. As with boot_option, the body is a macro so that the packed and the natural layout can be
+ * instantiated and checked against each other. */
+#define device_path__contents { \
+ uint8_t type; \
+ uint8_t sub_type; \
+ uint16_t length; \
+ union { \
+ uint16_t path[0]; \
+ struct drive_path drive; \
+ }; \
+ }
+
+struct device_path device_path__contents;
+struct device_path__packed device_path__contents _packed_;
+assert_cc(sizeof(struct device_path) == sizeof(struct device_path__packed));
+
+bool is_efi_boot(void) {
+        /* Inside a container we never report an EFI boot, regardless of what sysfs shows. Otherwise the
+         * presence of /sys/firmware/efi/ decides. */
+        return detect_container() <= 0 &&
+                access("/sys/firmware/efi/", F_OK) >= 0;
+}
+
+/* Reads a one-byte boolean variable of the global EFI vendor. Returns 1 if the byte is non-zero, 0 if it is zero
+ * or if the system was not booted via EFI, -EINVAL if the variable is not exactly one byte long, or the error
+ * from efi_get_variable(). */
+static int read_flag(const char *varname) {
+        _cleanup_free_ void *data = NULL;
+        size_t size;
+        int r;
+
+        if (!is_efi_boot()) /* If this is not an EFI boot, assume the queried flags are zero */
+                return 0;
+
+        r = efi_get_variable(EFI_VENDOR_GLOBAL, varname, NULL, &data, &size);
+        if (r < 0)
+                return r;
+
+        if (size != 1)
+                return -EINVAL;
+
+        return *(const uint8_t *) data != 0;
+}
+
+bool is_efi_secure_boot(void) {
+        /* Errors while reading the variable count as "not enabled". */
+        int flag = read_flag("SecureBoot");
+
+        return flag > 0;
+}
+
+bool is_efi_secure_boot_setup_mode(void) {
+        /* Errors while reading the variable count as "not in setup mode". */
+        int flag = read_flag("SetupMode");
+
+        return flag > 0;
+}
+
+/* Checks whether the firmware advertises support for rebooting into its setup UI. Returns 0 if it does,
+ * -EOPNOTSUPP if the system was not booted via EFI, the OsIndicationsSupported variable is missing or the
+ * relevant bit is not set, -EINVAL if the variable has an unexpected size, or another error from
+ * efi_get_variable(). */
+int efi_reboot_to_firmware_supported(void) {
+        _cleanup_free_ void *data = NULL;
+        uint64_t supported;
+        size_t size;
+        int r;
+
+        if (!is_efi_boot())
+                return -EOPNOTSUPP;
+
+        r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndicationsSupported", NULL, &data, &size);
+        if (r == -ENOENT) /* variable doesn't exist? it's not supported then */
+                return -EOPNOTSUPP;
+        if (r < 0)
+                return r;
+        if (size != sizeof(uint64_t))
+                return -EINVAL;
+
+        memcpy(&supported, data, sizeof(supported));
+
+        /* bit unset? it's not supported then */
+        return (supported & EFI_OS_INDICATIONS_BOOT_TO_FW_UI) ? 0 : -EOPNOTSUPP;
+}
+
+/* Reads the 64 bit OsIndications variable into *os_indication. Returns 0 on success, and a negative
+ * errno-style value if reboot-to-firmware is unsupported, the variable has the wrong size or reading fails. */
+static int get_os_indications(uint64_t *os_indication) {
+        _cleanup_free_ void *data = NULL;
+        size_t size;
+        int r;
+
+        /* Let's verify general support first */
+        r = efi_reboot_to_firmware_supported();
+        if (r < 0)
+                return r;
+
+        r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndications", NULL, &data, &size);
+        if (r == -ENOENT) {
+                /* Some firmware implementations that do support OsIndications and report that with
+                 * OsIndicationsSupported will remove the OsIndications variable when it is unset. Let's pretend it's 0
+                 * then, to hide this implementation detail. Note that this call will return -ENOENT then only if the
+                 * support for OsIndications is missing entirely, as determined by efi_reboot_to_firmware_supported()
+                 * above. */
+                *os_indication = 0;
+                return 0;
+        }
+        if (r < 0)
+                return r;
+        if (size != sizeof(uint64_t))
+                return -EINVAL;
+
+        memcpy(os_indication, data, sizeof(uint64_t));
+        return 0;
+}
+
+/* Returns 1 if the "boot into firmware UI" bit is currently set in OsIndications, 0 if it is clear, and a
+ * negative errno-style value if the indications cannot be read. */
+int efi_get_reboot_to_firmware(void) {
+        uint64_t indications;
+        int r;
+
+        r = get_os_indications(&indications);
+        if (r < 0)
+                return r;
+
+        return (indications & EFI_OS_INDICATIONS_BOOT_TO_FW_UI) != 0;
+}
+
+/* Sets or clears the "boot into firmware UI" bit in OsIndications, leaving all other bits as they are. Returns
+ * 0 on success (including when nothing had to change), negative errno-style value on failure. */
+int efi_set_reboot_to_firmware(bool value) {
+        uint64_t current, wanted;
+        int r;
+
+        r = get_os_indications(&current);
+        if (r < 0)
+                return r;
+
+        wanted = value ? (current | EFI_OS_INDICATIONS_BOOT_TO_FW_UI)
+                       : (current & ~EFI_OS_INDICATIONS_BOOT_TO_FW_UI);
+
+        /* Avoid writing to efi vars store if we can due to firmware bugs. */
+        if (wanted == current)
+                return 0;
+
+        return efi_set_variable(EFI_VENDOR_GLOBAL, "OsIndications", &wanted, sizeof(uint64_t));
+}
+
+/* Reads an EFI variable through efivarfs.
+ *
+ * The file "/sys/firmware/efi/efivars/<name>-<vendor uuid>" starts with a 32 bit attribute word, followed by the
+ * variable's data. On success the data is returned in a newly allocated buffer in *value (owned by the caller),
+ * its size in bytes (without the attribute word) in *size and, if "attribute" is non-NULL, the attribute word in
+ * *attribute. The buffer is always followed by two extra zero bytes that are not counted in *size.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EIO if the file is shorter than the attribute word or a
+ * read comes up short, -E2BIG if the file is larger than 4 MiB plus the attribute word, or -errno from
+ * open()/fstat()/read(). */
+int efi_get_variable(
+ sd_id128_t vendor,
+ const char *name,
+ uint32_t *attribute,
+ void **value,
+ size_t *size) {
+
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *p = NULL;
+ uint32_t a;
+ ssize_t n;
+ struct stat st;
+ _cleanup_free_ void *buf = NULL;
+
+ assert(name);
+ assert(value);
+ assert(size);
+
+ if (asprintf(&p,
+ "/sys/firmware/efi/efivars/%s-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ name, SD_ID128_FORMAT_VAL(vendor)) < 0)
+ return -ENOMEM;
+
+ fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ /* The file size tells us how much data to expect: it must at least hold the attribute word. */
+ if (fstat(fd, &st) < 0)
+ return -errno;
+ if (st.st_size < 4)
+ return -EIO;
+ if (st.st_size > 4*1024*1024 + 4)
+ return -E2BIG;
+
+ /* First the attribute word ... */
+ n = read(fd, &a, sizeof(a));
+ if (n < 0)
+ return -errno;
+ if (n != sizeof(a))
+ return -EIO;
+
+ /* ... then the payload, with room for the two terminating zero bytes added below. */
+ buf = malloc(st.st_size - 4 + 2);
+ if (!buf)
+ return -ENOMEM;
+
+ n = read(fd, buf, (size_t) st.st_size - 4);
+ if (n < 0)
+ return -errno;
+ if (n != (ssize_t) st.st_size - 4)
+ return -EIO;
+
+ /* Always NUL terminate (2 bytes, to protect UTF-16) */
+ ((char*) buf)[st.st_size - 4] = 0;
+ ((char*) buf)[st.st_size - 4 + 1] = 0;
+
+ *value = TAKE_PTR(buf);
+ *size = (size_t) st.st_size - 4;
+
+ if (attribute)
+ *attribute = a;
+
+ return 0;
+}
+
+/* Reads an EFI variable whose payload is UTF-16 text and returns it converted to UTF-8 in *p (newly allocated,
+ * owned by the caller). Returns 0 on success, -ENOMEM if the conversion fails, or the error from
+ * efi_get_variable(). */
+int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) {
+        _cleanup_free_ void *raw = NULL;
+        size_t raw_size = 0;
+        char *converted;
+        int r;
+
+        r = efi_get_variable(vendor, name, NULL, &raw, &raw_size);
+        if (r < 0)
+                return r;
+
+        converted = utf16_to_utf8(raw, raw_size);
+        if (!converted)
+                return -ENOMEM;
+
+        *p = converted;
+        return 0;
+}
+
+/* Writes or removes an EFI variable through efivarfs.
+ *
+ * With size == 0 the variable file is unlinked. Otherwise the file is created if necessary and written in a
+ * single write consisting of a 32 bit attribute word (non-volatile, boot service access, runtime access)
+ * followed by "size" bytes from "value".
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -errno from unlink()/open(), or the error from
+ * loop_write(). */
+int efi_set_variable(
+ sd_id128_t vendor,
+ const char *name,
+ const void *value,
+ size_t size) {
+
+ struct var {
+ uint32_t attr;
+ char buf[];
+ } _packed_ * _cleanup_free_ buf = NULL;
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ bool saved_flags_valid = false;
+ unsigned saved_flags;
+ int r;
+
+ assert(name);
+ assert(value || size == 0);
+
+ if (asprintf(&p,
+ "/sys/firmware/efi/efivars/%s-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ name, SD_ID128_FORMAT_VAL(vendor)) < 0)
+ return -ENOMEM;
+
+ /* Newer efivarfs protects variables that are not in a whitelist with FS_IMMUTABLE_FL by default, to protect
+ * them for accidental removal and modification. We are not changing these variables accidentally however,
+ * hence let's unset the bit first. */
+
+ r = chattr_path(p, 0, FS_IMMUTABLE_FL, &saved_flags);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to drop FS_IMMUTABLE_FL flag from '%s', ignoring: %m", p);
+
+ /* Only restore the flags at the end if we actually managed to read (and change) them here. */
+ saved_flags_valid = r >= 0;
+
+ if (size == 0) {
+ if (unlink(p) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* The file is gone, hence there is nothing left whose flags could be restored. */
+ return 0;
+ }
+
+ fd = open(p, O_WRONLY|O_CREAT|O_NOCTTY|O_CLOEXEC, 0644);
+ if (fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ /* Assemble attribute word and payload in one buffer, so that both are handed over together. */
+ buf = malloc(sizeof(uint32_t) + size);
+ if (!buf) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ buf->attr = EFI_VARIABLE_NON_VOLATILE|EFI_VARIABLE_BOOTSERVICE_ACCESS|EFI_VARIABLE_RUNTIME_ACCESS;
+ memcpy(buf->buf, value, size);
+
+ r = loop_write(fd, buf, sizeof(uint32_t) + size, false);
+ if (r < 0)
+ goto finish;
+
+ r = 0;
+
+finish:
+ if (saved_flags_valid) {
+ int q;
+
+ /* Restore the original flags field, just in case */
+ if (fd < 0)
+ q = chattr_path(p, saved_flags, FS_IMMUTABLE_FL, NULL);
+ else
+ q = chattr_fd(fd, saved_flags, FS_IMMUTABLE_FL, NULL);
+ if (q < 0)
+ log_debug_errno(q, "Failed to restore FS_IMMUTABLE_FL on '%s', ignoring: %m", p);
+ }
+
+ return r;
+}
+
+/* Stores the UTF-8 string "v" in an EFI variable as UTF-16 text, including the terminating NUL character.
+ * Returns -ENOMEM if the conversion fails, otherwise the result of efi_set_variable(). */
+int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *v) {
+        _cleanup_free_ char16_t *wide = NULL;
+        size_t n_bytes;
+
+        wide = utf8_to_utf16(v, strlen(v));
+        if (!wide)
+                return -ENOMEM;
+
+        /* The terminator is part of what gets written, hence the "+ 1". */
+        n_bytes = (char16_strlen(wide) + 1) * sizeof(char16_t);
+
+        return efi_set_variable(vendor, name, wide, n_bytes);
+}
+
+/* Determines how many bytes the NUL-terminated UTF-16 string at "s" occupies, counting the two byte
+ * terminator. At most buf_len_bytes bytes are inspected. Returns -EINVAL if buf_len_bytes is not a multiple of
+ * two or if no terminator is found within the buffer. */
+static ssize_t utf16_size(const uint16_t *s, size_t buf_len_bytes) {
+        size_t n_units, i;
+
+        if (buf_len_bytes % sizeof(uint16_t) != 0)
+                return -EINVAL;
+
+        n_units = buf_len_bytes / sizeof(uint16_t);
+
+        for (i = 0; i < n_units; i++)
+                if (s[i] == 0)
+                        return (i + 1) * sizeof(uint16_t);
+
+        return -EINVAL; /* The terminator was not found */
+}
+
+/* In-memory layout of an EFI GUID: three integer fields followed by eight plain bytes. The conversion helpers
+ * below map the integer fields to and from the most-significant-byte-first order used by sd_id128_t. */
+struct guid {
+ uint32_t u1;
+ uint16_t u2;
+ uint16_t u3;
+ uint8_t u4[8];
+} _packed_;
+
+/* Converts an EFI GUID (struct guid layout, possibly unaligned) into an sd_id128_t. The three integer fields
+ * are stored most significant byte first; the trailing eight bytes are copied verbatim. */
+static void efi_guid_to_id128(const void *guid, sd_id128_t *id128) {
+        const struct guid *g = guid;
+        uint32_t d1;
+        uint16_t d2, d3;
+
+        /* memcpy() rather than direct field access, since nothing guarantees the alignment of "guid". */
+        memcpy(&d1, &g->u1, sizeof(d1));
+        memcpy(&d2, &g->u2, sizeof(d2));
+        memcpy(&d3, &g->u3, sizeof(d3));
+
+        id128->bytes[0] = (d1 >> 24) & 0xff;
+        id128->bytes[1] = (d1 >> 16) & 0xff;
+        id128->bytes[2] = (d1 >> 8) & 0xff;
+        id128->bytes[3] = d1 & 0xff;
+
+        id128->bytes[4] = (d2 >> 8) & 0xff;
+        id128->bytes[5] = d2 & 0xff;
+
+        id128->bytes[6] = (d3 >> 8) & 0xff;
+        id128->bytes[7] = d3 & 0xff;
+
+        memcpy(&id128->bytes[8], g->u4, sizeof(g->u4));
+}
+
+/* Reads and decodes the boot option variable "Boot<id as four upper-case hex digits>".
+ *
+ * Each output parameter is optional (may be NULL):
+ *   title     - the option's title converted to UTF-8 (newly allocated)
+ *   part_uuid - partition GUID taken from a GPT hard drive node, SD_ID128_NULL if there is none
+ *   path      - the first file path node converted to UTF-8 with "/" as separator (newly allocated), or NULL
+ *   active    - whether LOAD_OPTION_ACTIVE is set in the option's attributes
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the system was not booted via EFI, -ENOENT if the variable is too short
+ * to hold the header, -EINVAL if the title is not terminated or the device path list does not fit in the
+ * variable, -ENOMEM on allocation failure, or the error from efi_get_variable().
+ *
+ * The variable's contents come from the firmware and are not trusted: every device path node is checked to lie
+ * within the device path list before any of its fields beyond the header are used. Parsing simply stops at the
+ * first node that does not fit. */
+int efi_get_boot_option(
+                uint16_t id,
+                char **title,
+                sd_id128_t *part_uuid,
+                char **path,
+                bool *active) {
+
+        char boot_id[9];
+        _cleanup_free_ uint8_t *buf = NULL;
+        size_t l;
+        struct boot_option *header;
+        ssize_t title_size;
+        _cleanup_free_ char *s = NULL, *p = NULL;
+        sd_id128_t p_uuid = SD_ID128_NULL;
+        int r;
+
+        if (!is_efi_boot())
+                return -EOPNOTSUPP;
+
+        xsprintf(boot_id, "Boot%04X", id);
+        r = efi_get_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, (void **)&buf, &l);
+        if (r < 0)
+                return r;
+        if (l < offsetof(struct boot_option, title))
+                return -ENOENT;
+
+        header = (struct boot_option *)buf;
+        title_size = utf16_size(header->title, l - offsetof(struct boot_option, title));
+        if (title_size < 0)
+                return title_size;
+
+        if (title) {
+                s = utf16_to_utf8(header->title, title_size);
+                if (!s)
+                        return -ENOMEM;
+        }
+
+        if (header->path_len > 0) {
+                uint8_t *dbuf;
+                size_t dnext, doff;
+
+                /* The device path list follows the title, including the title's terminator. */
+                doff = offsetof(struct boot_option, title) + title_size;
+                dbuf = buf + doff;
+                if (header->path_len > l - doff)
+                        return -EINVAL;
+
+                dnext = 0;
+                while (dnext < header->path_len) {
+                        struct device_path *dpath;
+
+                        /* Not even room for a node header left? Then stop, don't read past the list. */
+                        if (header->path_len - dnext < offsetof(struct device_path, path))
+                                break;
+
+                        dpath = (struct device_path *)(dbuf + dnext);
+                        if (dpath->length < 4)
+                                break;
+
+                        /* A node that claims to extend beyond the end of the list is bogus. */
+                        if (dpath->length > header->path_len - dnext)
+                                break;
+
+                        /* Type 0x7F – End of Hardware Device Path, Sub-Type 0xFF – End Entire Device Path */
+                        if (dpath->type == END_DEVICE_PATH_TYPE && dpath->sub_type == END_ENTIRE_DEVICE_PATH_SUBTYPE)
+                                break;
+
+                        dnext += dpath->length;
+
+                        /* Type 0x04 – Media Device Path */
+                        if (dpath->type != MEDIA_DEVICE_PATH)
+                                continue;
+
+                        /* Sub-Type 1 – Hard Drive */
+                        if (dpath->sub_type == MEDIA_HARDDRIVE_DP) {
+                                /* Too short to carry the hard drive payload? Then ignore the node. */
+                                if (dpath->length < offsetof(struct device_path, drive) + sizeof(struct drive_path))
+                                        continue;
+
+                                /* 0x02 – GUID Partition Table */
+                                if (dpath->drive.mbr_type != MBR_TYPE_EFI_PARTITION_TABLE_HEADER)
+                                        continue;
+
+                                /* 0x02 – GUID signature */
+                                if (dpath->drive.signature_type != SIGNATURE_TYPE_GUID)
+                                        continue;
+
+                                if (part_uuid)
+                                        efi_guid_to_id128(dpath->drive.signature, &p_uuid);
+                                continue;
+                        }
+
+                        /* Sub-Type 4 – File Path */
+                        if (dpath->sub_type == MEDIA_FILEPATH_DP && !p && path) {
+                                p = utf16_to_utf8(dpath->path, dpath->length-4);
+                                if (!p)
+                                        return -ENOMEM;
+
+                                efi_tilt_backslashes(p);
+                                continue;
+                        }
+                }
+        }
+
+        if (title)
+                *title = TAKE_PTR(s);
+        if (part_uuid)
+                *part_uuid = p_uuid;
+        if (path)
+                *path = TAKE_PTR(p);
+        if (active)
+                *active = !!(header->attr & LOAD_OPTION_ACTIVE);
+
+        return 0;
+}
+
+/* Widens the NUL-terminated byte string "src" into 16 bit units at "dest", one unit per byte, and terminates
+ * the result. "dest" must have room for strlen(src) + 1 units. Only ASCII input is converted faithfully: no
+ * UTF-8 decoding takes place. Bytes are widened as unsigned values, so that bytes >= 0x80 do not get
+ * sign-extended to 0xFFxx on platforms where plain char is signed. */
+static void to_utf16(uint16_t *dest, const char *src) {
+        size_t i;
+
+        for (i = 0; src[i] != '\0'; i++)
+                dest[i] = (unsigned char) src[i];
+        dest[i] = 0;
+}
+
+/* Converts an sd_id128_t into an EFI GUID (struct guid layout) and copies it to "guid", which may be
+ * unaligned. This is the inverse of efi_guid_to_id128(). The bytes are widened to unsigned types before
+ * shifting: a uint8_t would otherwise be promoted to int, and shifting a value with the top bit set left by 24
+ * overflows a signed int. */
+static void id128_to_efi_guid(sd_id128_t id, void *guid) {
+        struct guid uuid = {
+                .u1 = (uint32_t) id.bytes[0] << 24 |
+                      (uint32_t) id.bytes[1] << 16 |
+                      (uint32_t) id.bytes[2] << 8 |
+                      (uint32_t) id.bytes[3],
+                .u2 = (uint16_t) ((unsigned) id.bytes[4] << 8 | id.bytes[5]),
+                .u3 = (uint16_t) ((unsigned) id.bytes[6] << 8 | id.bytes[7]),
+        };
+        memcpy(uuid.u4, id.bytes+8, sizeof(uuid.u4));
+        memcpy(guid, &uuid, sizeof(uuid));
+}
+
+/* Replaces every '/' in the NUL-terminated 16 bit string "s" by '\\', in place, and returns "s". */
+static uint16_t *tilt_slashes(uint16_t *s) {
+        uint16_t *cursor = s;
+
+        while (*cursor != 0) {
+                if (*cursor == '/')
+                        *cursor = '\\';
+                cursor++;
+        }
+
+        return s;
+}
+
+/* Creates (or overwrites) the boot option variable "Boot<id as four upper-case hex digits>".
+ *
+ * The variable is assembled in one buffer: the boot_option header with the title, followed by a device path
+ * list consisting of three nodes — a GPT hard drive node describing the partition, a file path node with the
+ * loader path (forward slashes turned into backslashes) and the end-of-path node. The option is marked active.
+ *
+ * Returns -EOPNOTSUPP if the system was not booted via EFI, -ENOMEM on allocation failure, otherwise the
+ * result of efi_set_variable(). */
+int efi_add_boot_option(
+ uint16_t id,
+ const char *title,
+ uint32_t part,
+ uint64_t pstart,
+ uint64_t psize,
+ sd_id128_t part_uuid,
+ const char *path) {
+
+ size_t size, title_len, path_len;
+ _cleanup_free_ char *buf = NULL;
+ struct boot_option *option;
+ struct device_path *devicep;
+ char boot_id[9];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ /* Sizes in bytes of the 16 bit strings, including their terminators */
+ title_len = (strlen(title)+1) * 2;
+ path_len = (strlen(path)+1) * 2;
+
+ buf = malloc0(offsetof(struct boot_option, title) + title_len +
+ sizeof(struct drive_path) +
+ sizeof(struct device_path) + path_len);
+ if (!buf)
+ return -ENOMEM;
+
+ /* header */
+ option = (struct boot_option *)buf;
+ option->attr = LOAD_OPTION_ACTIVE;
+ /* path_len covers all three nodes: hard drive node, file path node, end node */
+ option->path_len = offsetof(struct device_path, drive) + sizeof(struct drive_path) +
+ offsetof(struct device_path, path) + path_len +
+ offsetof(struct device_path, path);
+ to_utf16(option->title, title);
+ /* "size" tracks the write offset into buf from here on */
+ size = offsetof(struct boot_option, title) + title_len;
+
+ /* partition info */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_HARDDRIVE_DP;
+ devicep->length = offsetof(struct device_path, drive) + sizeof(struct drive_path);
+ memcpy(&devicep->drive.part_nr, &part, sizeof(uint32_t));
+ memcpy(&devicep->drive.part_start, &pstart, sizeof(uint64_t));
+ memcpy(&devicep->drive.part_size, &psize, sizeof(uint64_t));
+ id128_to_efi_guid(part_uuid, devicep->drive.signature);
+ devicep->drive.mbr_type = MBR_TYPE_EFI_PARTITION_TABLE_HEADER;
+ devicep->drive.signature_type = SIGNATURE_TYPE_GUID;
+ size += devicep->length;
+
+ /* path to loader */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_FILEPATH_DP;
+ devicep->length = offsetof(struct device_path, path) + path_len;
+ to_utf16(devicep->path, path);
+ tilt_slashes(devicep->path);
+ size += devicep->length;
+
+ /* end of path */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = END_DEVICE_PATH_TYPE;
+ devicep->sub_type = END_ENTIRE_DEVICE_PATH_SUBTYPE;
+ devicep->length = offsetof(struct device_path, path);
+ size += devicep->length;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, buf, size);
+}
+
+/* Deletes the boot option variable "Boot<id as four upper-case hex digits>". Returns -EOPNOTSUPP if the
+ * system was not booted via EFI, otherwise the result of efi_set_variable(). */
+int efi_remove_boot_option(uint16_t id) {
+        char variable[9];
+
+        if (!is_efi_boot())
+                return -EOPNOTSUPP;
+
+        xsprintf(variable, "Boot%04X", id);
+
+        /* Writing an empty value is how efi_set_variable() is asked to remove a variable. */
+        return efi_set_variable(EFI_VENDOR_GLOBAL, variable, NULL, 0);
+}
+
+/* Reads the "BootOrder" variable. On success *order receives a newly allocated array of boot option ids (owned
+ * by the caller) and the number of entries is returned. Returns -EOPNOTSUPP if the system was not booted via
+ * EFI, -ENOENT if the variable is empty, -EINVAL if its size is odd or too large, or the error from
+ * efi_get_variable(). */
+int efi_get_boot_order(uint16_t **order) {
+        _cleanup_free_ void *data = NULL;
+        size_t size, n_entries;
+        int r;
+
+        if (!is_efi_boot())
+                return -EOPNOTSUPP;
+
+        r = efi_get_variable(EFI_VENDOR_GLOBAL, "BootOrder", NULL, &data, &size);
+        if (r < 0)
+                return r;
+
+        if (size == 0)
+                return -ENOENT;
+
+        n_entries = size / sizeof(uint16_t);
+        if (size % sizeof(uint16_t) != 0 || n_entries > INT_MAX)
+                return -EINVAL;
+
+        *order = TAKE_PTR(data);
+        return (int) n_entries;
+}
+
+/* Writes the "n" boot option ids in "order" to the "BootOrder" variable. Returns -EOPNOTSUPP if the system was
+ * not booted via EFI, otherwise the result of efi_set_variable(). */
+int efi_set_boot_order(uint16_t *order, size_t n) {
+        size_t n_bytes = n * sizeof(uint16_t);
+
+        if (!is_efi_boot())
+                return -EOPNOTSUPP;
+
+        return efi_set_variable(EFI_VENDOR_GLOBAL, "BootOrder", order, n_bytes);
+}
+
+/* Parses exactly four hexadecimal digits (0-9 and upper-case A-F only) into a number in the range
+ * 0...0xFFFF. Returns -EINVAL as soon as any other character is seen. */
+static int boot_id_hex(const char s[static 4]) {
+        int value = 0, pos;
+
+        for (pos = 0; pos < 4; pos++) {
+                char c = s[pos];
+                int digit;
+
+                if (c >= '0' && c <= '9')
+                        digit = c - '0';
+                else if (c >= 'A' && c <= 'F')
+                        digit = c - 'A' + 10;
+                else
+                        return -EINVAL;
+
+                /* Most significant digit comes first */
+                value = (value << 4) | digit;
+        }
+
+        return value;
+}
+
+/* Three-way comparison of two boot option ids, used as the sort callback in efi_get_boot_options(). */
+static int cmp_uint16(const uint16_t *a, const uint16_t *b) {
+        uint16_t x = *a, y = *b;
+
+        return CMP(x, y);
+}
+
+/* Enumerates the boot options that exist as variables, by scanning the efivarfs directory for entries named
+ * "Boot" + four upper-case hex digits + "-" + the global vendor UUID.
+ *
+ * On success *options receives a newly allocated, sorted array of the ids found (NULL if there are none) and
+ * the number of entries is returned. Returns -EOPNOTSUPP if the system was not booted via EFI, -ENOMEM on
+ * allocation failure, or -errno if the directory cannot be opened or read. */
+int efi_get_boot_options(uint16_t **options) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ _cleanup_free_ uint16_t *list = NULL;
+ struct dirent *de;
+ size_t alloc = 0;
+ int count = 0;
+
+ assert(options);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ dir = opendir("/sys/firmware/efi/efivars/");
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT(de, dir, return -errno) {
+ int id;
+
+ if (strncmp(de->d_name, "Boot", 4) != 0)
+ continue;
+
+ /* 4 ("Boot") + 4 (hex digits) + 1 ("-") + 36 (UUID) = 45 characters */
+ if (strlen(de->d_name) != 45)
+ continue;
+
+ if (strcmp(de->d_name + 8, "-8be4df61-93ca-11d2-aa0d-00e098032b8c") != 0)
+ continue;
+
+ /* Entries like "BootOrder..." do not carry four hex digits and are skipped here */
+ id = boot_id_hex(de->d_name + 4);
+ if (id < 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, alloc, count + 1))
+ return -ENOMEM;
+
+ list[count++] = id;
+ }
+
+ typesafe_qsort(list, count, cmp_uint16);
+
+ *options = TAKE_PTR(list);
+
+ return count;
+}
+
+/* Reads the EFI variable "name" of the given vendor, which is expected to hold a decimal number as UTF-16
+ * text, and stores the parsed value in *u. Returns 0 on success, or the error from efi_get_variable_string()
+ * or safe_atou64(). */
+static int read_usec(sd_id128_t vendor, const char *name, usec_t *u) {
+        _cleanup_free_ char *j = NULL;
+        int r;
+        uint64_t x = 0;
+
+        assert(name);
+        assert(u);
+
+        /* Honour the vendor the caller asked for, rather than silently assuming the loader vendor. */
+        r = efi_get_variable_string(vendor, name, &j);
+        if (r < 0)
+                return r;
+
+        r = safe_atou64(j, &x);
+        if (r < 0)
+                return r;
+
+        *u = x;
+        return 0;
+}
+
+/* Retrieves the two timestamps the boot loader exported: LoaderTimeInitUSec is returned in *firmware and
+ * LoaderTimeExecUSec in *loader. Returns 0 on success, -EOPNOTSUPP if the system was not booted via EFI, -EIO
+ * if the values are implausible (second value zero, smaller than the first, or larger than one hour), or the
+ * error from reading either variable. */
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+        uint64_t init_usec, exec_usec;
+        int r;
+
+        assert(firmware);
+        assert(loader);
+
+        if (!is_efi_boot())
+                return -EOPNOTSUPP;
+
+        r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeInitUSec", &init_usec);
+        if (r < 0)
+                return r;
+
+        r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeExecUSec", &exec_usec);
+        if (r < 0)
+                return r;
+
+        /* Sanity checks on the reported values */
+        if (exec_usec == 0 || exec_usec < init_usec || exec_usec > USEC_PER_HOUR)
+                return -EIO;
+
+        *firmware = init_usec;
+        *loader = exec_usec;
+
+        return 0;
+}
+
+/* Reads the "LoaderDevicePartUUID" variable set by the boot loader and parses it as a UUID in the usual
+ * 8-4-4-4-12 hex notation. "u" may be NULL if the caller only wants to know whether the variable is set and
+ * well-formed. Returns 0 on success, -EOPNOTSUPP if the system was not booted via EFI, -EIO if the string
+ * cannot be parsed, or the error from efi_get_variable_string(). */
+int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+        _cleanup_free_ char *p = NULL;
+        unsigned parsed[16]; /* "%x" stores through a pointer to unsigned, hence not int */
+        int r;
+
+        if (!is_efi_boot())
+                return -EOPNOTSUPP;
+
+        r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderDevicePartUUID", &p);
+        if (r < 0)
+                return r;
+
+        if (sscanf(p, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+                   &parsed[0], &parsed[1], &parsed[2], &parsed[3],
+                   &parsed[4], &parsed[5], &parsed[6], &parsed[7],
+                   &parsed[8], &parsed[9], &parsed[10], &parsed[11],
+                   &parsed[12], &parsed[13], &parsed[14], &parsed[15]) != 16)
+                return -EIO;
+
+        if (u) {
+                unsigned i;
+
+                /* Each field was limited to two hex digits, so every value fits into a byte. */
+                for (i = 0; i < ELEMENTSOF(parsed); i++)
+                        u->bytes[i] = parsed[i];
+        }
+
+        return 0;
+}
+
+/* Checks whether "s" is acceptable as a boot loader entry name: non-empty, short enough to be used as a file
+ * name, and made up of alphanumerical characters and "-" only. */
+bool efi_loader_entry_name_valid(const char *s) {
+        if (isempty(s))
+                return false;
+
+        /* Make sure entry names fit in filenames. Note that NAME_MAX is the limit for a single file name;
+         * FILENAME_MAX is the (much larger) limit for a whole path. */
+        if (strlen(s) > NAME_MAX)
+                return false;
+
+        return in_charset(s, ALPHANUMERICAL "-");
+}
+
+/* Reads the "LoaderEntries" variable set by the boot loader and splits it into a string list.
+ *
+ * On success *ret receives a newly allocated, NULL-terminated list of the entry names converted to UTF-8 (owned
+ * by the caller). Names that do not pass efi_loader_entry_name_valid() are logged at debug level and left
+ * out. Returns 0 on success, -EOPNOTSUPP if the system was not booted via EFI, -ENOMEM on allocation failure,
+ * or the error from efi_get_variable(). */
+int efi_loader_get_entries(char ***ret) {
+ _cleanup_free_ char16_t *entries = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t size, i, start;
+ int r;
+
+ assert(ret);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntries", NULL, (void**) &entries, &size);
+ if (r < 0)
+ return r;
+
+ /* The variable contains a series of individually NUL terminated UTF-16 strings. */
+
+ /* "i" indexes 16 bit units, "start" is the index at which the current string began. */
+ for (i = 0, start = 0;; i++) {
+ _cleanup_free_ char *decoded = NULL;
+ bool end;
+
+ /* Is this the end of the variable's data? */
+ end = i * sizeof(char16_t) >= size;
+
+ /* Are we in the middle of a string? (i.e. not at the end of the variable, nor at a NUL terminator?) If
+ * so, let's go to the next entry. */
+ if (!end && entries[i] != 0)
+ continue;
+
+ /* We reached the end of a string, let's decode it into UTF-8 */
+ decoded = utf16_to_utf8(entries + start, (i - start) * sizeof(char16_t));
+ if (!decoded)
+ return -ENOMEM;
+
+ if (efi_loader_entry_name_valid(decoded)) {
+ r = strv_consume(&l, TAKE_PTR(decoded));
+ if (r < 0)
+ return r;
+ } else
+ log_debug("Ignoring invalid loader entry '%s'.", decoded);
+
+ /* We reached the end of the variable */
+ if (end)
+ break;
+
+ /* Continue after the NUL byte */
+ start = i + 1;
+ }
+
+ *ret = TAKE_PTR(l);
+ return 0;
+}
+
+/* Determines the feature bits (EFI_LOADER_FEATURE_*) of the boot loader and stores them in *ret.
+ *
+ * The bits are taken from the "LoaderFeatures" variable if it exists. If it does not, but "LoaderInfo" names
+ * systemd-boot, a fixed set of features is assumed. In all other non-error cases, including when the system
+ * was not booted via EFI, *ret is set to 0. Returns 0 on success and a negative errno-style value on failure. */
+int efi_loader_get_features(uint64_t *ret) {
+        _cleanup_free_ void *data = NULL;
+        size_t size;
+        int r;
+
+        if (!is_efi_boot()) {
+                *ret = 0;
+                return 0;
+        }
+
+        r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderFeatures", NULL, &data, &size);
+        if (r == -ENOENT) {
+                _cleanup_free_ char *info = NULL;
+
+                /* The new (v240+) LoaderFeatures variable is not supported, let's see if it's systemd-boot at all */
+                r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderInfo", &info);
+                if (r < 0 && r != -ENOENT)
+                        return r;
+
+                if (r >= 0 && first_word(info, "systemd-boot"))
+                        /* An older systemd-boot version. Let's hardcode the feature set, since it was pretty
+                         * static in all its versions. */
+                        *ret = EFI_LOADER_FEATURE_CONFIG_TIMEOUT |
+                                EFI_LOADER_FEATURE_ENTRY_DEFAULT |
+                                EFI_LOADER_FEATURE_ENTRY_ONESHOT;
+                else
+                        /* Variable not set (definitely means not systemd-boot) or some other boot loader: no
+                         * features supported */
+                        *ret = 0;
+
+                return 0;
+        }
+        if (r < 0)
+                return r;
+
+        if (size != sizeof(uint64_t))
+                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "LoaderFeatures EFI variable doesn't have the right size.");
+
+        memcpy(ret, data, sizeof(uint64_t));
+        return 0;
+}
+
+#endif
+
+/* Replaces every backslash in the NUL-terminated string "s" by a forward slash, in place, and returns "s". */
+char *efi_tilt_backslashes(char *s) {
+        char *cursor = s;
+
+        while (*cursor != 0) {
+                if (*cursor == '\\')
+                        *cursor = '/';
+                cursor++;
+        }
+
+        return s;
+}
diff --git a/src/shared/efivars.h b/src/shared/efivars.h
new file mode 100644
index 0000000..92670c8
--- /dev/null
+++ b/src/shared/efivars.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#if ! ENABLE_EFI
+#include <errno.h>
+#endif
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "sd-id128.h"
+
+#include "time-util.h"
+
+/* Vendor IDs passed as the "vendor" argument of the efi_get_variable()/efi_set_variable() family */
+#define EFI_VENDOR_LOADER SD_ID128_MAKE(4a,67,b0,82,0a,4c,41,cf,b6,c7,44,0b,29,bb,8c,4f)
+#define EFI_VENDOR_GLOBAL SD_ID128_MAKE(8b,e4,df,61,93,ca,11,d2,aa,0d,00,e0,98,03,2b,8c)
+/* Single-bit flag values (NOTE(review): presumably for the "attribute" word of EFI variables — confirm) */
+#define EFI_VARIABLE_NON_VOLATILE 0x0000000000000001
+#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x0000000000000002
+#define EFI_VARIABLE_RUNTIME_ACCESS 0x0000000000000004
+
+/* Bits of the 64bit feature mask reported by efi_loader_get_features() */
+#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT (UINT64_C(1) << 0)
+#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT (UINT64_C(1) << 1)
+#define EFI_LOADER_FEATURE_ENTRY_DEFAULT (UINT64_C(1) << 2)
+#define EFI_LOADER_FEATURE_ENTRY_ONESHOT (UINT64_C(1) << 3)
+#define EFI_LOADER_FEATURE_BOOT_COUNTING (UINT64_C(1) << 4)
+
+#if ENABLE_EFI
+
+bool is_efi_boot(void);
+bool is_efi_secure_boot(void);
+bool is_efi_secure_boot_setup_mode(void);
+int efi_reboot_to_firmware_supported(void);
+int efi_get_reboot_to_firmware(void);
+int efi_set_reboot_to_firmware(bool value);
+
+int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size);
+int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p);
+int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size);
+int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p);
+
+int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active);
+int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path);
+int efi_remove_boot_option(uint16_t id);
+int efi_get_boot_order(uint16_t **order);
+int efi_set_boot_order(uint16_t *order, size_t n);
+int efi_get_boot_options(uint16_t **options);
+
+int efi_loader_get_device_part_uuid(sd_id128_t *u);
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader);
+
+int efi_loader_get_entries(char ***ret);
+
+bool efi_loader_entry_name_valid(const char *s);
+
+int efi_loader_get_features(uint64_t *ret);
+
+#else
+
+/* Fallback implementations used when ENABLE_EFI is off: the boolean queries always report false, and every
+ * other call fails with -EOPNOTSUPP without touching its output parameters. */
+
+static inline bool is_efi_boot(void) {
+ return false;
+}
+
+static inline bool is_efi_secure_boot(void) {
+ return false;
+}
+
+static inline bool is_efi_secure_boot_setup_mode(void) {
+ return false;
+}
+
+static inline int efi_reboot_to_firmware_supported(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_reboot_to_firmware(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_reboot_to_firmware(bool value) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_remove_boot_option(uint16_t id) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_order(uint16_t **order) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_boot_order(uint16_t *order, size_t n) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_options(uint16_t **options) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_entries(char ***ret) {
+ return -EOPNOTSUPP;
+}
+
+/* Note: unlike the ENABLE_EFI implementation, which succeeds with *ret = 0 when not booted via EFI, this stub
+ * reports an error. */
+static inline int efi_loader_get_features(uint64_t *ret) {
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+char *efi_tilt_backslashes(char *s);
diff --git a/src/shared/enable-mempool.c b/src/shared/enable-mempool.c
new file mode 100644
index 0000000..a571b43
--- /dev/null
+++ b/src/shared/enable-mempool.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "mempool.h"
+
+/* Defines the mempool_use_allowed flag as true for everything linking this object.
+ * NOTE(review): presumably the flag is declared in mempool.h and consulted by the allocator — confirm. */
+const bool mempool_use_allowed = true;
diff --git a/src/shared/env-file-label.c b/src/shared/env-file-label.c
new file mode 100644
index 0000000..add68d2
--- /dev/null
+++ b/src/shared/env-file-label.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+
+#include "env-file-label.h"
+#include "env-file.h"
+#include "selinux-util.h"
+
+/* Writes the environment block l to fname via write_env_file(), bracketed by
+ * mac_selinux_create_file_prepare() and mac_selinux_create_file_clear().
+ *
+ * Returns the (negative) result of the prepare step if it fails, in which case nothing is written; otherwise
+ * returns whatever write_env_file() returned. */
+int write_env_file_label(const char *fname, char **l) {
+        int prepared, written;
+
+        prepared = mac_selinux_create_file_prepare(fname, S_IFREG);
+        if (prepared < 0)
+                return prepared;
+
+        written = write_env_file(fname, l);
+
+        /* Always undo the prepare step, regardless of whether writing worked */
+        mac_selinux_create_file_clear();
+
+        return written;
+}
diff --git a/src/shared/env-file-label.h b/src/shared/env-file-label.h
new file mode 100644
index 0000000..158fc4e
--- /dev/null
+++ b/src/shared/env-file-label.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* These functions are split out of env-file.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_env_file_label(const char *fname, char **l);
diff --git a/src/shared/exec-util.c b/src/shared/exec-util.c
new file mode 100644
index 0000000..17a278a
--- /dev/null
+++ b/src/shared/exec-util.c
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "serialize.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+/* Put this test here for a lack of better place */
+assert_cc(EAGAIN == EWOULDBLOCK);
+
+/* Forks off a child process that executes path.
+ *
+ * If stdout_fd is non-negative it is installed as the child's stdout. If argv is NULL the child is invoked
+ * with an argument vector consisting of path only, otherwise argv[0] is overwritten with path (this happens
+ * after the fork, in the child).
+ *
+ * Returns 0 without forking if path is empty (i.e. a mask), 1 with *pid set if a child was forked off, and a
+ * negative value if forking failed. */
+static int do_spawn(const char *path, char *argv[], int stdout_fd, pid_t *pid) {
+        char *fallback_argv[2];
+        pid_t child;
+        int r;
+
+        if (null_or_empty_path(path)) {
+                log_debug("%s is empty (a mask).", path);
+                return 0;
+        }
+
+        r = safe_fork("(direxec)", FORK_DEATHSIG|FORK_LOG, &child);
+        if (r < 0)
+                return r;
+        if (r > 0) {
+                /* Parent: hand the PID back to the caller */
+                *pid = child;
+                return 1;
+        }
+
+        /* Child from here on, we never return */
+
+        if (stdout_fd >= 0 && rearrange_stdio(STDIN_FILENO, stdout_fd, STDERR_FILENO) < 0)
+                _exit(EXIT_FAILURE);
+
+        (void) rlimit_nofile_safe();
+
+        if (argv)
+                argv[0] = (char*) path;
+        else {
+                fallback_argv[0] = (char*) path;
+                fallback_argv[1] = NULL;
+                argv = fallback_argv;
+        }
+
+        execv(path, argv);
+        log_error_errno(errno, "Failed to execute %s: %m", path);
+        _exit(EXIT_FAILURE);
+}
+
+/* Runs every executable found in the given directories, passing argv to each.
+ *
+ * Without callbacks all executables are spawned first and reaped afterwards (parallel mode). With callbacks
+ * each executable is spawned with its stdout redirected to a fresh serialization fd, waited for, and its
+ * output is handed to callbacks[STDOUT_GENERATE]; once all have run, callbacks[STDOUT_COLLECT] is invoked on
+ * output_fd.
+ *
+ * Returns 0 on success, a negative value on failure. Failures of individual executables are not propagated. */
+static int do_execute(
+ char **directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ int output_fd,
+ char *argv[],
+ char *envp[]) {
+
+ _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
+ _cleanup_strv_free_ char **paths = NULL;
+ char **path, **e;
+ int r;
+
+ /* We fork this all off from a child process so that we can somewhat cleanly make
+ * use of SIGALRM to set a time limit.
+ *
+ * If callbacks is nonnull, execution is serial. Otherwise, we default to parallel.
+ */
+
+ r = conf_files_list_strv(&paths, NULL, NULL, CONF_FILES_EXECUTABLE|CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char* const*) directories);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate executables: %m");
+
+ /* The PID → path map is only needed in parallel mode, where children are reaped at the very end */
+ if (!callbacks) {
+ pids = hashmap_new(NULL);
+ if (!pids)
+ return log_oom();
+ }
+
+ /* Abort execution of this process after the timeout. We simply rely on SIGALRM as
+ * default action terminating the process, and turn on alarm(). */
+
+ if (timeout != USEC_INFINITY)
+ alarm(DIV_ROUND_UP(timeout, USEC_PER_SEC));
+
+ /* Export the extra environment into this process, so that the children forked below see it */
+ STRV_FOREACH(e, envp)
+ if (putenv(*e) != 0)
+ return log_error_errno(errno, "Failed to set environment variable: %m");
+
+ STRV_FOREACH(path, paths) {
+ _cleanup_free_ char *t = NULL;
+ _cleanup_close_ int fd = -1;
+ pid_t pid;
+
+ t = strdup(*path);
+ if (!t)
+ return log_oom();
+
+ if (callbacks) {
+ fd = open_serialization_fd(basename(*path));
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open serialization file: %m");
+ }
+
+ /* Masked (0) and failed (< 0) spawns are skipped, the remaining executables still run */
+ r = do_spawn(t, argv, fd, &pid);
+ if (r <= 0)
+ continue;
+
+ if (pids) {
+ /* Parallel mode: remember the child, the hashmap now owns the path string */
+ r = hashmap_put(pids, PID_TO_PTR(pid), t);
+ if (r < 0)
+ return log_oom();
+ t = NULL;
+ } else {
+ /* Serial mode: wait for the child, then process what it wrote to its stdout */
+ r = wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ if (r < 0)
+ continue;
+
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to seek on serialization fd: %m");
+
+ /* fd is reset right after the call so that the _cleanup_close_ handler leaves it alone */
+ r = callbacks[STDOUT_GENERATE](fd, callback_args[STDOUT_GENERATE]);
+ fd = -1;
+ if (r < 0)
+ return log_error_errno(r, "Failed to process output from %s: %m", *path);
+ }
+ }
+
+ if (callbacks) {
+ r = callbacks[STDOUT_COLLECT](output_fd, callback_args[STDOUT_COLLECT]);
+ if (r < 0)
+ return log_error_errno(r, "Callback two failed: %m");
+ }
+
+ /* Parallel mode: reap all children we started above (pids is NULL in serial mode) */
+ while (!hashmap_isempty(pids)) {
+ _cleanup_free_ char *t = NULL;
+ pid_t pid;
+
+ pid = PTR_TO_PID(hashmap_first_key(pids));
+ assert(pid > 0);
+
+ t = hashmap_remove(pids, PID_TO_PTR(pid));
+ assert(t);
+
+ (void) wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ }
+
+ return 0;
+}
+
+/* Executes all binaries in the directories serially or in parallel and waits for them to finish. Optionally
+ * a timeout is applied. If a file with the same name exists in more than one directory, the earliest one
+ * wins.
+ *
+ * The actual work is done by do_execute() in a forked-off helper process. If callbacks is non-NULL, all three
+ * callbacks must be set; a serialization fd named after the first directory is opened, passed to the helper,
+ * and finally rewound and handed to callbacks[STDOUT_CONSUME] in the calling process.
+ *
+ * Returns 0 on success, a negative value on failure. */
+int execute_directories(
+                const char* const* directories,
+                usec_t timeout,
+                gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+                void* const callback_args[_STDOUT_CONSUME_MAX],
+                char *argv[],
+                char *envp[]) {
+
+        _cleanup_close_ int fd = -1;
+        char **dirs = (char**) directories;
+        char *name;
+        int r;
+
+        assert(!strv_isempty(dirs));
+
+        name = basename(dirs[0]);
+        assert(!isempty(name));
+
+        if (callbacks) {
+                assert(callback_args);
+                assert(callbacks[STDOUT_GENERATE]);
+                assert(callbacks[STDOUT_COLLECT]);
+                assert(callbacks[STDOUT_CONSUME]);
+
+                fd = open_serialization_fd(name);
+                if (fd < 0)
+                        return log_error_errno(fd, "Failed to open serialization file: %m");
+        }
+
+        r = safe_fork("(sd-executor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL);
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                /* Helper process: run everything, then report the outcome via the exit status */
+                r = do_execute(dirs, timeout, callbacks, callback_args, fd, argv, envp);
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+        }
+
+        if (callbacks) {
+                if (lseek(fd, 0, SEEK_SET) < 0)
+                        return log_error_errno(errno, "Failed to rewind serialization fd: %m");
+
+                /* fd is reset right after the call so that the _cleanup_close_ handler leaves it alone */
+                r = callbacks[STDOUT_CONSUME](fd, callback_args[STDOUT_CONSUME]);
+                fd = -1;
+                if (r < 0)
+                        return log_error_errno(r, "Failed to parse returned data: %m");
+        }
+
+        return 0;
+}
+
+/* Reads a series of VAR=value assignments from fd and uses them to update the list of variables in *arg
+ * (a char***). The exported environment of the calling process is updated too, via setenv().
+ *
+ * Assignments whose variable name is not valid are skipped with a warning.
+ *
+ * fd is always consumed, even on error. Returns a negative value on failure, otherwise the result of the last
+ * list update (or of loading the file, if no assignment was applied). */
+static int gather_environment_generate(int fd, void *arg) {
+        char ***env = arg, **x, **y;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_strv_free_ char **new = NULL;
+        int r;
+
+        assert(env);
+
+        f = fdopen(fd, "r");
+        if (!f) {
+                safe_close(fd);
+                return -errno;
+        }
+
+        r = load_env_file_pairs(f, NULL, &new);
+        if (r < 0)
+                return r;
+
+        STRV_FOREACH_PAIR(x, y, new) {
+                _cleanup_free_ char *p = NULL;
+
+                if (!env_name_is_valid(*x)) {
+                        log_warning("Invalid variable assignment \"%s=...\", ignoring.", *x);
+                        continue;
+                }
+
+                p = strjoin(*x, "=", *y);
+                if (!p)
+                        return -ENOMEM;
+
+                /* On failure the string stays ours and is released by the cleanup handler instead of
+                 * being leaked. NOTE(review): assumes strv_env_replace() does not take possession of p
+                 * when it fails — confirm against env-util. */
+                r = strv_env_replace(env, p);
+                if (r < 0)
+                        return r;
+
+                /* The string is now referenced from *env, don't free it */
+                p = NULL;
+
+                if (setenv(*x, *y, true) < 0)
+                        return -errno;
+        }
+
+        return r;
+}
+
+/* Writes out the environment block *arg (a char***) to fd as a series of "env" entries via serialize_strv(),
+ * then flushes the stream.
+ *
+ * fd is consumed in all cases. Returns 0 on success, a negative value on failure. */
+static int gather_environment_collect(int fd, void *arg) {
+        _cleanup_fclose_ FILE *out = NULL;
+        char ***env = arg;
+        int r;
+
+        assert(env);
+
+        out = fdopen(fd, "w");
+        if (!out) {
+                safe_close(fd);
+                return -errno;
+        }
+
+        r = serialize_strv(out, "env", *env);
+        if (r >= 0)
+                r = fflush_and_check(out);
+
+        return r < 0 ? r : 0;
+}
+
+/* Reads a series of "env=…" lines from fd and feeds the part after the prefix to deserialize_environment(),
+ * which updates *arg (a char***).
+ *
+ * Lines that lack the prefix or fail to deserialize are logged at debug level and skipped; the first such
+ * error is remembered and returned once the whole input has been processed. A read error aborts
+ * immediately. fd is consumed in all cases. Returns 0 if every line was processed successfully. */
+static int gather_environment_consume(int fd, void *arg) {
+        _cleanup_fclose_ FILE *f = NULL;
+        char ***env = arg;
+        int first_error = 0;
+
+        assert(env);
+
+        f = fdopen(fd, "r");
+        if (!f) {
+                safe_close(fd);
+                return -errno;
+        }
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL;
+                const char *payload;
+                int k;
+
+                k = read_line(f, LONG_LINE_MAX, &line);
+                if (k < 0)
+                        return k;
+                if (k == 0) /* EOF */
+                        return first_error;
+
+                payload = startswith(line, "env=");
+                if (payload) {
+                        k = deserialize_environment(payload, env);
+                        if (k >= 0)
+                                continue;
+
+                        log_debug_errno(k, "Invalid serialization line \"%s\": %m", line);
+                } else {
+                        log_debug("Serialization line \"%s\" unexpectedly didn't start with \"env=\".", line);
+                        k = -EINVAL;
+                }
+
+                /* Keep going, but remember the first thing that went wrong */
+                if (first_error == 0)
+                        first_error = k;
+        }
+}
+
+/* Callback set for execute_directories() that gathers environment variable assignments, one callback per
+ * STDOUT_* stage. */
+const gather_stdout_callback_t gather_environment[] = {
+        [STDOUT_GENERATE] = gather_environment_generate,
+        [STDOUT_COLLECT]  = gather_environment_collect,
+        [STDOUT_CONSUME]  = gather_environment_consume,
+};
diff --git a/src/shared/exec-util.h b/src/shared/exec-util.h
new file mode 100644
index 0000000..6ac3c90
--- /dev/null
+++ b/src/shared/exec-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+/* Callback type for the three output processing stages below. It receives a file descriptor and the matching
+ * entry of the callback_args array passed to execute_directories(). */
+typedef int (*gather_stdout_callback_t) (int fd, void *arg);
+
+/* Indexes into the callbacks/callback_args arrays taken by execute_directories() */
+enum {
+ STDOUT_GENERATE, /* from generators to helper process */
+ STDOUT_COLLECT, /* from helper process to main process */
+ STDOUT_CONSUME, /* process data in main process */
+ _STDOUT_CONSUME_MAX,
+};
+
+int execute_directories(
+ const char* const* directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ char *argv[],
+ char *envp[]);
+
+extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX];
diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c
new file mode 100644
index 0000000..26b3060
--- /dev/null
+++ b/src/shared/exit-status.c
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <sysexits.h>
+
+#include "exit-status.h"
+#include "macro.h"
+#include "set.h"
+
+/* One entry of the exit status → name tables used by exit_status_to_string() */
+struct exit_status_name {
+        int status;
+        const char *name;
+};
+
+/* Looks up status in a table terminated by an entry with a NULL name. Returns NULL if there is no match. */
+static const char* exit_status_lookup(const struct exit_status_name *table, int status) {
+        for (; table->name; table++)
+                if (table->status == status)
+                        return table->name;
+
+        return NULL;
+}
+
+/* Maps an exit status to its symbolic name, or returns NULL if the status has no name at the requested
+ * level.
+ *
+ * Exit status ranges:
+ *
+ *   0…1     │ ISO C, EXIT_SUCCESS + EXIT_FAILURE (always covered)
+ *   2…7     │ LSB exit codes for init scripts (EXIT_STATUS_LSB and EXIT_STATUS_FULL)
+ *   8…63    │ (Currently unmapped)
+ *   64…78   │ BSD defined exit codes (EXIT_STATUS_FULL only)
+ *   79…199  │ (Currently unmapped)
+ *   200…241 │ systemd's private error codes (every level but EXIT_STATUS_MINIMAL; might be extended to 254
+ *           │ in future development)
+ *   242…254 │ (Currently unmapped, but see above)
+ *   255     │ EXIT_EXCEPTION, listed among systemd's own codes. (We use this to propagate exit-by-signal
+ *           │ events. It's frequently used by other apps (like bash) to indicate an exit reason that cannot
+ *           │ really be expressed in a single exit status value — such as a propagated signal or such, and we
+ *           │ follow that logic here.)
+ */
+const char* exit_status_to_string(int status, ExitStatusLevel level) {
+
+        static const struct exit_status_name iso_c_names[] = {
+                { EXIT_SUCCESS, "SUCCESS" },
+                { EXIT_FAILURE, "FAILURE" },
+                { 0, NULL },
+        };
+
+        static const struct exit_status_name systemd_names[] = {
+                { EXIT_CHDIR,                   "CHDIR"                   },
+                { EXIT_NICE,                    "NICE"                    },
+                { EXIT_FDS,                     "FDS"                     },
+                { EXIT_EXEC,                    "EXEC"                    },
+                { EXIT_MEMORY,                  "MEMORY"                  },
+                { EXIT_LIMITS,                  "LIMITS"                  },
+                { EXIT_OOM_ADJUST,              "OOM_ADJUST"              },
+                { EXIT_SIGNAL_MASK,             "SIGNAL_MASK"             },
+                { EXIT_STDIN,                   "STDIN"                   },
+                { EXIT_STDOUT,                  "STDOUT"                  },
+                { EXIT_CHROOT,                  "CHROOT"                  },
+                { EXIT_IOPRIO,                  "IOPRIO"                  },
+                { EXIT_TIMERSLACK,              "TIMERSLACK"              },
+                { EXIT_SECUREBITS,              "SECUREBITS"              },
+                { EXIT_SETSCHEDULER,            "SETSCHEDULER"            },
+                { EXIT_CPUAFFINITY,             "CPUAFFINITY"             },
+                { EXIT_GROUP,                   "GROUP"                   },
+                { EXIT_USER,                    "USER"                    },
+                { EXIT_CAPABILITIES,            "CAPABILITIES"            },
+                { EXIT_CGROUP,                  "CGROUP"                  },
+                { EXIT_SETSID,                  "SETSID"                  },
+                { EXIT_CONFIRM,                 "CONFIRM"                 },
+                { EXIT_STDERR,                  "STDERR"                  },
+                { EXIT_PAM,                     "PAM"                     },
+                { EXIT_NETWORK,                 "NETWORK"                 },
+                { EXIT_NAMESPACE,               "NAMESPACE"               },
+                { EXIT_NO_NEW_PRIVILEGES,       "NO_NEW_PRIVILEGES"       },
+                { EXIT_SECCOMP,                 "SECCOMP"                 },
+                { EXIT_SELINUX_CONTEXT,         "SELINUX_CONTEXT"         },
+                { EXIT_PERSONALITY,             "PERSONALITY"             },
+                { EXIT_APPARMOR_PROFILE,        "APPARMOR"                },
+                { EXIT_ADDRESS_FAMILIES,        "ADDRESS_FAMILIES"        },
+                { EXIT_RUNTIME_DIRECTORY,       "RUNTIME_DIRECTORY"       },
+                { EXIT_CHOWN,                   "CHOWN"                   },
+                { EXIT_SMACK_PROCESS_LABEL,     "SMACK_PROCESS_LABEL"     },
+                { EXIT_KEYRING,                 "KEYRING"                 },
+                { EXIT_STATE_DIRECTORY,         "STATE_DIRECTORY"         },
+                { EXIT_CACHE_DIRECTORY,         "CACHE_DIRECTORY"         },
+                { EXIT_LOGS_DIRECTORY,          "LOGS_DIRECTORY"          },
+                { EXIT_CONFIGURATION_DIRECTORY, "CONFIGURATION_DIRECTORY" },
+                { EXIT_EXCEPTION,               "EXCEPTION"               },
+                { 0, NULL },
+        };
+
+        static const struct exit_status_name lsb_names[] = {
+                { EXIT_INVALIDARGUMENT, "INVALIDARGUMENT" },
+                { EXIT_NOTIMPLEMENTED,  "NOTIMPLEMENTED"  },
+                { EXIT_NOPERMISSION,    "NOPERMISSION"    },
+                { EXIT_NOTINSTALLED,    "NOTINSTALLED"    },
+                { EXIT_NOTCONFIGURED,   "NOTCONFIGURED"   },
+                { EXIT_NOTRUNNING,      "NOTRUNNING"      },
+                { 0, NULL },
+        };
+
+        static const struct exit_status_name bsd_names[] = {
+                { EX_USAGE,       "USAGE"       },
+                { EX_DATAERR,     "DATAERR"     },
+                { EX_NOINPUT,     "NOINPUT"     },
+                { EX_NOUSER,      "NOUSER"      },
+                { EX_NOHOST,      "NOHOST"      },
+                { EX_UNAVAILABLE, "UNAVAILABLE" },
+                { EX_SOFTWARE,    "SOFTWARE"    },
+                { EX_OSERR,       "OSERR"       },
+                { EX_OSFILE,      "OSFILE"      },
+                { EX_CANTCREAT,   "CANTCREAT"   },
+                { EX_IOERR,       "IOERR"       },
+                { EX_TEMPFAIL,    "TEMPFAIL"    },
+                { EX_PROTOCOL,    "PROTOCOL"    },
+                { EX_NOPERM,      "NOPERM"      },
+                { EX_CONFIG,      "CONFIG"      },
+                { 0, NULL },
+        };
+
+        const char *name;
+
+        /* We always cover the ISO C ones */
+        name = exit_status_lookup(iso_c_names, status);
+        if (name)
+                return name;
+
+        /* Optionally we cover our own ones */
+        if (IN_SET(level, EXIT_STATUS_SYSTEMD, EXIT_STATUS_LSB, EXIT_STATUS_FULL)) {
+                name = exit_status_lookup(systemd_names, status);
+                if (name)
+                        return name;
+        }
+
+        /* Optionally we support LSB ones */
+        if (IN_SET(level, EXIT_STATUS_LSB, EXIT_STATUS_FULL)) {
+                name = exit_status_lookup(lsb_names, status);
+                if (name)
+                        return name;
+        }
+
+        /* Optionally, we support BSD exit statuses */
+        if (level == EXIT_STATUS_FULL)
+                return exit_status_lookup(bsd_names, status);
+
+        return NULL;
+}
+
+/* Decides whether a process termination described by code (a CLD_* value) and status counts as clean.
+ *
+ * A regular exit is clean if the exit status is 0 or listed in success_status->status. A kill by signal is
+ * clean if the signal is listed in success_status->signal, or if clean is EXIT_CLEAN_DAEMON and the signal is
+ * one of SIGHUP, SIGINT, SIGTERM, SIGPIPE. Everything else is unclean. success_status may be NULL. */
+bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status) {
+
+        switch (code) {
+
+        case CLD_EXITED:
+                if (status == 0)
+                        return true;
+
+                return success_status &&
+                        set_contains(success_status->status, INT_TO_PTR(status));
+
+        case CLD_KILLED:
+                /* If a daemon does not implement handlers for some of the signals that's not considered an
+                 * unclean shutdown */
+                if (clean == EXIT_CLEAN_DAEMON && IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE))
+                        return true;
+
+                return success_status &&
+                        set_contains(success_status->signal, INT_TO_PTR(status));
+
+        default:
+                return false;
+        }
+}
+
+/* Releases both sets referenced by x and stores what set_free() returns back into the fields. The structure
+ * x itself is not freed. x must not be NULL. */
+void exit_status_set_free(ExitStatusSet *x) {
+        assert(x);
+
+        x->signal = set_free(x->signal);
+        x->status = set_free(x->status);
+}
+
+/* Returns true if x is NULL or if both its status set and its signal set are empty. */
+bool exit_status_set_is_empty(ExitStatusSet *x) {
+        return !x || (set_isempty(x->status) && set_isempty(x->signal));
+}
+
+/* Checks whether the termination described by code (a CLD_* value) and status is listed in x: exit statuses
+ * are matched against x->status, signals of killed or dumped processes against x->signal. Returns false for
+ * an empty or NULL set and for any other code. */
+bool exit_status_set_test(ExitStatusSet *x, int code, int status) {
+        Set *candidates;
+
+        if (exit_status_set_is_empty(x))
+                return false;
+
+        if (code == CLD_EXITED)
+                candidates = x->status;
+        else if (IN_SET(code, CLD_KILLED, CLD_DUMPED))
+                candidates = x->signal;
+        else
+                return false;
+
+        return set_contains(candidates, INT_TO_PTR(status));
+}
diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h
new file mode 100644
index 0000000..510eb31
--- /dev/null
+++ b/src/shared/exit-status.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+
+/* This defines pretty names for the LSB 'start' verb exit codes. Note that they shouldn't be confused with the LSB
+ * 'status' verb exit codes which are defined very differently. For details see:
+ *
+ * https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
+ */
+
+enum {
+ /* EXIT_SUCCESS defined by libc */
+ /* EXIT_FAILURE defined by libc */
+ /* LSB exit codes, 2 … 7 */
+ EXIT_INVALIDARGUMENT = 2,
+ EXIT_NOTIMPLEMENTED = 3,
+ EXIT_NOPERMISSION = 4,
+ EXIT_NOTINSTALLED = 5,
+ EXIT_NOTCONFIGURED = 6,
+ EXIT_NOTRUNNING = 7,
+
+ /* BSD's sysexits.h defines a couple EX_xyz exit codes in the range 64 … 78 */
+
+ /* The LSB suggests that error codes >= 200 are "reserved". We use them here under the assumption that they
+ * hence are unused by init scripts. */
+ EXIT_CHDIR = 200,
+ EXIT_NICE,
+ EXIT_FDS,
+ EXIT_EXEC,
+ EXIT_MEMORY,
+ EXIT_LIMITS,
+ EXIT_OOM_ADJUST,
+ EXIT_SIGNAL_MASK,
+ EXIT_STDIN,
+ EXIT_STDOUT,
+ EXIT_CHROOT, /* 210 */
+ EXIT_IOPRIO,
+ EXIT_TIMERSLACK,
+ EXIT_SECUREBITS,
+ EXIT_SETSCHEDULER,
+ EXIT_CPUAFFINITY,
+ EXIT_GROUP,
+ EXIT_USER,
+ EXIT_CAPABILITIES,
+ EXIT_CGROUP,
+ EXIT_SETSID, /* 220 */
+ EXIT_CONFIRM,
+ EXIT_STDERR,
+ _EXIT_RESERVED, /* used to be tcpwrap, don't reuse! */
+ EXIT_PAM,
+ EXIT_NETWORK,
+ EXIT_NAMESPACE,
+ EXIT_NO_NEW_PRIVILEGES,
+ EXIT_SECCOMP,
+ EXIT_SELINUX_CONTEXT,
+ EXIT_PERSONALITY, /* 230 */
+ EXIT_APPARMOR_PROFILE,
+ EXIT_ADDRESS_FAMILIES,
+ EXIT_RUNTIME_DIRECTORY,
+ _EXIT_RESERVED2, /* used to be used by kdbus, don't reuse */
+ EXIT_CHOWN,
+ EXIT_SMACK_PROCESS_LABEL,
+ EXIT_KEYRING,
+ EXIT_STATE_DIRECTORY,
+ EXIT_CACHE_DIRECTORY,
+ EXIT_LOGS_DIRECTORY, /* 240 */
+ EXIT_CONFIGURATION_DIRECTORY, /* 241, currently the last of our own codes */
+
+ EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
+};
+
+/* Selects which groups of exit codes exit_status_to_string() translates into names */
+typedef enum ExitStatusLevel {
+ EXIT_STATUS_MINIMAL, /* only cover libc EXIT_SUCCESS/EXIT_FAILURE */
+ EXIT_STATUS_SYSTEMD, /* cover libc and systemd's own exit codes */
+ EXIT_STATUS_LSB, /* cover libc, systemd's own and LSB exit codes */
+ EXIT_STATUS_FULL, /* cover libc, systemd's own, LSB and BSD (EX_xyz) exit codes */
+} ExitStatusLevel;
+
+/* A pair of sets, with values stored as INT_TO_PTR() pointers: exit statuses and signal numbers */
+typedef struct ExitStatusSet {
+ Set *status; /* matched against the exit status of processes that exited regularly */
+ Set *signal; /* matched against the signal of processes that were killed (or dumped core) */
+} ExitStatusSet;
+
+const char* exit_status_to_string(int status, ExitStatusLevel level) _const_;
+
+/* Policy argument of is_clean_exit() */
+typedef enum ExitClean {
+ EXIT_CLEAN_DAEMON, /* a kill by SIGHUP, SIGINT, SIGTERM or SIGPIPE is considered clean */
+ EXIT_CLEAN_COMMAND, /* no signal is considered clean unless explicitly listed by the caller */
+} ExitClean;
+
+bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status);
+
+void exit_status_set_free(ExitStatusSet *x);
+bool exit_status_set_is_empty(ExitStatusSet *x);
+bool exit_status_set_test(ExitStatusSet *x, int code, int status);
diff --git a/src/shared/fdset.c b/src/shared/fdset.c
new file mode 100644
index 0000000..5d27732
--- /dev/null
+++ b/src/shared/fdset.c
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+
+/* Cast helpers between the opaque FDSet type and the Set it is implemented with. The argument is
+ * parenthesized so that the cast applies to the whole expression passed in, not just its first operand. */
+#define MAKE_SET(s) ((Set*) (s))
+#define MAKE_FDSET(s) ((FDSet*) (s))
+
+/* Allocates a new, empty fd set. Returns whatever set_new() returns, cast to FDSet*
+ * (NOTE(review): presumably NULL on allocation failure, as the callers in this file check for that). */
+FDSet *fdset_new(void) {
+ return MAKE_FDSET(set_new(NULL));
+}
+
+/* Creates a new fd set containing the n_fds descriptors of the array fds and returns it in *ret.
+ *
+ * Returns 0 on success, -ENOMEM if the set cannot be allocated, or the negative result of fdset_put(). On
+ * failure only the set is released, the descriptors themselves are left open. */
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds) {
+        FDSet *set;
+        size_t idx = 0;
+
+        assert(ret);
+
+        set = fdset_new();
+        if (!set)
+                return -ENOMEM;
+
+        while (idx < n_fds) {
+                int r;
+
+                r = fdset_put(set, fds[idx++]);
+                if (r < 0) {
+                        /* Drop the container only, the fds still belong to the caller */
+                        set_free(MAKE_SET(set));
+                        return r;
+                }
+        }
+
+        *ret = set;
+        return 0;
+}
+
+/* Closes every descriptor still contained in s, then releases the set itself. Always returns NULL. */
+FDSet* fdset_free(FDSet *s) {
+
+        for (;;) {
+                void *entry;
+
+                entry = set_steal_first(MAKE_SET(s));
+                if (!entry)
+                        break;
+
+                /* Valgrind's fd might have ended up in this set here, due to fdset_new_fill(). We ignore
+                 * all failures here, so that the EBADFD that valgrind will return us on close() doesn't
+                 * influence us.
+                 *
+                 * When reloading, duplicates of the private bus connection fds and suchlike are closed
+                 * here, which has no effect at all, since they are only duplicates. So don't be
+                 * surprised about these log messages. */
+
+                log_debug("Closing left-over fd %i", PTR_TO_FD(entry));
+                close_nointr(PTR_TO_FD(entry));
+        }
+
+        set_free(MAKE_SET(s));
+        return NULL;
+}
+
+/* Adds the descriptor fd (which must be >= 0) to the set s. The descriptor is not duplicated. Returns the
+ * result of set_put(). */
+int fdset_put(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return set_put(MAKE_SET(s), FD_TO_PTR(fd));
+}
+
+/* Duplicates fd (with O_CLOEXEC set, picking a descriptor number >= 3) and adds the duplicate to s.
+ *
+ * Returns the new descriptor on success. Returns -errno if duplicating fails, or the negative result of
+ * fdset_put(), in which case the duplicate is closed again. */
+int fdset_put_dup(FDSet *s, int fd) {
+        int duplicate, r;
+
+        assert(s);
+        assert(fd >= 0);
+
+        duplicate = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+        if (duplicate < 0)
+                return -errno;
+
+        r = fdset_put(s, duplicate);
+        if (r >= 0)
+                return duplicate;
+
+        safe_close(duplicate);
+        return r;
+}
+
+/* Returns true if the descriptor fd (which must be >= 0) is part of the set s. */
+bool fdset_contains(FDSet *s, int fd) {
+        assert(s);
+        assert(fd >= 0);
+
+        return set_get(MAKE_SET(s), FD_TO_PTR(fd)) != NULL;
+}
+
+/* Removes the descriptor fd from the set s without closing it. Returns fd if it was part of the set, -ENOENT
+ * otherwise. */
+int fdset_remove(FDSet *s, int fd) {
+        assert(s);
+        assert(fd >= 0);
+
+        if (!set_remove(MAKE_SET(s), FD_TO_PTR(fd)))
+                return -ENOENT;
+
+        return fd;
+}
+
+/* Creates an fdset and fills in all currently open file descriptors, as listed in /proc/self/fd. The
+ * descriptors 0, 1 and 2 and the descriptor of the directory stream used for the enumeration are left out.
+ *
+ * On success the new set is returned in *_s and 0 is returned. On failure a negative errno-style value is
+ * returned and the partially filled set is released again (without closing any of the descriptors). This
+ * includes the case where reading the directory fails midway, which previously returned directly and leaked
+ * the set. */
+int fdset_new_fill(FDSet **_s) {
+        _cleanup_closedir_ DIR *d = NULL;
+        struct dirent *de;
+        FDSet *s;
+        int r;
+
+        assert(_s);
+
+        d = opendir("/proc/self/fd");
+        if (!d)
+                return -errno;
+
+        s = fdset_new();
+        if (!s)
+                return -ENOMEM;
+
+        /* The error action is a plain jump, so that errno is still the one set by the failed read when we
+         * pick it up below */
+        FOREACH_DIRENT(de, d, goto readdir_failed) {
+                int fd = -1;
+
+                r = safe_atoi(de->d_name, &fd);
+                if (r < 0)
+                        goto fail;
+
+                if (fd < 3)
+                        continue;
+
+                if (fd == dirfd(d))
+                        continue;
+
+                r = fdset_put(s, fd);
+                if (r < 0)
+                        goto fail;
+        }
+
+        *_s = s;
+        return 0;
+
+readdir_failed:
+        r = -errno;
+fail:
+        /* We won't close the fds here! */
+        set_free(MAKE_SET(s));
+        return r;
+}
+
+/* Applies fd_cloexec() with the flag b to every descriptor in fds. Stops at the first failure and returns
+ * its negative result; returns 0 if all calls succeeded. */
+int fdset_cloexec(FDSet *fds, bool b) {
+        Iterator it;
+        void *entry;
+
+        assert(fds);
+
+        SET_FOREACH(entry, MAKE_SET(fds), it) {
+                int r;
+
+                r = fd_cloexec(PTR_TO_FD(entry), b);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Creates an fdset and fills in all passed file descriptors, i.e. the sd_listen_fds(unset) descriptors
+ * counted from SD_LISTEN_FDS_START.
+ *
+ * Returns 0 and the new set in *_s on success, -ENOMEM if the set cannot be allocated, or the negative result
+ * of fdset_put(). On failure only the set is released, no descriptor is closed. Note that a negative result
+ * of sd_listen_fds() is not treated as an error here: it simply yields an empty set. */
+int fdset_new_listen_fds(FDSet **_s, bool unset) {
+        FDSet *set;
+        int count, fd, r;
+
+        assert(_s);
+
+        set = fdset_new();
+        if (!set)
+                return -ENOMEM;
+
+        count = sd_listen_fds(unset);
+        for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + count; fd++) {
+                r = fdset_put(set, fd);
+                if (r < 0) {
+                        set_free(MAKE_SET(set));
+                        return r;
+                }
+        }
+
+        *_s = set;
+        return 0;
+}
+
+/* Copies the descriptors contained in fds into a temporary array and passes it to close_all_fds(), whose
+ * result is returned. With an empty set, a NULL array of length 0 is passed. */
+int fdset_close_others(FDSet *fds) {
+        size_t n_keep, filled = 0;
+        int *keep = NULL;
+        Iterator it;
+        void *entry;
+
+        n_keep = fdset_size(fds);
+
+        if (n_keep > 0) {
+                keep = newa(int, n_keep);
+                SET_FOREACH(entry, MAKE_SET(fds), it)
+                        keep[filled++] = PTR_TO_FD(entry);
+        }
+
+        assert(filled == n_keep);
+
+        return close_all_fds(keep, filled);
+}
+
+/* Returns the number of descriptors in fds, as reported by set_size(). */
+unsigned fdset_size(FDSet *fds) {
+ return set_size(MAKE_SET(fds));
+}
+
+/* Returns whether fds contains no descriptors, as reported by set_isempty(). */
+bool fdset_isempty(FDSet *fds) {
+ return set_isempty(MAKE_SET(fds));
+}
+
+/* Advances the iterator *i over s. Returns the next descriptor, or -ENOENT once set_iterate() reports that
+ * there are no further entries. */
+int fdset_iterate(FDSet *s, Iterator *i) {
+        void *entry;
+
+        return set_iterate(MAKE_SET(s), i, &entry) ? PTR_TO_FD(entry) : -ENOENT;
+}
+
+/* Removes one descriptor from fds and returns it without closing it. Returns -ENOENT if
+ * set_steal_first() has nothing to hand out. */
+int fdset_steal_first(FDSet *fds) {
+        void *entry;
+
+        entry = set_steal_first(MAKE_SET(fds));
+
+        return entry ? PTR_TO_FD(entry) : -ENOENT;
+}
diff --git a/src/shared/fdset.h b/src/shared/fdset.h
new file mode 100644
index 0000000..d31062b
--- /dev/null
+++ b/src/shared/fdset.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+
+typedef struct FDSet FDSet;
+
+FDSet* fdset_new(void);
+FDSet* fdset_free(FDSet *s);
+
+int fdset_put(FDSet *s, int fd);
+int fdset_put_dup(FDSet *s, int fd);
+
+bool fdset_contains(FDSet *s, int fd);
+int fdset_remove(FDSet *s, int fd);
+
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds);
+int fdset_new_fill(FDSet **ret);
+int fdset_new_listen_fds(FDSet **ret, bool unset);
+
+int fdset_cloexec(FDSet *fds, bool b);
+
+int fdset_close_others(FDSet *fds);
+
+unsigned fdset_size(FDSet *fds);
+bool fdset_isempty(FDSet *fds);
+
+int fdset_iterate(FDSet *s, Iterator *i);
+
+int fdset_steal_first(FDSet *fds);
+
+/* Iterates over all descriptors of the set fds: fd receives each descriptor in turn, i is an Iterator used
+ * as cursor. The loop ends as soon as fdset_iterate() returns a negative value. */
+#define FDSET_FOREACH(fd, fds, i) \
+ for ((i) = ITERATOR_FIRST, (fd) = fdset_iterate((fds), &(i)); (fd) >= 0; (fd) = fdset_iterate((fds), &(i)))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FDSet*, fdset_free);
+#define _cleanup_fdset_free_ _cleanup_(fdset_freep)
diff --git a/src/shared/fileio-label.c b/src/shared/fileio-label.c
new file mode 100644
index 0000000..49ab297
--- /dev/null
+++ b/src/shared/fileio-label.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+
+#include "fileio-label.h"
+#include "fileio.h"
+#include "selinux-util.h"
+
+/* Writes line to the file fn via write_string_file_ts() with the WRITE_STRING_FILE_CREATE and
+ * WRITE_STRING_FILE_ATOMIC flags, passing ts through, bracketed by mac_selinux_create_file_prepare() and
+ * mac_selinux_create_file_clear().
+ *
+ * Returns the (negative) result of the prepare step if it fails, in which case nothing is written; otherwise
+ * returns whatever write_string_file_ts() returned. */
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts) {
+        int prepared, written;
+
+        prepared = mac_selinux_create_file_prepare(fn, S_IFREG);
+        if (prepared < 0)
+                return prepared;
+
+        written = write_string_file_ts(fn, line, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC, ts);
+
+        /* Always undo the prepare step, regardless of whether writing worked */
+        mac_selinux_create_file_clear();
+
+        return written;
+}
+
+/* Creates /run/nologin with the standard shutdown message. Logs an error and returns a negative value on
+ * failure, returns 0 on success.
+ *
+ * This is used twice: once in systemd-user-sessions.service, in order to block logins when we actually go
+ * down, and once in systemd-logind.service when shutdowns are scheduled, and logins are to be turned off a
+ * bit in advance. We use the same wording of the message in both cases. */
+int create_shutdown_run_nologin_or_warn(void) {
+        int r;
+
+        r = write_string_file_atomic_label("/run/nologin",
+                                           "System is going down. Unprivileged users are not permitted to log in anymore. "
+                                           "For technical details, see pam_nologin(8).");
+        if (r >= 0)
+                return 0;
+
+        return log_error_errno(r, "Failed to create /run/nologin: %m");
+}
diff --git a/src/shared/fileio-label.h b/src/shared/fileio-label.h
new file mode 100644
index 0000000..8f88955
--- /dev/null
+++ b/src/shared/fileio-label.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts);
+/* Convenience wrapper around write_string_file_atomic_label_ts() that passes NULL as timestamp */
+static inline int write_string_file_atomic_label(const char *fn, const char *line) {
+ return write_string_file_atomic_label_ts(fn, line, NULL);
+}
+
+int create_shutdown_run_nologin_or_warn(void);
diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c
new file mode 100644
index 0000000..cba52fb
--- /dev/null
+++ b/src/shared/firewall-util.c
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+/* Temporary work-around for broken glibc vs. linux kernel header definitions
+ * This is already fixed upstream, remove this when distributions have updated.
+ */
+#define _NET_IF_H 1
+
+#include <alloca.h>
+#include <arpa/inet.h>
+#include <endian.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#include <linux/if.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/nf_nat.h>
+#include <linux/netfilter/xt_addrtype.h>
+#include <libiptc/libiptc.h>
+
+#include "alloc-util.h"
+#include "firewall-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "socket-util.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free);
+
+/* Fills the generic IP matching part of an iptables entry: protocol, optional incoming/outgoing interface
+ * names (plus their byte masks) and optional source/destination address with netmask derived from the prefix
+ * length. Arguments passed as NULL leave the corresponding fields of 'entry' untouched. Returns -EINVAL if
+ * an interface name is given but does not pass ifname_valid(), 0 otherwise. */
+static int entry_fill_basics(
+                struct ipt_entry *entry,
+                int protocol,
+                const char *in_interface,
+                const union in_addr_union *source,
+                unsigned source_prefixlen,
+                const char *out_interface,
+                const union in_addr_union *destination,
+                unsigned destination_prefixlen) {
+
+        assert(entry);
+
+        /* Validate both names first, so that nothing is written to the entry on bad input */
+        if ((out_interface && !ifname_valid(out_interface)) ||
+            (in_interface && !ifname_valid(in_interface)))
+                return -EINVAL;
+
+        entry->ip.proto = protocol;
+
+        if (in_interface) {
+                size_t n = strlen(in_interface);
+
+                assert(n < sizeof entry->ip.iniface);
+                assert(n < sizeof entry->ip.iniface_mask);
+
+                strcpy(entry->ip.iniface, in_interface);
+                /* The mask covers the name including its trailing NUL byte */
+                memset(entry->ip.iniface_mask, 0xFF, n + 1);
+        }
+
+        if (out_interface) {
+                size_t n = strlen(out_interface);
+
+                assert(n < sizeof entry->ip.outiface);
+                assert(n < sizeof entry->ip.outiface_mask);
+
+                strcpy(entry->ip.outiface, out_interface);
+                memset(entry->ip.outiface_mask, 0xFF, n + 1);
+        }
+
+        if (source) {
+                entry->ip.src = source->in;
+                in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
+        }
+
+        if (destination) {
+                entry->ip.dst = destination->in;
+                in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
+        }
+
+        return 0;
+}
+
+/* Adds (add=true) or removes (add=false) a MASQUERADE rule in the POSTROUTING chain of the "nat" table.
+ * Only AF_INET and the protocols 0, TCP and UDP are accepted, everything else yields -EOPNOTSUPP. Adding a
+ * rule that already exists and removing one that is already gone both return 0 without committing anything.
+ * Otherwise returns 0 after a successful commit, or a negative errno-style error. */
+int fw_add_masquerade(
+                bool add,
+                int af,
+                int protocol,
+                const union in_addr_union *source,
+                unsigned source_prefixlen,
+                const char *out_interface,
+                const union in_addr_union *destination,
+                unsigned destination_prefixlen) {
+
+        _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+        struct nf_nat_ipv4_multi_range_compat *range;
+        struct ipt_entry_target *target;
+        struct ipt_entry *entry, *mask;
+        size_t target_sz, entry_sz;
+        int r;
+
+        if (af != AF_INET)
+                return -EOPNOTSUPP;
+        if (!IN_SET(protocol, 0, IPPROTO_TCP, IPPROTO_UDP))
+                return -EOPNOTSUPP;
+
+        h = iptc_init("nat");
+        if (!h)
+                return -errno;
+
+        /* The entry consists of the basic part, directly followed by the target and its payload */
+        target_sz = XT_ALIGN(sizeof(struct ipt_entry_target)) +
+                XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+        entry_sz = XT_ALIGN(sizeof(struct ipt_entry)) + target_sz;
+
+        /* Assemble the entry we want to add or remove */
+        entry = alloca0(entry_sz);
+        entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry));
+        entry->next_offset = entry_sz;
+        r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen);
+        if (r < 0)
+                return r;
+
+        /* Target part */
+        target = ipt_get_target(entry);
+        target->u.target_size = target_sz;
+        strncpy(target->u.user.name, "MASQUERADE", sizeof(target->u.user.name));
+        range = (struct nf_nat_ipv4_multi_range_compat*) target->data;
+        range->rangesize = 1;
+
+        /* Search mask with all bits set, same size as the entry */
+        mask = alloca(entry_sz);
+        memset(mask, 0xFF, entry_sz);
+
+        if (!add) {
+                /* A rule that is already gone is fine */
+                if (!iptc_delete_entry("POSTROUTING", entry, (unsigned char*) mask, h))
+                        return errno == ENOENT ? 0 : -errno;
+        } else {
+                if (iptc_check_entry("POSTROUTING", entry, (unsigned char*) mask, h))
+                        return 0;
+                if (errno != ENOENT) /* anything but "does not exist yet" is a failure */
+                        return -errno;
+
+                if (!iptc_insert_entry("POSTROUTING", entry, 0, h))
+                        return -errno;
+        }
+
+        if (!iptc_commit(h))
+                return -errno;
+
+        return 0;
+}
+
+/* Adds (add=true) or removes (add=false) DNAT rules in the "nat" table which rewrite traffic addressed to
+ * 'local_port' on a local address (addrtype match, XT_ADDRTYPE_LOCAL) to remote:remote_port. A rule is always
+ * managed in the PREROUTING chain. If no 'in_interface' is given a second rule is managed in the OUTPUT
+ * chain; unless an explicit 'destination' was passed that rule excludes 127.0.0.0/8 destinations. When
+ * adding, 'previous_remote' (if set and different from 'remote') names an older target whose rules are
+ * deleted before the changes are committed.
+ *
+ * Only AF_INET with TCP or UDP is supported (-EOPNOTSUPP otherwise). Both ports must be non-zero and 'remote'
+ * must be set (-EINVAL otherwise). Rules that already exist (on add) or are already gone (on remove) are not
+ * treated as errors. Returns 0 on success, a negative errno-style error on failure. */
+int fw_add_local_dnat(
+                bool add,
+                int af,
+                int protocol,
+                const char *in_interface,
+                const union in_addr_union *source,
+                unsigned source_prefixlen,
+                const union in_addr_union *destination,
+                unsigned destination_prefixlen,
+                uint16_t local_port,
+                const union in_addr_union *remote,
+                uint16_t remote_port,
+                const union in_addr_union *previous_remote) {
+
+        _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+        struct ipt_entry *entry, *mask;
+        struct ipt_entry_target *t;
+        struct ipt_entry_match *m;
+        struct xt_addrtype_info_v1 *at;
+        struct nf_nat_ipv4_multi_range_compat *mr;
+        size_t sz, msz;
+        int r;
+
+        assert(add || !previous_remote);
+
+        if (af != AF_INET)
+                return -EOPNOTSUPP;
+
+        if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
+                return -EOPNOTSUPP;
+
+        if (local_port == 0)
+                return -EINVAL;
+
+        if (remote_port == 0)
+                return -EINVAL;
+
+        /* 'remote' is dereferenced unconditionally further down, hence refuse NULL up front */
+        if (!remote)
+                return -EINVAL;
+
+        h = iptc_init("nat");
+        if (!h)
+                return -errno;
+
+        sz = XT_ALIGN(sizeof(struct ipt_entry)) +
+             XT_ALIGN(sizeof(struct ipt_entry_match)) +
+             XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+             XT_ALIGN(sizeof(struct ipt_entry_target)) +
+             XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+
+        /* Size of the protocol specific (first) match */
+        if (protocol == IPPROTO_TCP)
+                msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+                      XT_ALIGN(sizeof(struct xt_tcp));
+        else
+                msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+                      XT_ALIGN(sizeof(struct xt_udp));
+
+        sz += msz;
+
+        /* Fill in basic part */
+        entry = alloca0(sz);
+        entry->next_offset = sz;
+        entry->target_offset =
+                XT_ALIGN(sizeof(struct ipt_entry)) +
+                XT_ALIGN(sizeof(struct ipt_entry_match)) +
+                XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+                msz;
+        r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen);
+        if (r < 0)
+                return r;
+
+        /* Fill in first match: destination port equals local_port, any source port */
+        m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)));
+        m->u.match_size = msz;
+        if (protocol == IPPROTO_TCP) {
+                struct xt_tcp *tcp;
+
+                strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name));
+                tcp = (struct xt_tcp*) m->data;
+                tcp->dpts[0] = tcp->dpts[1] = local_port;
+                tcp->spts[0] = 0;
+                tcp->spts[1] = 0xFFFF;
+
+        } else {
+                struct xt_udp *udp;
+
+                strncpy(m->u.user.name, "udp", sizeof(m->u.user.name));
+                udp = (struct xt_udp*) m->data;
+                udp->dpts[0] = udp->dpts[1] = local_port;
+                udp->spts[0] = 0;
+                udp->spts[1] = 0xFFFF;
+        }
+
+        /* Fill in second match: destination address must be of type "local" */
+        m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz);
+        m->u.match_size =
+                XT_ALIGN(sizeof(struct ipt_entry_match)) +
+                XT_ALIGN(sizeof(struct xt_addrtype_info_v1));
+        strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name));
+        m->u.user.revision = 1;
+        at = (struct xt_addrtype_info_v1*) m->data;
+        at->dest = XT_ADDRTYPE_LOCAL;
+
+        /* Fill in target part */
+        t = ipt_get_target(entry);
+        t->u.target_size =
+                XT_ALIGN(sizeof(struct ipt_entry_target)) +
+                XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+        strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name));
+        mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
+        mr->rangesize = 1;
+        mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS;
+        mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+        if (protocol == IPPROTO_TCP)
+                mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htons(remote_port);
+        else
+                mr->range[0].min.udp.port = mr->range[0].max.udp.port = htons(remote_port);
+
+        /* Search mask with all bits set. Every byte is overwritten right away, hence no need for zeroing. */
+        mask = alloca(sz);
+        memset(mask, 0xFF, sz);
+
+        if (add) {
+                /* Add the PREROUTING rule, if it is missing so far */
+                if (!iptc_check_entry("PREROUTING", entry, (unsigned char*) mask, h)) {
+                        if (errno != ENOENT) /* propagate the real error, like all other paths here do */
+                                return -errno;
+
+                        if (!iptc_insert_entry("PREROUTING", entry, 0, h))
+                                return -errno;
+                }
+
+                /* If a previous remote is set, remove its entry */
+                if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+                        mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+                        if (!iptc_delete_entry("PREROUTING", entry, (unsigned char*) mask, h)) {
+                                if (errno != ENOENT)
+                                        return -errno;
+                        }
+
+                        /* Switch back to the new remote, the entry is reused for the OUTPUT chain below */
+                        mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+                }
+
+                /* Add the OUTPUT rule, if it is missing so far */
+                if (!in_interface) {
+
+                        /* Don't apply onto loopback addresses */
+                        if (!destination) {
+                                entry->ip.dst.s_addr = htobe32(0x7F000000);
+                                entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+                                entry->ip.invflags = IPT_INV_DSTIP;
+                        }
+
+                        if (!iptc_check_entry("OUTPUT", entry, (unsigned char*) mask, h)) {
+                                if (errno != ENOENT)
+                                        return -errno;
+
+                                if (!iptc_insert_entry("OUTPUT", entry, 0, h))
+                                        return -errno;
+                        }
+
+                        /* If a previous remote is set, remove its entry */
+                        if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+                                mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+                                if (!iptc_delete_entry("OUTPUT", entry, (unsigned char*) mask, h)) {
+                                        if (errno != ENOENT)
+                                                return -errno;
+                                }
+                        }
+                }
+        } else {
+                /* Removal: a rule that is already gone is not an error */
+                if (!iptc_delete_entry("PREROUTING", entry, (unsigned char*) mask, h)) {
+                        if (errno != ENOENT)
+                                return -errno;
+                }
+
+                if (!in_interface) {
+                        /* Same loopback exclusion as used when the OUTPUT rule was added */
+                        if (!destination) {
+                                entry->ip.dst.s_addr = htobe32(0x7F000000);
+                                entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+                                entry->ip.invflags = IPT_INV_DSTIP;
+                        }
+
+                        if (!iptc_delete_entry("OUTPUT", entry, (unsigned char*) mask, h)) {
+                                if (errno != ENOENT)
+                                        return -errno;
+                        }
+                }
+        }
+
+        if (!iptc_commit(h))
+                return -errno;
+
+        return 0;
+}
diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h
new file mode 100644
index 0000000..4fc71da
--- /dev/null
+++ b/src/shared/firewall-util.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "in-addr-util.h"
+
+#if HAVE_LIBIPTC
+
+int fw_add_masquerade( /* manages a MASQUERADE rule in the POSTROUTING chain of the "nat" table; AF_INET only */
+ bool add, /* true: make sure the rule exists, false: remove it */
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen);
+
+int fw_add_local_dnat( /* manages DNAT rules in PREROUTING (and in OUTPUT if in_interface is NULL); AF_INET, TCP/UDP only */
+ bool add, /* true: make sure the rules exist, false: remove them */
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote); /* may only be set when add is true */
+
+#else
+
+static inline int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+ return -EOPNOTSUPP; /* stub used when HAVE_LIBIPTC is not set: all arguments are ignored */
+}
+
+static inline int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote) {
+ return -EOPNOTSUPP; /* stub used when HAVE_LIBIPTC is not set: all arguments are ignored */
+}
+
+#endif
diff --git a/src/shared/format-table.c b/src/shared/format-table.c
new file mode 100644
index 0000000..7d52980
--- /dev/null
+++ b/src/shared/format-table.c
@@ -0,0 +1,1625 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ctype.h>
+#include <stdio_ext.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "gunicode.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define DEFAULT_WEIGHT 100
+
+/*
+ A few notes on implementation details:
+
+ - TableCell is a 'fake' structure, it's just used as data type to pass references to specific cell positions in the
+ table. It can be easily converted to an index number and back.
+
+ - TableData is where the actual data is stored: it encapsulates the data and formatting for a specific cell. It's
+ 'pseudo-immutable' and ref-counted. When a cell's data's formatting is to be changed, we duplicate the object if the
+ ref-counting is larger than 1. Note that TableData and its ref-counting is mostly not visible to the outside. The
+ outside only sees Table and TableCell.
+
+ - The Table object stores a simple one-dimensional array of references to TableData objects, one row after the
+ previous one.
+
+ - There's no special concept of a "row" or "column" in the table, and no special concept of the "header" row. It's all
+ derived from the cell index: we know how many cells are to be stored in a row, and can determine the rest from
+ that. The first row is always the header row. If header display is turned off we simply skip outputting the first
+ row. Also, when sorting rows we always leave the first row where it is, as the header shouldn't move.
+
+ - Note because there's no row and no column object some properties that might be appropriate as row/column properties
+ are exposed as cell properties instead. For example, the "weight" of a column (which is used to determine where to
+ add/remove space preferably when expanding/compressing tables horizontally) is actually made the "weight" of a
+ cell. Given that we usually need it per-column though we will calculate the average across every cell of the column
+ instead.
+
+ - To make things easy, when cells are added without any explicit configured formatting, then we'll copy the formatting
+ from the cell in the same column of the previous row. This is particularly useful for the "weight" of the cell (see above), as
+ this means setting the weight of the cells of the header row will nicely propagate to all cells in the other rows.
+*/
+
+typedef struct TableData {
+ unsigned n_ref; /* reference counter: one object may be shared by several cells of the table */
+ TableDataType type; /* selects which member of the union at the end is the valid one */
+
+ size_t minimum_width; /* minimum width for the column */
+ size_t maximum_width; /* maximum width for the column */
+ unsigned weight; /* the horizontal weight for this column, in case the table is expanded/compressed */
+ unsigned ellipsize_percent; /* 0 … 100, where to place the ellipsis when compression is needed */
+ unsigned align_percent; /* 0 … 100, where to pad with spaces when expanding is needed. 0: left-aligned, 100: right-aligned */
+
+ bool uppercase; /* Uppercase string on display */
+
+ const char *color; /* ANSI color string to use for this cell. When written to terminal should not move cursor. Will automatically be reset after the cell */
+ char *url; /* A URL to use for a clickable hyperlink */
+ char *formatted; /* A cached textual representation of the cell data, before ellipsization/alignment */
+
+ union {
+ uint8_t data[0]; /* data is generic array */
+ bool boolean;
+ usec_t timestamp;
+ usec_t timespan;
+ uint64_t size;
+ char string[0];
+ uint32_t uint32;
+ uint64_t uint64;
+ int percent; /* we use 'int' as datatype for percent values in order to match the result of parse_percent() */
+ /* … add more here as we start supporting more cell data types … */
+ };
+} TableData;
+
+/* Converts a TableCell reference back into the cell's index. The reference stores the index plus one, so
+ * that a valid reference is never NULL. */
+static size_t TABLE_CELL_TO_INDEX(TableCell *cell) {
+        size_t encoded;
+
+        assert(cell);
+
+        encoded = PTR_TO_SIZE(cell);
+        assert(encoded > 0);
+
+        return encoded - 1;
+}
+
+/* Converts a cell index into a TableCell reference, by storing the index plus one in the pointer value. The
+ * index (size_t) -1 cannot be represented that way and is refused. */
+static TableCell* TABLE_INDEX_TO_CELL(size_t index) {
+        size_t encoded;
+
+        assert(index != (size_t) -1);
+
+        encoded = index + 1;
+        return SIZE_TO_PTR(encoded);
+}
+
+struct Table {
+ size_t n_columns; /* Number of cells per row, set when the table is created */
+ size_t n_cells; /* Number of entries of data[] currently in use */
+
+ bool header; /* Whether to show the header row? */
+ size_t width; /* If != (size_t) -1 the width to format this table in */
+
+ TableData **data; /* Flat array of the cells' data objects, one row after the previous one */
+ size_t n_allocated; /* Allocation bookkeeping for data[], maintained via GREEDY_REALLOC() */
+
+ size_t *display_map; /* List of columns to show (by their index). It's fine if columns are listed multiple times or not at all */
+ size_t n_display_map;
+
+ size_t *sort_map; /* The columns to order rows by, in order of preference. */
+ size_t n_sort_map;
+
+ bool *reverse_map; /* Optional. If set it is indexed by column: true inverts the sort order for that column */
+};
+
+/* Allocates an empty table with the given number of columns (which must be non-zero). The header is enabled
+ * and no explicit width is configured. Returns NULL if memory allocation fails. */
+Table *table_new_raw(size_t n_columns) {
+        Table *t;
+
+        assert(n_columns > 0);
+
+        t = new(Table, 1);
+        if (!t)
+                return NULL;
+
+        /* All fields not listed here start out as zero/NULL */
+        *t = (struct Table) {
+                .width = (size_t) -1,
+                .header = true,
+                .n_columns = n_columns,
+        };
+
+        return t;
+}
+
+/* Creates a new table whose header row consists of the specified strings. The argument list must be
+ * terminated by a NULL pointer. Every header cell is marked for uppercase display. Returns NULL on failure. */
+Table *table_new_internal(const char *first_header, ...) {
+        _cleanup_(table_unrefp) Table *t = NULL;
+        size_t n_columns;
+        const char *name;
+        va_list ap;
+
+        assert(first_header);
+
+        /* First pass: count the headers, as the number of columns must be known when allocating the table */
+        n_columns = 1;
+        va_start(ap, first_header);
+        while (va_arg(ap, const char*))
+                n_columns++;
+        va_end(ap);
+
+        t = table_new_raw(n_columns);
+        if (!t)
+                return NULL;
+
+        /* Second pass: add one uppercased string cell per header */
+        va_start(ap, first_header);
+        name = first_header;
+        while (name) {
+                TableCell *cell;
+
+                if (table_add_cell(t, &cell, TABLE_STRING, name) < 0 ||
+                    table_set_uppercase(t, cell, true) < 0) {
+                        va_end(ap);
+                        return NULL;
+                }
+
+                name = va_arg(ap, const char*);
+        }
+        va_end(ap);
+
+        assert(t->n_columns == t->n_cells);
+        return TAKE_PTR(t);
+}
+
+/* Releases a cell data object together with the strings it owns (url and the cached formatted text). The
+ * color string is not freed. Always returns NULL. */
+static TableData *table_data_free(TableData *d) {
+        assert(d);
+
+        free(d->url);
+        free(d->formatted);
+        free(d);
+
+        return NULL;
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(TableData, table_data, table_data_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(TableData*, table_data_unref);
+
+/* Destroys a table: drops one reference on every cell's data object, then frees the cell array, the
+ * display/sort/reverse maps and the table itself. Accepts NULL. Always returns NULL. */
+Table *table_unref(Table *t) {
+        size_t n;
+
+        if (!t)
+                return NULL;
+
+        for (n = 0; n < t->n_cells; n++)
+                table_data_unref(t->data[n]);
+
+        free(t->reverse_map);
+        free(t->sort_map);
+        free(t->display_map);
+        free(t->data);
+        free(t);
+
+        return NULL;
+}
+
+/* Returns the number of payload bytes a cell of the specified type occupies. For strings this is the length
+ * of 'data' including the trailing NUL byte, for all other types it only depends on the type. */
+static size_t table_data_size(TableDataType type, const void *data) {
+
+        switch (type) {
+
+        case TABLE_EMPTY:
+                return 0;
+
+        case TABLE_STRING:
+                return strlen(data) + 1;
+
+        case TABLE_BOOLEAN:
+                return sizeof(bool);
+
+        case TABLE_PERCENT:
+                return sizeof(int);
+
+        case TABLE_UINT32:
+                return sizeof(uint32_t);
+
+        case TABLE_UINT64:
+        case TABLE_SIZE:
+                return sizeof(uint64_t);
+
+        case TABLE_TIMESPAN:
+        case TABLE_TIMESTAMP:
+                return sizeof(usec_t);
+
+        default:
+                assert_not_reached("Uh? Unexpected cell type");
+        }
+}
+
+/* Checks whether the existing data object 'd' can stand in for a new cell with the specified type, payload
+ * and formatting parameters. Objects that carry a color, a URL or the uppercase flag never match. */
+static bool table_data_matches(
+                TableData *d,
+                TableDataType type,
+                const void *data,
+                size_t minimum_width,
+                size_t maximum_width,
+                unsigned weight,
+                unsigned align_percent,
+                unsigned ellipsize_percent) {
+
+        size_t new_size, old_size;
+
+        assert(d);
+
+        /* Type and every formatting parameter have to agree */
+        if (d->type != type ||
+            d->minimum_width != minimum_width ||
+            d->maximum_width != maximum_width ||
+            d->weight != weight ||
+            d->align_percent != align_percent ||
+            d->ellipsize_percent != ellipsize_percent)
+                return false;
+
+        /* If a color/url/uppercase flag is set, refuse to merge */
+        if (d->color || d->url || d->uppercase)
+                return false;
+
+        /* Finally compare the payload: first its size, then its contents */
+        new_size = table_data_size(type, data);
+        old_size = table_data_size(d->type, d->data);
+
+        return new_size == old_size && memcmp_safe(data, d->data, old_size) == 0;
+}
+
+/* Allocates a new data object with a reference count of 1, holding a copy of 'data' and the specified
+ * formatting parameters. Only as much memory as the payload of this type needs is allocated after the fixed
+ * part of the structure. All remaining fields are zero initialized. Returns NULL on allocation failure. */
+static TableData *table_data_new(
+                TableDataType type,
+                const void *data,
+                size_t minimum_width,
+                size_t maximum_width,
+                unsigned weight,
+                unsigned align_percent,
+                unsigned ellipsize_percent) {
+
+        size_t payload;
+        TableData *nd;
+
+        payload = table_data_size(type, data);
+
+        nd = malloc0(offsetof(TableData, data) + payload);
+        if (!nd)
+                return NULL;
+
+        nd->n_ref = 1;
+        nd->type = type;
+        memcpy_safe(nd->data, data, payload);
+
+        nd->minimum_width = minimum_width;
+        nd->maximum_width = maximum_width;
+        nd->weight = weight;
+        nd->ellipsize_percent = ellipsize_percent;
+        nd->align_percent = align_percent;
+
+        return nd;
+}
+
+/* Appends a new cell to the table. Formatting parameters passed as (size_t) -1 resp. (unsigned) -1 are taken
+ * from the cell directly above (same column, previous row) if there is one, and from built-in defaults
+ * otherwise; maximum_width is always used as passed. If 'ret_cell' is non-NULL a reference to the new cell
+ * is returned in it. Returns 0 on success, -ENOMEM on allocation failure. */
+int table_add_cell_full(
+                Table *t,
+                TableCell **ret_cell,
+                TableDataType type,
+                const void *data,
+                size_t minimum_width,
+                size_t maximum_width,
+                unsigned weight,
+                unsigned align_percent,
+                unsigned ellipsize_percent) {
+
+        _cleanup_(table_data_unrefp) TableData *d = NULL;
+        TableData *above = NULL;
+
+        assert(t);
+        assert(type >= 0);
+        assert(type < _TABLE_DATA_TYPE_MAX);
+
+        /* Look up the cell one row up in the same column, if at least one full row exists already */
+        if (t->n_cells >= t->n_columns)
+                assert_se(above = t->data[t->n_cells - t->n_columns]);
+
+        /* Resolve unspecified formatting parameters */
+        if (above) {
+                if (minimum_width == (size_t) -1)
+                        minimum_width = above->minimum_width;
+                if (weight == (unsigned) -1)
+                        weight = above->weight;
+                if (align_percent == (unsigned) -1)
+                        align_percent = above->align_percent;
+                if (ellipsize_percent == (unsigned) -1)
+                        ellipsize_percent = above->ellipsize_percent;
+        } else {
+                if (minimum_width == (size_t) -1)
+                        minimum_width = 1;
+                if (weight == (unsigned) -1)
+                        weight = DEFAULT_WEIGHT;
+                if (align_percent == (unsigned) -1)
+                        align_percent = 0;
+                if (ellipsize_percent == (unsigned) -1)
+                        ellipsize_percent = 100;
+        }
+
+        assert(align_percent <= 100);
+        assert(ellipsize_percent <= 100);
+
+        /* Cells stacked on top of each other quite often carry the same data and formatting. In that case
+         * take another reference on the existing object instead of allocating a new one. */
+        if (above && table_data_matches(above, type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent))
+                d = table_data_ref(above);
+        else {
+                d = table_data_new(type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent);
+                if (!d)
+                        return -ENOMEM;
+        }
+
+        /* Make room for the new cell, but for no less than a full row */
+        if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+                return -ENOMEM;
+
+        if (ret_cell)
+                *ret_cell = TABLE_INDEX_TO_CELL(t->n_cells);
+
+        t->data[t->n_cells++] = TAKE_PTR(d);
+
+        return 0;
+}
+
+/* Appends a new cell at the end of the table that shares the data object of the specified existing cell.
+ * Returns -ENXIO if the cell does not exist, -ENOMEM on allocation failure and 0 on success. */
+int table_dup_cell(Table *t, TableCell *cell) {
+        size_t source;
+
+        assert(t);
+
+        source = TABLE_CELL_TO_INDEX(cell);
+        if (source >= t->n_cells)
+                return -ENXIO;
+
+        if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+                return -ENOMEM;
+
+        /* The new cell simply holds one more reference on the same object */
+        t->data[t->n_cells] = table_data_ref(t->data[source]);
+        t->n_cells++;
+
+        return 0;
+}
+
+/* Makes sure the data object of the specified cell is referenced by this cell only, so that its formatting
+ * can be modified without affecting other cells that so far shared the object. Returns 0 if the object
+ * was not shared anyway, 1 if a private copy was installed, -ENXIO if the cell does not exist and -ENOMEM on
+ * allocation failure. */
+static int table_dedup_cell(Table *t, TableCell *cell) {
+        _cleanup_free_ char *url_copy = NULL;
+        TableData *shared, *private;
+        size_t idx;
+
+        assert(t);
+
+        idx = TABLE_CELL_TO_INDEX(cell);
+        if (idx >= t->n_cells)
+                return -ENXIO;
+
+        assert_se(shared = t->data[idx]);
+        if (shared->n_ref == 1)
+                return 0;
+
+        assert(shared->n_ref > 1);
+
+        /* The URL is owned by the data object, hence the copy needs its own duplicate */
+        if (shared->url) {
+                url_copy = strdup(shared->url);
+                if (!url_copy)
+                        return -ENOMEM;
+        }
+
+        private = table_data_new(
+                        shared->type,
+                        shared->data,
+                        shared->minimum_width,
+                        shared->maximum_width,
+                        shared->weight,
+                        shared->align_percent,
+                        shared->ellipsize_percent);
+        if (!private)
+                return -ENOMEM;
+
+        private->uppercase = shared->uppercase;
+        private->color = shared->color;
+        private->url = TAKE_PTR(url_copy);
+
+        /* Swap the private copy in, and give up this cell's reference on the shared object */
+        t->data[idx] = private;
+        table_data_unref(shared);
+
+        assert(private->n_ref == 1);
+
+        return 1;
+}
+
+/* Returns the data object behind the specified cell, or NULL if the table has no such cell */
+static TableData *table_get_data(Table *t, TableCell *cell) {
+        TableData *d;
+        size_t idx;
+
+        assert(t);
+        assert(cell);
+
+        idx = TABLE_CELL_TO_INDEX(cell);
+        if (idx >= t->n_cells)
+                return NULL;
+
+        d = t->data[idx];
+        assert(d);
+        assert(d->n_ref > 0);
+
+        return d;
+}
+
+/* Sets the minimum width of the specified cell; (size_t) -1 selects the default of 1. The cell's data is
+ * unshared first. Returns 0 on success, or the negative error of table_dedup_cell(). */
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width) {
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        table_get_data(t, cell)->minimum_width = minimum_width == (size_t) -1 ? 1 : minimum_width;
+        return 0;
+}
+
+/* Sets the maximum width of the specified cell. The value is stored as passed. The cell's data is unshared
+ * first. Returns 0 on success, or the negative error of table_dedup_cell(). */
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width) {
+        TableData *d;
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        d = table_get_data(t, cell);
+        d->maximum_width = maximum_width;
+
+        return 0;
+}
+
+/* Sets the horizontal weight of the specified cell; (unsigned) -1 selects DEFAULT_WEIGHT. The cell's data is
+ * unshared first. Returns 0 on success, or the negative error of table_dedup_cell(). */
+int table_set_weight(Table *t, TableCell *cell, unsigned weight) {
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        table_get_data(t, cell)->weight = weight == (unsigned) -1 ? DEFAULT_WEIGHT : weight;
+        return 0;
+}
+
+/* Sets the alignment of the specified cell in percent (0: left-aligned, 100: right-aligned); (unsigned) -1
+ * selects the default of 0. The cell's data is unshared first. Returns 0 on success, or the negative error
+ * of table_dedup_cell(). */
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent) {
+        unsigned effective;
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        effective = percent == (unsigned) -1 ? 0 : percent;
+        assert(effective <= 100);
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        table_get_data(t, cell)->align_percent = effective;
+        return 0;
+}
+
+/* Sets where the ellipsis is placed for the specified cell, in percent (0 … 100); (unsigned) -1 selects the
+ * default of 100. The cell's data is unshared first. Returns 0 on success, or the negative error of
+ * table_dedup_cell(). */
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent) {
+        unsigned effective;
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        effective = percent == (unsigned) -1 ? 100 : percent;
+        assert(effective <= 100);
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        table_get_data(t, cell)->ellipsize_percent = effective;
+        return 0;
+}
+
+/* Sets the color string of the specified cell. The pointer is stored as is (not copied); an empty string is
+ * stored as NULL. The cell's data is unshared first. Returns 0 on success, or the negative error of
+ * table_dedup_cell(). */
+int table_set_color(Table *t, TableCell *cell, const char *color) {
+        TableData *d;
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        d = table_get_data(t, cell);
+        d->color = empty_to_null(color);
+
+        return 0;
+}
+
+/* Sets the URL of the specified cell to a copy of 'url', or clears it if 'url' is NULL. The previous URL is
+ * released. The cell's data is unshared first. Returns -ENOMEM if the copy cannot be allocated, the negative
+ * error of table_dedup_cell() if that fails, and the result of free_and_replace() otherwise. */
+int table_set_url(Table *t, TableCell *cell, const char *url) {
+        _cleanup_free_ char *dup = NULL;
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        /* Duplicate first, so that nothing is modified if we run out of memory */
+        if (url && !(dup = strdup(url)))
+                return -ENOMEM;
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        return free_and_replace(table_get_data(t, cell)->url, dup);
+}
+
+/* Turns uppercase display of the specified cell on or off. The cell's data is unshared first. Returns 1 if
+ * the flag changed (the cached formatted text is dropped then), 0 if it already had the requested value, or
+ * the negative error of table_dedup_cell(). */
+int table_set_uppercase(Table *t, TableCell *cell, bool b) {
+        TableData *d;
+        int r;
+
+        assert(t);
+        assert(cell);
+
+        r = table_dedup_cell(t, cell);
+        if (r < 0)
+                return r;
+
+        assert_se(d = table_get_data(t, cell));
+
+        if (d->uppercase != b) {
+                /* The cached text was rendered with the old setting, hence invalidate it */
+                d->formatted = mfree(d->formatted);
+                d->uppercase = b;
+                return 1;
+        }
+
+        return 0;
+}
+
+/* Replaces type and payload of the specified cell by a newly allocated data object, carrying over the
+ * formatting parameters, color, URL (as a copy) and uppercase flag of the old one. The cell's reference on
+ * the old object is dropped. Returns 0 on success, -ENXIO if the cell does not exist, -ENOMEM on allocation
+ * failure. */
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data) {
+        _cleanup_free_ char *url_copy = NULL;
+        TableData *previous, *replacement;
+        size_t idx;
+
+        assert(t);
+        assert(cell);
+
+        idx = TABLE_CELL_TO_INDEX(cell);
+        if (idx >= t->n_cells)
+                return -ENXIO;
+
+        assert_se(previous = t->data[idx]);
+
+        if (previous->url) {
+                url_copy = strdup(previous->url);
+                if (!url_copy)
+                        return -ENOMEM;
+        }
+
+        replacement = table_data_new(
+                        type,
+                        data,
+                        previous->minimum_width,
+                        previous->maximum_width,
+                        previous->weight,
+                        previous->align_percent,
+                        previous->ellipsize_percent);
+        if (!replacement)
+                return -ENOMEM;
+
+        replacement->uppercase = previous->uppercase;
+        replacement->color = previous->color;
+        replacement->url = TAKE_PTR(url_copy);
+
+        t->data[idx] = replacement;
+        table_data_unref(previous);
+
+        return 0;
+}
+
+/* Appends a series of cells. The variable arguments are pairs of a TableDataType and a value of the
+ * matching C type (TABLE_EMPTY takes no value); the list is terminated by _TABLE_DATA_TYPE_MAX in the type
+ * position. Returns 0 once the end marker is reached, or the negative error of the first table_add_cell()
+ * call that fails. */
+int table_add_many_internal(Table *t, TableDataType first_type, ...) {
+        TableDataType type;
+        va_list ap;
+        int r;
+
+        assert(t);
+        assert(first_type >= 0);
+        assert(first_type < _TABLE_DATA_TYPE_MAX);
+
+        va_start(ap, first_type);
+
+        for (type = first_type; type != _TABLE_DATA_TYPE_MAX; type = va_arg(ap, TableDataType)) {
+                /* Scratch space for values that are passed by value, but handed on by pointer */
+                union {
+                        uint64_t size;
+                        usec_t usec;
+                        uint32_t uint32;
+                        uint64_t uint64;
+                        int percent;
+                        bool b;
+                } value;
+                const void *data;
+
+                switch (type) {
+
+                case TABLE_EMPTY:
+                        data = NULL;
+                        break;
+
+                case TABLE_STRING:
+                        data = va_arg(ap, const char *);
+                        break;
+
+                case TABLE_BOOLEAN:
+                        value.b = va_arg(ap, int);
+                        data = &value.b;
+                        break;
+
+                case TABLE_TIMESTAMP:
+                case TABLE_TIMESPAN:
+                        value.usec = va_arg(ap, usec_t);
+                        data = &value.usec;
+                        break;
+
+                case TABLE_SIZE:
+                        value.size = va_arg(ap, uint64_t);
+                        data = &value.size;
+                        break;
+
+                case TABLE_UINT32:
+                        value.uint32 = va_arg(ap, uint32_t);
+                        data = &value.uint32;
+                        break;
+
+                case TABLE_UINT64:
+                        value.uint64 = va_arg(ap, uint64_t);
+                        data = &value.uint64;
+                        break;
+
+                case TABLE_PERCENT:
+                        value.percent = va_arg(ap, int);
+                        data = &value.percent;
+                        break;
+
+                default:
+                        assert_not_reached("Uh? Unexpected data type.");
+                }
+
+                r = table_add_cell(t, NULL, type, data);
+                if (r < 0) {
+                        va_end(ap);
+                        return r;
+                }
+        }
+
+        /* _TABLE_DATA_TYPE_MAX seen, which is used as end marker */
+        va_end(ap);
+        return 0;
+}
+
+/* Stores whether the header row shall be shown */
+void table_set_header(Table *t, bool b) {
+        assert(t);
+        t->header = b;
+}
+
+/* Stores the width to format the table in. The value is stored as passed, including (size_t) -1. */
+void table_set_width(Table *t, size_t width) {
+        assert(t);
+        t->width = width;
+}
+
+/* Appends the specified column indexes to the table's display map. The variable argument list consists of
+ * size_t values and is terminated by (size_t) -1; 'first_column' itself is always appended. Every index
+ * must be smaller than the number of columns. Returns 0 on success, -ENOMEM on allocation failure. */
+int table_set_display(Table *t, size_t first_column, ...) {
+        size_t allocated, column;
+        va_list ap;
+
+        assert(t);
+
+        column = first_column;
+        allocated = t->n_display_map;
+
+        va_start(ap, first_column);
+        do {
+                assert(column < t->n_columns);
+
+                if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, t->n_display_map+1))) {
+                        va_end(ap);
+                        return -ENOMEM;
+                }
+
+                t->display_map[t->n_display_map++] = column;
+
+                column = va_arg(ap, size_t);
+        } while (column != (size_t) -1);
+        va_end(ap);
+
+        return 0;
+}
+
+/* Appends the specified column indexes to the table's sort map, i.e. the list of columns rows are ordered
+ * by. The variable argument list consists of size_t values and is terminated by (size_t) -1; 'first_column'
+ * itself is always appended. Every index must be smaller than the number of columns. Returns 0 on success,
+ * -ENOMEM on allocation failure. */
+int table_set_sort(Table *t, size_t first_column, ...) {
+        size_t allocated, column;
+        va_list ap;
+
+        assert(t);
+
+        column = first_column;
+        allocated = t->n_sort_map;
+
+        va_start(ap, first_column);
+        do {
+                assert(column < t->n_columns);
+
+                if (!GREEDY_REALLOC(t->sort_map, allocated, MAX(t->n_columns, t->n_sort_map+1))) {
+                        va_end(ap);
+                        return -ENOMEM;
+                }
+
+                t->sort_map[t->n_sort_map++] = column;
+
+                column = va_arg(ap, size_t);
+        } while (column != (size_t) -1);
+        va_end(ap);
+
+        return 0;
+}
+
+/* Compares the payload of two cells. An ordering by value is only defined for cells of the same type (and
+ * not for empty cells); in all other cases the passed indexes decide, i.e. the order in which the cells
+ * were originally added. Returns a value < 0, 0 or > 0. */
+static int cell_data_compare(TableData *a, size_t index_a, TableData *b, size_t index_b) {
+        assert(a);
+        assert(b);
+
+        /* Cells of different types: use the original order */
+        if (a->type != b->type)
+                return CMP(index_a, index_b);
+
+        switch (a->type) {
+
+        case TABLE_STRING:
+                return strcmp(a->string, b->string);
+
+        case TABLE_BOOLEAN:
+                /* false sorts before true */
+                if (a->boolean == b->boolean)
+                        return 0;
+                return a->boolean ? 1 : -1;
+
+        case TABLE_TIMESTAMP:
+                return CMP(a->timestamp, b->timestamp);
+
+        case TABLE_TIMESPAN:
+                return CMP(a->timespan, b->timespan);
+
+        case TABLE_SIZE:
+                return CMP(a->size, b->size);
+
+        case TABLE_UINT32:
+                return CMP(a->uint32, b->uint32);
+
+        case TABLE_UINT64:
+                return CMP(a->uint64, b->uint64);
+
+        case TABLE_PERCENT:
+                return CMP(a->percent, b->percent);
+
+        default:
+                /* Generic fallback using the original order in which the cells were added. */
+                return CMP(index_a, index_b);
+        }
+}
+
+/* Comparison callback for sorting rows. '*a' and '*b' are the indexes of the first cell of the two rows to
+ * compare. The header row always goes first; all other rows are ordered by the columns listed in the sort
+ * map (inverted for columns flagged in the reverse map), and finally by their original position. */
+static int table_data_compare(const size_t *a, const size_t *b, Table *t) {
+        bool a_is_header, b_is_header;
+        size_t k;
+
+        assert(t);
+        assert(t->sort_map);
+
+        /* Make sure the header stays at the beginning */
+        a_is_header = *a < t->n_columns;
+        b_is_header = *b < t->n_columns;
+
+        if (a_is_header && b_is_header)
+                return 0;
+        if (a_is_header)
+                return -1;
+        if (b_is_header)
+                return 1;
+
+        /* Walk through the sort columns in order of preference, the first difference decides */
+        for (k = 0; k < t->n_sort_map; k++) {
+                size_t column = t->sort_map[k];
+                int r;
+
+                r = cell_data_compare(t->data[*a + column], *a, t->data[*b + column], *b);
+                if (r == 0)
+                        continue;
+
+                if (t->reverse_map && t->reverse_map[column])
+                        return -r;
+
+                return r;
+        }
+
+        /* Rows that compare equal keep the order they were originally added in */
+        return CMP(*a, *b);
+}
+
+/* Returns the textual representation of a cell's data, before ellipsization/alignment. Text that had to be
+ * allocated is cached in d->formatted and reused on later calls. Empty cells, booleans and strings that are
+ * not shown in uppercase are returned directly without allocating. Returns NULL on allocation failure, and
+ * "n/a" if a timestamp, timespan or size cannot be formatted. */
+static const char *table_data_format(TableData *d) {
+        assert(d);
+
+        if (d->formatted)
+                return d->formatted;
+
+        switch (d->type) {
+
+        case TABLE_EMPTY:
+                return "";
+
+        case TABLE_STRING: {
+                const char *src;
+                char *dst;
+
+                if (!d->uppercase)
+                        return d->string;
+
+                d->formatted = new(char, strlen(d->string) + 1);
+                if (!d->formatted)
+                        return NULL;
+
+                /* Copy the string byte by byte, uppercasing on the way */
+                dst = d->formatted;
+                for (src = d->string; *src; src++)
+                        *(dst++) = (char) toupper((unsigned char) *src);
+                *dst = 0;
+
+                return d->formatted;
+        }
+
+        case TABLE_BOOLEAN:
+                return yes_no(d->boolean);
+
+        case TABLE_TIMESTAMP: {
+                _cleanup_free_ char *buf = NULL;
+
+                buf = new(char, FORMAT_TIMESTAMP_MAX);
+                if (!buf)
+                        return NULL;
+
+                if (!format_timestamp(buf, FORMAT_TIMESTAMP_MAX, d->timestamp))
+                        return "n/a";
+
+                d->formatted = TAKE_PTR(buf);
+                break;
+        }
+
+        case TABLE_TIMESPAN: {
+                _cleanup_free_ char *buf = NULL;
+
+                buf = new(char, FORMAT_TIMESPAN_MAX);
+                if (!buf)
+                        return NULL;
+
+                if (!format_timespan(buf, FORMAT_TIMESPAN_MAX, d->timespan, 0))
+                        return "n/a";
+
+                d->formatted = TAKE_PTR(buf);
+                break;
+        }
+
+        case TABLE_SIZE: {
+                _cleanup_free_ char *buf = NULL;
+
+                buf = new(char, FORMAT_BYTES_MAX);
+                if (!buf)
+                        return NULL;
+
+                if (!format_bytes(buf, FORMAT_BYTES_MAX, d->size))
+                        return "n/a";
+
+                d->formatted = TAKE_PTR(buf);
+                break;
+        }
+
+        case TABLE_UINT32: {
+                _cleanup_free_ char *buf = NULL;
+
+                buf = new(char, DECIMAL_STR_WIDTH(d->uint32) + 1);
+                if (!buf)
+                        return NULL;
+
+                sprintf(buf, "%" PRIu32, d->uint32);
+                d->formatted = TAKE_PTR(buf);
+                break;
+        }
+
+        case TABLE_UINT64: {
+                _cleanup_free_ char *buf = NULL;
+
+                buf = new(char, DECIMAL_STR_WIDTH(d->uint64) + 1);
+                if (!buf)
+                        return NULL;
+
+                sprintf(buf, "%" PRIu64, d->uint64);
+                d->formatted = TAKE_PTR(buf);
+                break;
+        }
+
+        case TABLE_PERCENT: {
+                _cleanup_free_ char *buf = NULL;
+
+                /* One extra byte for the percent sign, one for the trailing NUL */
+                buf = new(char, DECIMAL_STR_WIDTH(d->percent) + 2);
+                if (!buf)
+                        return NULL;
+
+                sprintf(buf, "%i%%" , d->percent);
+                d->formatted = TAKE_PTR(buf);
+                break;
+        }
+
+        default:
+                assert_not_reached("Unexpected type?");
+        }
+
+        return d->formatted;
+}
+
+static int table_data_requested_width(TableData *d, size_t *ret) {
+        const char *text;
+        size_t cells;
+
+        /* Stores in *ret the console width the cell would like to have: the width of its formatted text,
+         * clamped first to the cell's maximum (if one is set) and then raised to its minimum. Returns
+         * -ENOMEM if formatting fails and -EINVAL if the width of the text cannot be determined. */
+
+        text = table_data_format(d);
+        if (!text)
+                return -ENOMEM;
+
+        cells = utf8_console_width(text);
+        if (cells == (size_t) -1)
+                return -EINVAL;
+
+        if (d->maximum_width != (size_t) -1)
+                cells = MIN(cells, d->maximum_width);
+
+        if (d->minimum_width > cells)
+                cells = d->minimum_width;
+
+        *ret = cells;
+        return 0;
+}
+
+static char *align_string_mem(const char *str, const char *url, size_t new_length, unsigned percent) {
+        _cleanup_free_ char *clickable = NULL;
+        size_t cells = 0, pad, pad_left, str_bytes, payload_bytes;
+        const char *cursor, *end;
+        char *out;
+        int r;
+
+        /* Pads 'str' with spaces so that it occupies 'new_length' character cells; 'percent' of the padding
+         * goes to the left, the remainder to the right. If 'url' is set the text is wrapped via
+         * terminal_urlify() first, but the width is always measured on the plain text. Note that
+         * 'str_bytes' is a byte size while 'new_length' is a width in character cells. Returns a newly
+         * allocated string, or NULL on failure. */
+
+        assert(str);
+        assert(percent <= 100);
+
+        str_bytes = strlen(str);
+
+        if (url) {
+                r = terminal_urlify(url, str, &clickable);
+                if (r < 0)
+                        return NULL;
+
+                payload_bytes = strlen(clickable);
+        } else
+                payload_bytes = str_bytes;
+
+        /* Measure the current on-screen width; invalid sequences count as one cell per byte */
+        for (cursor = str, end = str + str_bytes; cursor < end;) {
+                char32_t c;
+
+                if (utf8_encoded_to_unichar(cursor, &c) < 0) {
+                        cursor++;
+                        cells++;
+                        continue;
+                }
+
+                cursor = utf8_next_char(cursor);
+                cells += unichar_iswide(c) ? 2 : 1;
+        }
+
+        /* Nothing to pad if the text already fills (or exceeds) the target width */
+        if (cells >= new_length)
+                return clickable ? TAKE_PTR(clickable) : strdup(str);
+
+        pad = new_length - cells;
+        pad_left = pad * percent / 100U;
+
+        out = new(char, pad + payload_bytes + 1);
+        if (!out)
+                return NULL;
+
+        memset(out, ' ', pad_left);
+        memcpy(out + pad_left, clickable ?: str, payload_bytes);
+        memset(out + pad_left + payload_bytes, ' ', pad - pad_left);
+        out[pad + payload_bytes] = 0;
+
+        return out;
+}
+
+int table_print(Table *t, FILE *f) { /* Lays out table 't' and writes it to 'f' (stdout if 'f' is NULL).
+ * Works in two passes: the first one collects per-column minimum/maximum/requested widths and weights and
+ * derives the final width of every column, the second one emits the rows (in sorted order if a sort map
+ * is set), ellipsizing or padding each field to its column width. Returns a negative errno-style value on
+ * failure, otherwise whatever fflush_and_check() returns for 'f'. */
+ size_t n_rows, *minimum_width, *maximum_width, display_columns, *requested_width,
+ i, j, table_minimum_width, table_maximum_width, table_requested_width, table_effective_width,
+ *width;
+ _cleanup_free_ size_t *sorted = NULL;
+ uint64_t *column_weight, weight_sum;
+ int r;
+
+ assert(t);
+
+ if (!f)
+ f = stdout;
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted)
+ return -ENOMEM;
+
+ for (i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+
+ assert(display_columns > 0);
+
+ minimum_width = newa(size_t, display_columns);
+ maximum_width = newa(size_t, display_columns);
+ requested_width = newa(size_t, display_columns);
+ width = newa(size_t, display_columns);
+ column_weight = newa0(uint64_t, display_columns);
+
+ for (j = 0; j < display_columns; j++) {
+ minimum_width[j] = 1;
+ maximum_width[j] = (size_t) -1;
+ requested_width[j] = (size_t) -1;
+ }
+
+ /* First pass: determine column sizes */
+ for (i = t->header ? 0 : 1; i < n_rows; i++) { /* row 0 is the header row, skipped if the header is turned off */
+ TableData **row;
+
+ /* Note that we don't care about ordering at this time, as we just want to determine column sizes,
+ * hence we don't care for sorted[] during the first pass. */
+ row = t->data + i * t->n_columns;
+
+ for (j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t req;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ r = table_data_requested_width(d, &req);
+ if (r < 0)
+ return r;
+
+ /* Determine the biggest width that any cell in this column would like to have */
+ if (requested_width[j] == (size_t) -1 ||
+ requested_width[j] < req)
+ requested_width[j] = req;
+
+ /* Determine the minimum width any cell in this column needs */
+ if (minimum_width[j] < d->minimum_width)
+ minimum_width[j] = d->minimum_width;
+
+ /* Determine the maximum width any cell in this column needs */
+ if (d->maximum_width != (size_t) -1 &&
+ (maximum_width[j] == (size_t) -1 ||
+ maximum_width[j] > d->maximum_width))
+ maximum_width[j] = d->maximum_width;
+
+ /* Determine the full columns weight */
+ column_weight[j] += d->weight;
+ }
+ }
+
+ /* One space between each column */
+ table_requested_width = table_minimum_width = table_maximum_width = display_columns - 1;
+
+ /* Calculate the total weight for all columns, plus the minimum, maximum and requested width for the table. */
+ weight_sum = 0;
+ for (j = 0; j < display_columns; j++) {
+ weight_sum += column_weight[j];
+
+ table_minimum_width += minimum_width[j];
+
+ if (maximum_width[j] == (size_t) -1)
+ table_maximum_width = (size_t) -1;
+ else
+ table_maximum_width += maximum_width[j];
+
+ table_requested_width += requested_width[j];
+ }
+
+ /* Calculate effective table width */
+ if (t->width == (size_t) -1) /* no explicit width set: use the requested width, capped at columns() unless a pager is used */
+ table_effective_width = pager_have() ? table_requested_width : MIN(table_requested_width, columns());
+ else
+ table_effective_width = t->width;
+
+ if (table_maximum_width != (size_t) -1 && table_effective_width > table_maximum_width)
+ table_effective_width = table_maximum_width;
+
+ if (table_effective_width < table_minimum_width)
+ table_effective_width = table_minimum_width;
+
+ if (table_effective_width >= table_requested_width) {
+ size_t extra;
+
+ /* We have extra room, let's distribute it among columns according to their weights. We first provide
+ * each column with what it asked for and the distribute the rest. */
+
+ extra = table_effective_width - table_requested_width;
+
+ for (j = 0; j < display_columns; j++) {
+ size_t delta;
+
+ if (weight_sum == 0)
+ width[j] = requested_width[j] + extra / (display_columns - j); /* Avoid division by zero */
+ else
+ width[j] = requested_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (maximum_width[j] != (size_t) -1 && width[j] > maximum_width[j])
+ width[j] = maximum_width[j];
+
+ if (width[j] < minimum_width[j])
+ width[j] = minimum_width[j];
+
+ assert(width[j] >= requested_width[j]);
+ delta = width[j] - requested_width[j];
+
+ /* Subtract what we just added from the rest */
+ if (extra > delta)
+ extra -= delta;
+ else
+ extra = 0;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+ }
+
+ } else {
+ /* We need to compress the table, columns can't get what they asked for. We first provide each column
+ * with the minimum they need, and then distribute anything left. */
+ bool finalize = false;
+ size_t extra;
+
+ extra = table_effective_width - table_minimum_width;
+
+ for (j = 0; j < display_columns; j++)
+ width[j] = (size_t) -1;
+
+ for (;;) {
+ bool restart = false;
+
+ for (j = 0; j < display_columns; j++) {
+ size_t delta, w;
+
+ /* Did this column already get something assigned? If so, let's skip to the next */
+ if (width[j] != (size_t) -1)
+ continue;
+
+ if (weight_sum == 0)
+ w = minimum_width[j] + extra / (display_columns - j); /* avoid division by zero */
+ else
+ w = minimum_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (w >= requested_width[j]) {
+ /* Never give more than requested. If we hit a column like this, there's more
+ * space to allocate to other columns which means we need to restart the
+ * iteration. However, if we hit a column like this, let's assign it the space
+ * it wanted for good early.*/
+
+ w = requested_width[j];
+ restart = true;
+
+ } else if (!finalize)
+ continue;
+
+ width[j] = w;
+
+ assert(w >= minimum_width[j]);
+ delta = w - minimum_width[j];
+
+ assert(delta <= extra);
+ extra -= delta;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+
+ if (restart && !finalize)
+ break;
+ }
+
+ if (finalize)
+ break;
+
+ if (!restart)
+ finalize = true;
+ }
+ }
+
+ /* Second pass: show output */
+ for (i = t->header ? 0 : 1; i < n_rows; i++) {
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ for (j = 0; j < display_columns; j++) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *field;
+ TableData *d;
+ size_t l;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ field = table_data_format(d);
+ if (!field)
+ return -ENOMEM;
+
+ l = utf8_console_width(field);
+ if (l > width[j]) {
+ /* Field is wider than allocated space. Let's ellipsize */
+
+ buffer = ellipsize(field, width[j], d->ellipsize_percent);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+
+ } else if (l < width[j]) {
+ /* Field is shorter than allocated space. Let's align with spaces */
+
+ buffer = align_string_mem(field, d->url, width[j], d->align_percent);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+ }
+
+ if (l >= width[j] && d->url) { /* the padded case got its URL applied inside align_string_mem() already */
+ _cleanup_free_ char *clickable = NULL;
+
+ r = terminal_urlify(d->url, field, &clickable);
+ if (r < 0)
+ return r;
+
+ free_and_replace(buffer, clickable);
+ field = buffer;
+ }
+
+ if (row == t->data) /* underline header line fully, including the column separator */
+ fputs(ansi_underline(), f);
+
+ if (j > 0)
+ fputc(' ', f); /* column separator */
+
+ if (d->color && colors_enabled()) {
+ if (row == t->data) /* first undo header underliner */
+ fputs(ANSI_NORMAL, f);
+
+ fputs(d->color, f);
+ }
+
+ fputs(field, f);
+
+ if (colors_enabled() && (d->color || row == t->data))
+ fputs(ANSI_NORMAL, f);
+ }
+
+ fputc('\n', f);
+ }
+
+ return fflush_and_check(f);
+}
+
+int table_format(Table *t, char **ret) {
+        /* Renders the table into a newly allocated string by running table_print() against a memory
+         * stream. On success stores the string in *ret (ownership passes to the caller) and returns 0; on
+         * failure returns a negative errno-style value and leaves *ret untouched.
+         *
+         * 'buf' is deliberately declared before 'f': cleanup handlers run in reverse order of
+         * declaration, hence on an early return the stream is closed first and only then the buffer
+         * that open_memstream() maintains for it is released. Previously the buffer was leaked
+         * whenever table_print() failed. */
+        _cleanup_free_ char *buf = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        size_t sz = 0;
+        int r;
+
+        f = open_memstream(&buf, &sz);
+        if (!f)
+                return -ENOMEM;
+
+        (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+
+        r = table_print(t, f);
+        if (r < 0)
+                return r;
+
+        /* Closing the stream finalizes the contents of 'buf' */
+        f = safe_fclose(f);
+
+        *ret = TAKE_PTR(buf);
+
+        return 0;
+}
+
+size_t table_get_rows(Table *t) {
+        /* Number of rows currently stored in the table (cells divided by columns); 0 for a NULL table. */
+        if (t) {
+                assert(t->n_columns > 0);
+                return t->n_cells / t->n_columns;
+        }
+
+        return 0;
+}
+
+size_t table_get_columns(Table *t) {
+        /* Number of columns of the table; 0 for a NULL table. */
+        if (t) {
+                assert(t->n_columns > 0);
+                return t->n_columns;
+        }
+
+        return 0;
+}
+
+int table_set_reverse(Table *t, size_t column, bool b) {
+        /* Selects whether 'column' is sorted in reverse order. The per-column flag array is allocated
+         * lazily, the first time a flag is actually turned on. Returns 0 on success, -ENOMEM if that
+         * allocation fails. */
+
+        assert(t);
+        assert(column < t->n_columns);
+
+        /* Turning off a flag when no flag was ever turned on is a NOP */
+        if (!t->reverse_map && !b)
+                return 0;
+
+        if (!t->reverse_map) {
+                t->reverse_map = new0(bool, t->n_columns);
+                if (!t->reverse_map)
+                        return -ENOMEM;
+        }
+
+        t->reverse_map[column] = b;
+        return 0;
+}
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column) {
+        size_t idx;
+
+        /* Translates a (row, column) position into a cell handle. Returns NULL if the position lies
+         * outside of the cells stored so far. */
+
+        assert(t);
+
+        if (column >= t->n_columns)
+                return NULL;
+
+        idx = row * t->n_columns + column;
+
+        return idx < t->n_cells ? TABLE_INDEX_TO_CELL(idx) : NULL;
+}
+
+const void *table_get(Table *t, TableCell *cell) {
+        TableData *payload;
+
+        /* Returns a pointer to the raw data of the cell, or NULL if table_get_data() yields nothing. */
+
+        assert(t);
+
+        payload = table_get_data(t, cell);
+
+        return payload ? payload->data : NULL;
+}
+
+const void* table_get_at(Table *t, size_t row, size_t column) {
+        TableCell *c;
+
+        /* Same as table_get(), but addresses the cell by position. NULL if there's no such cell. */
+
+        c = table_get_cell(t, row, column);
+
+        return c ? table_get(t, c) : NULL;
+}
+
+static int table_data_to_json(TableData *d, JsonVariant **ret) {
+
+        /* Converts a single cell into a JSON variant. Empty cells as well as "infinite"/unset timestamps,
+         * timespans and sizes become JSON null. Returns -EINVAL for unknown cell types, otherwise the
+         * result of the respective json_variant_new_xyz() call. */
+
+        switch (d->type) {
+
+        case TABLE_EMPTY:
+                return json_variant_new_null(ret);
+
+        case TABLE_STRING:
+                return json_variant_new_string(ret, d->string);
+
+        case TABLE_BOOLEAN:
+                return json_variant_new_boolean(ret, d->boolean);
+
+        case TABLE_TIMESTAMP:
+                return d->timestamp == USEC_INFINITY ?
+                        json_variant_new_null(ret) :
+                        json_variant_new_unsigned(ret, d->timestamp);
+
+        case TABLE_TIMESPAN:
+                return d->timespan == USEC_INFINITY ?
+                        json_variant_new_null(ret) :
+                        json_variant_new_unsigned(ret, d->timespan);
+
+        case TABLE_SIZE:
+                return d->size == (size_t) -1 ?
+                        json_variant_new_null(ret) :
+                        json_variant_new_unsigned(ret, d->size);
+
+        case TABLE_UINT32:
+                return json_variant_new_unsigned(ret, d->uint32);
+
+        case TABLE_UINT64:
+                return json_variant_new_unsigned(ret, d->uint64);
+
+        case TABLE_PERCENT:
+                return json_variant_new_integer(ret, d->percent);
+
+        default:
+                return -EINVAL;
+        }
+}
+
+int table_to_json(Table *t, JsonVariant **ret) { /* Converts the table into a JSON array with one entry per
+ * non-header row (in sorted order if a sort map is set). 'elements' is a scratch array of interleaved
+ * slots: even indexes are filled once from the header row, odd indexes are refilled for every row, and
+ * the whole array is handed to json_variant_new_object() (presumably as alternating key/value pairs).
+ * Returns a negative errno-style value on failure, otherwise the result of json_variant_new_array(). */
+ JsonVariant **rows = NULL, **elements = NULL;
+ _cleanup_free_ size_t *sorted = NULL;
+ size_t n_rows, i, j, display_columns;
+ int r;
+
+ assert(t);
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+ assert(display_columns > 0);
+
+ elements = new0(JsonVariant*, display_columns * 2);
+ if (!elements) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (j = 0; j < display_columns; j++) { /* fill the even slots from the header row */
+ TableData *d;
+
+ assert_se(d = t->data[t->display_map ? t->display_map[j] : j]);
+
+ r = table_data_to_json(d, elements + j*2);
+ if (r < 0)
+ goto finish;
+ }
+
+ rows = new0(JsonVariant*, n_rows-1);
+ if (!rows) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (i = 1; i < n_rows; i++) { /* start at 1: the header row is not emitted as a row of its own */
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ for (j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t k;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ k = j*2+1;
+ elements[k] = json_variant_unref(elements[k]); /* drop the value left over from the previous row */
+
+ r = table_data_to_json(d, elements + k);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_object(rows + i - 1, elements, display_columns * 2);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_array(ret, rows, n_rows - 1);
+
+finish: /* common exit path for success and failure: release our references on the scratch arrays */
+ if (rows) {
+ json_variant_unref_many(rows, n_rows-1);
+ free(rows);
+ }
+
+ if (elements) {
+ json_variant_unref_many(elements, display_columns*2);
+ free(elements);
+ }
+
+ return r;
+}
+
+int table_print_json(Table *t, FILE *f, JsonFormatFlags flags) {
+        _cleanup_(json_variant_unrefp) JsonVariant *json = NULL;
+        int r;
+
+        /* Converts the table to JSON via table_to_json() and dumps it to 'f' (stdout if 'f' is NULL),
+         * passing 'flags' on to json_variant_dump(). Returns a negative errno-style value if the
+         * conversion fails, otherwise the result of fflush_and_check(). */
+
+        assert(t);
+
+        r = table_to_json(t, &json);
+        if (r < 0)
+                return r;
+
+        if (!f)
+                f = stdout;
+
+        json_variant_dump(json, flags, f, NULL);
+
+        return fflush_and_check(f);
+}
diff --git a/src/shared/format-table.h b/src/shared/format-table.h
new file mode 100644
index 0000000..5ff2479
--- /dev/null
+++ b/src/shared/format-table.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum TableDataType { /* The kinds of payload a table cell can carry */
+ TABLE_EMPTY, /* no payload: shown as empty string, JSON null */
+ TABLE_STRING,
+ TABLE_BOOLEAN, /* shown via yes_no() */
+ TABLE_TIMESTAMP, /* shown via format_timestamp() */
+ TABLE_TIMESPAN, /* shown via format_timespan() */
+ TABLE_SIZE, /* shown via format_bytes() */
+ TABLE_UINT32,
+ TABLE_UINT64,
+ TABLE_PERCENT, /* shown as integer followed by '%' */
+ _TABLE_DATA_TYPE_MAX, /* also used by table_add_many() to terminate its argument list */
+ _TABLE_DATA_TYPE_INVALID = -1,
+} TableDataType;
+
+typedef struct Table Table;
+typedef struct TableCell TableCell;
+
+Table *table_new_internal(const char *first_header, ...) _sentinel_;
+#define table_new(...) table_new_internal(__VA_ARGS__, NULL)
+Table *table_new_raw(size_t n_columns);
+Table *table_unref(Table *t);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Table*, table_unref);
+
+int table_add_cell_full(Table *t, TableCell **ret_cell, TableDataType type, const void *data, size_t minimum_width, size_t maximum_width, unsigned weight, unsigned align_percent, unsigned ellipsize_percent);
+static inline int table_add_cell(Table *t, TableCell **ret_cell, TableDataType type, const void *data) {
+        /* Shortcut for table_add_cell_full() that passes -1 for both widths, the weight and both percentages */
+        const size_t no_width = (size_t) -1;
+        const unsigned no_value = (unsigned) -1;
+
+        return table_add_cell_full(t, ret_cell, type, data, no_width, no_width, no_value, no_value, no_value);
+}
+
+int table_dup_cell(Table *t, TableCell *cell);
+
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width);
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width);
+int table_set_weight(Table *t, TableCell *cell, unsigned weight);
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_color(Table *t, TableCell *cell, const char *color);
+/* Sets the URL of a cell. (The parameter used to be called "color", a leftover from copying the
+ * table_set_color() prototype; parameter names in prototypes don't affect callers.) */
+int table_set_url(Table *t, TableCell *cell, const char *url);
+int table_set_uppercase(Table *t, TableCell *cell, bool b);
+
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data);
+
+int table_add_many_internal(Table *t, TableDataType first_type, ...);
+#define table_add_many(t, ...) table_add_many_internal(t, __VA_ARGS__, _TABLE_DATA_TYPE_MAX)
+
+void table_set_header(Table *table, bool b);
+void table_set_width(Table *t, size_t width);
+int table_set_display(Table *t, size_t first_column, ...);
+int table_set_sort(Table *t, size_t first_column, ...);
+int table_set_reverse(Table *t, size_t column, bool b);
+
+int table_print(Table *t, FILE *f);
+int table_format(Table *t, char **ret);
+
+static inline TableCell* TABLE_HEADER_CELL(size_t i) {
+        /* Cell handles are indexes encoded as pointers, shifted by one */
+        size_t encoded = i + 1;
+
+        return SIZE_TO_PTR(encoded);
+}
+
+size_t table_get_rows(Table *t);
+size_t table_get_columns(Table *t);
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column);
+
+const void *table_get(Table *t, TableCell *cell);
+const void *table_get_at(Table *t, size_t row, size_t column);
+
+int table_to_json(Table *t, JsonVariant **ret);
+/* Dumps the table as JSON to 'f'. The flags type matches the definition in format-table.c, which takes
+ * JsonFormatFlags rather than a plain unsigned. */
+int table_print_json(Table *t, FILE *f, JsonFormatFlags json_flags);
diff --git a/src/shared/fstab-util.c b/src/shared/fstab-util.c
new file mode 100644
index 0000000..6fd9866
--- /dev/null
+++ b/src/shared/fstab-util.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "fstab-util.h"
+#include "macro.h"
+#include "mount-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+int fstab_has_fstype(const char *fstype) {
+        _cleanup_endmntent_ FILE *fstab = NULL;
+        struct mntent *entry;
+
+        /* Returns true if /etc/fstab has at least one entry of the given file system type, false if it has
+         * none or the file doesn't exist, and a negative errno on failure. */
+
+        fstab = setmntent("/etc/fstab", "re");
+        if (!fstab)
+                return errno == ENOENT ? false : -errno;
+
+        for (;;) {
+                /* getmntent() returns NULL both on EOF and on error, tell them apart via errno */
+                errno = 0;
+                entry = getmntent(fstab);
+                if (!entry)
+                        break;
+
+                if (streq(entry->mnt_type, fstype))
+                        return true;
+        }
+
+        return errno != 0 ? -errno : false;
+}
+
+int fstab_is_mount_point(const char *mount) {
+        _cleanup_endmntent_ FILE *fstab = NULL;
+        struct mntent *entry;
+
+        /* Returns true if /etc/fstab has an entry whose mount directory equals 'mount', false if it has
+         * none or the file doesn't exist, and a negative errno on failure. */
+
+        fstab = setmntent("/etc/fstab", "re");
+        if (!fstab)
+                return errno == ENOENT ? false : -errno;
+
+        for (;;) {
+                /* getmntent() returns NULL both on EOF and on error, tell them apart via errno */
+                errno = 0;
+                entry = getmntent(fstab);
+                if (!entry)
+                        break;
+
+                if (path_equal(entry->mnt_dir, mount))
+                        return true;
+        }
+
+        return errno != 0 ? -errno : false;
+}
+
+int fstab_filter_options(const char *opts, const char *names,
+ const char **namefound, char **value, char **filtered) { /* Looks for any of the names in the
+ * NUL-separated list 'names' within the comma-separated option string 'opts'. The last match wins:
+ * '*namefound' (if requested) is set to the matching name or NULL, '*value' (if requested) to a newly
+ * allocated copy of the text following '=', or NULL if the option carried no '='. If 'filtered' is
+ * passed it receives 'opts' with all matching entries removed, joined by ",". Returns 1 if a name was
+ * found, 0 if not, and -ENOMEM on allocation failure. */
+ const char *name, *n = NULL, *x;
+ _cleanup_strv_free_ char **stor = NULL;
+ _cleanup_free_ char *v = NULL, **strv = NULL;
+
+ assert(names && *names);
+
+ if (!opts)
+ goto answer;
+
+ /* If !value and !filtered, this function is not allowed to fail. */
+
+ if (!filtered) { /* no filtered copy wanted: scan 'opts' in place, without splitting it up */
+ const char *word, *state;
+ size_t l;
+
+ FOREACH_WORD_SEPARATOR(word, l, opts, ",", state)
+ NULSTR_FOREACH(name, names) {
+ if (l < strlen(name))
+ continue;
+ if (!strneq(word, name, strlen(name)))
+ continue;
+
+ /* we know that the string is NUL
+ * terminated, so *x is valid */
+ x = word + strlen(name);
+ if (IN_SET(*x, '\0', '=', ',')) {
+ n = name;
+ if (value) {
+ free(v);
+ if (IN_SET(*x, '\0', ','))
+ v = NULL;
+ else {
+ assert(*x == '=');
+ x++;
+ v = strndup(x, l - strlen(name) - 1);
+ if (!v)
+ return -ENOMEM;
+ }
+ }
+ }
+ }
+ } else {
+ char **t, **s;
+
+ stor = strv_split(opts, ",");
+ if (!stor)
+ return -ENOMEM;
+ strv = memdup(stor, sizeof(char*) * (strv_length(stor) + 1)); /* shallow copy: the strings remain owned by 'stor' */
+ if (!strv)
+ return -ENOMEM;
+
+ for (s = t = strv; *s; s++) { /* compact in place: 's' reads, 't' writes the entries we keep */
+ NULSTR_FOREACH(name, names) {
+ x = startswith(*s, name);
+ if (x && IN_SET(*x, '\0', '='))
+ goto found;
+ }
+
+ *t = *s;
+ t++;
+ continue;
+ found:
+ /* Keep the last occurrence found */
+ n = name;
+ if (value) {
+ free(v);
+ if (*x == '\0')
+ v = NULL;
+ else {
+ assert(*x == '=');
+ x++;
+ v = strdup(x);
+ if (!v)
+ return -ENOMEM;
+ }
+ }
+ }
+ *t = NULL;
+ }
+
+answer:
+ if (namefound)
+ *namefound = n;
+ if (filtered) {
+ char *f;
+
+ f = strv_join(strv, ",");
+ if (!f)
+ return -ENOMEM;
+
+ *filtered = f;
+ }
+ if (value)
+ *value = TAKE_PTR(v);
+
+ return !!n;
+}
+
+int fstab_extract_values(const char *opts, const char *name, char ***values) {
+        _cleanup_strv_free_ char **entries = NULL, **collected = NULL;
+        char **e;
+
+        /* Collects the values of all "<name>=<value>" entries of the comma-separated option string
+         * 'opts' into a newly allocated string list. Returns 1 if at least one value was found, 0 if
+         * none (in which case *values is set to NULL), negative errno on failure. */
+
+        assert(opts);
+        assert(name);
+        assert(values);
+
+        entries = strv_split(opts, ",");
+        if (!entries)
+                return -ENOMEM;
+
+        STRV_FOREACH(e, entries) {
+                const char *rest;
+                int r;
+
+                rest = startswith(*e, name);
+                if (rest && *rest == '=') {
+                        r = strv_extend(&collected, rest + 1);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        *values = TAKE_PTR(collected);
+
+        return !!*values;
+}
+
+int fstab_find_pri(const char *options, int *ret) {
+        _cleanup_free_ char *text = NULL;
+        unsigned parsed;
+        int r;
+
+        /* Extracts the value of the "pri=" option. Returns 1 and stores the value in *ret if it is set,
+         * 0 if the option is missing or has no value (*ret is left alone then), -ERANGE if the value
+         * doesn't fit into a non-negative int, or another negative errno on failure. */
+
+        assert(ret);
+
+        r = fstab_filter_options(options, "pri\0", NULL, &text, NULL);
+        if (r < 0)
+                return r;
+        if (r == 0 || !text)
+                return 0;
+
+        r = safe_atou(text, &parsed);
+        if (r < 0)
+                return r;
+
+        if ((int) parsed < 0)
+                return -ERANGE;
+
+        *ret = (int) parsed;
+        return 1;
+}
+
+static char *unquote(const char *s, const char* quotes) {
+        size_t n;
+
+        assert(s);
+
+        /* Deliberately simple-minded: if the string starts with one of the characters in 'quotes' and
+         * ends in the very same character, returns a copy without these two characters, otherwise a
+         * plain copy. Knows nothing about escaping.
+         *
+         * DON'T USE THIS FOR NEW CODE ANYMORE! */
+
+        n = strlen(s);
+
+        if (n >= 2 && strchr(quotes, s[0]) && s[n-1] == s[0])
+                return strndup(s+1, n-2);
+
+        return strdup(s);
+}
+
+static char *tag_to_udev_node(const char *tagvalue, const char *by) {
+        _cleanup_free_ char *encoded = NULL, *plain = NULL;
+        size_t encoded_size;
+
+        /* Turns a tag value into the path "/dev/disk/by-<by>/<encoded value>": the value is stripped of
+         * surrounding quotes and then run through encode_devnode_name(). Returns a newly allocated
+         * string, or NULL on failure. */
+
+        plain = unquote(tagvalue, QUOTES);
+        if (!plain)
+                return NULL;
+
+        /* Reserve four output bytes per input byte, plus the trailing NUL */
+        encoded_size = strlen(plain) * 4 + 1;
+        encoded = new(char, encoded_size);
+        if (!encoded)
+                return NULL;
+
+        if (encode_devnode_name(plain, encoded, encoded_size) < 0)
+                return NULL;
+
+        return strjoin("/dev/disk/by-", by, "/", encoded);
+}
+
+char *fstab_node_to_udev_node(const char *p) {
+        static const struct {
+                const char *prefix;
+                const char *by;
+        } tags[] = {
+                { "LABEL=",     "label"     },
+                { "UUID=",      "uuid"      },
+                { "PARTUUID=",  "partuuid"  },
+                { "PARTLABEL=", "partlabel" },
+        };
+        size_t i;
+
+        /* Maps an fstab device specification of the form TAG=value to the matching /dev/disk/by-*/
+         * path. Anything else is returned as a plain copy. Returns a newly allocated string, or NULL on
+         * failure. */
+
+        assert(p);
+
+        for (i = 0; i < sizeof(tags) / sizeof(tags[0]); i++) {
+                const char *v;
+
+                v = startswith(p, tags[i].prefix);
+                if (v)
+                        return tag_to_udev_node(v, tags[i].by);
+        }
+
+        return strdup(p);
+}
diff --git a/src/shared/fstab-util.h b/src/shared/fstab-util.h
new file mode 100644
index 0000000..9820f78
--- /dev/null
+++ b/src/shared/fstab-util.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "macro.h"
+
+int fstab_is_mount_point(const char *mount);
+int fstab_has_fstype(const char *fstype);
+
+int fstab_filter_options(const char *opts, const char *names, const char **namefound, char **value, char **filtered);
+
+int fstab_extract_values(const char *opts, const char *name, char ***values);
+
+static inline bool fstab_test_option(const char *opts, const char *names) {
+        /* True if any of the NUL-separated 'names' shows up in the option string 'opts' */
+        return fstab_filter_options(opts, names, NULL, NULL, NULL) != 0;
+}
+
+int fstab_find_pri(const char *options, int *ret);
+
+static inline bool fstab_test_yes_no_option(const char *opts, const char *yes_no) {
+        const char *last_match;
+        int r;
+
+        /* 'yes_no' lists two option names, NUL-separated. The name that occurs last in 'opts' decides:
+         * true if that is the first name of the list, false if it is the second one or if neither
+         * name occurs at all. */
+
+        r = fstab_filter_options(opts, yes_no, &last_match, NULL, NULL);
+        assert(r >= 0);
+
+        /* The first name of the list starts right where the list itself starts */
+        return last_match == yes_no;
+}
+
+char *fstab_node_to_udev_node(const char *p);
diff --git a/src/shared/generate-ip-protocol-list.sh b/src/shared/generate-ip-protocol-list.sh
new file mode 100755
index 0000000..a9b1e0f
--- /dev/null
+++ b/src/shared/generate-ip-protocol-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu  # abort on failing commands and on use of unset variables
+
+$1 -dM -include netinet/in.h - </dev/null | \
+ awk '/^#define[ \t]+IPPROTO_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \
+ sed -e 's/IPPROTO_//'  # Prints the names of all IPPROTO_* macros that netinet/in.h defines with a value, minus the IPPROTO_ prefix. NOTE(review): $1 is presumably the preprocessor command and is left unquoted so that it may carry arguments - confirm against the build system.
diff --git a/src/shared/generator.c b/src/shared/generator.c
new file mode 100644
index 0000000..0adaaf2
--- /dev/null
+++ b/src/shared/generator.c
@@ -0,0 +1,504 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio_ext.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "special.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "unit-name.h"
+#include "util.h"
+
+int generator_open_unit_file(
+                const char *dest,
+                const char *source,
+                const char *name,
+                FILE **file) {
+
+        const char *path;
+        FILE *out;
+
+        /* Creates the new unit file <dest>/<name> (failing if it exists already), writes the "generated
+         * by" header line into it and returns the open stream in *file. 'source' is optional and only
+         * used to make the error message more helpful if the file already exists. Returns 0 on
+         * success, otherwise the value returned by log_error_errno(). */
+
+        path = strjoina(dest, "/", name);
+
+        out = fopen(path, "wxe");
+        if (!out) {
+                if (source && errno == EEXIST)
+                        return log_error_errno(errno,
+                                               "Failed to create unit file %s, as it already exists. Duplicate entry in %s?",
+                                               path, source);
+
+                return log_error_errno(errno,
+                                       "Failed to create unit file %s: %m",
+                                       path);
+        }
+
+        (void) __fsetlocking(out, FSETLOCKING_BYCALLER);
+
+        fprintf(out,
+                "# Automatically generated by %s\n\n",
+                program_invocation_short_name);
+
+        *file = out;
+        return 0;
+}
+
+int generator_add_symlink(const char *root, const char *dst, const char *dep_type, const char *src) {
+        /* Creates the symlink <root>/<dst>.<dep_type>/<src>, pointing to ../<src>. Failure to create the
+         * parent directories is ignored here (the symlink() call will fail then), and so is the case
+         * where something exists at the link path already. */
+
+        const char *target, *link_path;
+
+        target = strjoina("../", src);
+        link_path = strjoina(root, "/", dst, ".", dep_type, "/", src);
+
+        (void) mkdir_parents_label(link_path, 0755);
+
+        if (symlink(target, link_path) < 0 && errno != EEXIST)
+                return log_error_errno(errno, "Failed to create symlink \"%s\": %m", link_path);
+
+        return 0;
+}
+
+static int write_fsck_sysroot_service(const char *dir, const char *what) {
+        _cleanup_free_ char *device_unit = NULL, *what_spec = NULL, *what_exec = NULL;
+        _cleanup_fclose_ FILE *out = NULL;
+        const char *path;
+        int r;
+
+        /* Writes <dir>/systemd-fsck-root.service, a unit that runs fsck on the device 'what' and is
+         * bound to its .device unit. Fails if the file exists already. Returns 0 on success, otherwise
+         * the value returned by the logging call that reported the problem. */
+
+        /* The device path is used twice: specifier-escaped in Description=, and additionally C-escaped
+         * in ExecStart= */
+        what_spec = specifier_escape(what);
+        if (!what_spec)
+                return log_oom();
+
+        what_exec = cescape(what_spec);
+        if (!what_exec)
+                return log_oom();
+
+        path = strjoina(dir, "/systemd-fsck-root.service");
+        log_debug("Creating %s", path);
+
+        r = unit_name_from_path(what, ".device", &device_unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to convert device \"%s\" to unit name: %m", what);
+
+        out = fopen(path, "wxe");
+        if (!out)
+                return log_error_errno(errno, "Failed to create unit file %s: %m", path);
+
+        fprintf(out,
+                "# Automatically generated by %1$s\n\n"
+                "[Unit]\n"
+                "Description=File System Check on %2$s\n"
+                "Documentation=man:systemd-fsck-root.service(8)\n"
+                "DefaultDependencies=no\n"
+                "BindsTo=%3$s\n"
+                "Conflicts=shutdown.target\n"
+                "After=initrd-root-device.target local-fs-pre.target %3$s\n"
+                "Before=shutdown.target\n"
+                "\n"
+                "[Service]\n"
+                "Type=oneshot\n"
+                "RemainAfterExit=yes\n"
+                "ExecStart=" SYSTEMD_FSCK_PATH " %4$s\n"
+                "TimeoutSec=0\n",
+                program_invocation_short_name,
+                what_spec,
+                device_unit,
+                what_exec);
+
+        r = fflush_and_check(out);
+        if (r < 0)
+                return log_error_errno(r, "Failed to write unit file %s: %m", path);
+
+        return 0;
+}
+
+int generator_write_fsck_deps(
+                FILE *f,
+                const char *dir,
+                const char *what,
+                const char *where,
+                const char *fstype) {
+
+        _cleanup_free_ char *instance = NULL;
+        const char *fsck;
+        int r;
+
+        /* Hooks up file system checking for the device 'what' that is to be mounted on 'where'. For the
+         * root directory a symlink to systemd-fsck-root.service is placed in <dir>, for everything else
+         * Requires=/After= lines naming the right fsck service are appended to the unit being written
+         * to 'f'. Returns 0 on success, and also if checking is skipped because 'what' is no device or
+         * no fsck for 'fstype' exists. */
+
+        assert(f);
+        assert(dir);
+        assert(what);
+        assert(where);
+
+        if (!is_device_path(what)) {
+                log_warning("Checking was requested for \"%s\", but it is not a device.", what);
+                return 0;
+        }
+
+        if (!isempty(fstype) && !streq(fstype, "auto")) {
+                r = fsck_exists(fstype);
+                if (r < 0)
+                        log_warning_errno(r, "Checking was requested for %s, but couldn't detect if fsck.%s may be used, proceeding: %m", what, fstype);
+                else if (r == 0) {
+                        /* treat missing check as essentially OK */
+                        log_debug("Checking was requested for %s, but fsck.%s does not exist.", what, fstype);
+                        return 0;
+                }
+        }
+
+        if (path_equal(where, "/")) {
+                const char *lnk;
+
+                lnk = strjoina(dir, "/" SPECIAL_LOCAL_FS_TARGET ".wants/systemd-fsck-root.service");
+
+                mkdir_parents(lnk, 0755);
+                if (symlink(SYSTEM_DATA_UNIT_PATH "/systemd-fsck-root.service", lnk) < 0)
+                        return log_error_errno(errno, "Failed to create symlink %s: %m", lnk);
+
+                return 0;
+        }
+
+        if (in_initrd() && path_equal(where, "/sysroot")) {
+                /* In the initrd the future root directory gets a generated systemd-fsck-root.service */
+                r = write_fsck_sysroot_service(dir, what);
+                if (r < 0)
+                        return r;
+
+                fsck = "systemd-fsck-root.service";
+        } else {
+                r = unit_name_from_path_instance("systemd-fsck", what, ".service", &instance);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to create fsck service name: %m");
+
+                fsck = instance;
+        }
+
+        fprintf(f,
+                "Requires=%1$s\n"
+                "After=%1$s\n",
+                fsck);
+
+        return 0;
+}
+
+int generator_write_timeouts(
+                const char *dir,
+                const char *what,
+                const char *where,
+                const char *opts,
+                char **filtered) {
+
+        /* Configure how long we wait for a device that backs a mount point or a
+         * swap partition to show up. This is useful to support endless device timeouts
+         * for devices that show up only after user input, like crypto devices.
+         *
+         * Looks for the device timeout option in 'opts'. If 'filtered' is non-NULL it receives a copy
+         * of 'opts' with that option removed. If the option is set, carries a valid time span and
+         * 'what' resolves to a device node, a drop-in setting JobRunningTimeoutSec= is written for the
+         * matching .device unit below 'dir'. Returns 0 if there is nothing to do or the option is
+         * ignored, a negative errno-style value on failure, and otherwise the result of
+         * write_drop_in_format(). */
+
+        _cleanup_free_ char *node = NULL, *unit = NULL, *timeout = NULL;
+        usec_t u;
+        int r;
+
+        r = fstab_filter_options(opts, "comment=systemd.device-timeout\0"
+                                       "x-systemd.device-timeout\0",
+                                 NULL, &timeout, filtered);
+        if (r <= 0)
+                return r;
+
+        /* The option may be specified without "=<value>", in which case it is reported as found but no
+         * value is returned. Don't pass NULL on to the parser and the format strings below. */
+        if (!timeout) {
+                log_warning("Timeout for %s specified without a value, ignoring.", where);
+                return 0;
+        }
+
+        /* We only parse the value to validate it, the original string is what ends up in the drop-in */
+        r = parse_sec_fix_0(timeout, &u);
+        if (r < 0) {
+                log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout);
+                return 0;
+        }
+
+        node = fstab_node_to_udev_node(what);
+        if (!node)
+                return log_oom();
+        if (!is_device_path(node)) {
+                log_warning("x-systemd.device-timeout ignored for %s", what);
+                return 0;
+        }
+
+        r = unit_name_from_path(node, ".device", &unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit name from path: %m");
+
+        return write_drop_in_format(dir, unit, 50, "device-timeout",
+                                    "# Automatically generated by %s\n\n"
+                                    "[Unit]\n"
+                                    "JobRunningTimeoutSec=%s",
+                                    program_invocation_short_name,
+                                    timeout);
+}
+
+int generator_write_device_deps(
+                const char *dir,
+                const char *what,
+                const char *where,
+                const char *opts) {
+
+        /* fstab entries carrying the _netdev option shall have the network ordering applied to the
+         * actual device, as it depends on the network connection. If what we mount is not a real
+         * device (NFS, CIFS), we rely on the effect of _netdev on the mount unit itself.
+         *
+         * Returns 0 if there's nothing to do, a negative errno-style value on failure, and otherwise
+         * the result of write_drop_in_format(). */
+
+        _cleanup_free_ char *devnode = NULL, *device_unit = NULL;
+        int r;
+
+        if (!fstab_test_option(opts, "_netdev\0"))
+                return 0;
+
+        devnode = fstab_node_to_udev_node(what);
+        if (!devnode)
+                return log_oom();
+
+        /* No device, hence nothing to apply dependencies to. */
+        if (!is_device_path(devnode))
+                return 0;
+
+        r = unit_name_from_path(devnode, ".device", &device_unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+                                       devnode);
+
+        /* See mount_add_default_dependencies for explanation why we create such
+         * dependencies. */
+        return write_drop_in_format(dir, device_unit, 50, "netdev-dependencies",
+                                    "# Automatically generated by %s\n\n"
+                                    "[Unit]\n"
+                                    "After=" SPECIAL_NETWORK_ONLINE_TARGET " " SPECIAL_NETWORK_TARGET "\n"
+                                    "Wants=" SPECIAL_NETWORK_ONLINE_TARGET "\n",
+                                    program_invocation_short_name);
+}
+
+int generator_write_initrd_root_device_deps(const char *dir, const char *what) {
+        _cleanup_free_ char *device_unit = NULL;
+        int r;
+
+        /* Writes a "root-device" drop-in for SPECIAL_INITRD_ROOT_DEVICE_TARGET that lists the
+         * .device unit derived from `what` under both Requires= and After=. */
+
+        r = unit_name_from_path(what, ".device", &device_unit);
+        if (r >= 0)
+                return write_drop_in_format(dir, SPECIAL_INITRD_ROOT_DEVICE_TARGET, 50, "root-device",
+                                            "# Automatically generated by %s\n\n"
+                                            "[Unit]\n"
+                                            "Requires=%s\n"
+                                            "After=%s",
+                                            program_invocation_short_name,
+                                            device_unit,
+                                            device_unit);
+
+        return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+                               what);
+}
+
+int generator_hook_up_mkswap(
+                const char *dir,
+                const char *what) {
+
+        _cleanup_free_ char *udev_node = NULL, *service = NULL, *escaped_node = NULL, *swap_unit = NULL;
+        _cleanup_fclose_ FILE *out = NULL;
+        const char *service_path;
+        int r;
+
+        /* Writes a systemd-mkswap@ service instance for the device behind `what` into `dir`. The
+         * service runs SYSTEMD_MAKEFS_PATH in "swap" mode on that device and is ordered before
+         * the matching .swap unit; finally it is hooked up to that unit through
+         * generator_add_symlink() with dependency type "requires". */
+
+        udev_node = fstab_node_to_udev_node(what);
+        if (!udev_node)
+                return log_oom();
+
+        /* Only device nodes can be formatted. */
+        if (!is_device_path(udev_node))
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Cannot format something that is not a device node: %s",
+                                       udev_node);
+
+        r = unit_name_from_path_instance("systemd-mkswap", udev_node, ".service", &service);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+                                       udev_node);
+
+        service_path = strjoina(dir, "/", service);
+        log_debug("Creating %s", service_path);
+
+        escaped_node = cescape(udev_node);
+        if (!escaped_node)
+                return log_oom();
+
+        /* The swap unit name is derived from `what` as passed in, not from the resolved node. */
+        r = unit_name_from_path(what, ".swap", &swap_unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+                                       what);
+
+        /* "x" in the mode: creation fails if the unit file already exists. */
+        out = fopen(service_path, "wxe");
+        if (!out)
+                return log_error_errno(errno, "Failed to create unit file %s: %m",
+                                       service_path);
+
+        fprintf(out,
+                "# Automatically generated by %s\n\n"
+                "[Unit]\n"
+                "Description=Make Swap on %%f\n"
+                "Documentation=man:systemd-mkswap@.service(8)\n"
+                "DefaultDependencies=no\n"
+                "BindsTo=%%i.device\n"
+                "Conflicts=shutdown.target\n"
+                "After=%%i.device\n"
+                "Before=shutdown.target %s\n"
+                "\n"
+                "[Service]\n"
+                "Type=oneshot\n"
+                "RemainAfterExit=yes\n"
+                "ExecStart="SYSTEMD_MAKEFS_PATH " swap %s\n"
+                "TimeoutSec=0\n",
+                program_invocation_short_name,
+                swap_unit,
+                escaped_node);
+
+        r = fflush_and_check(out);
+        if (r < 0)
+                return log_error_errno(r, "Failed to write unit file %s: %m", service_path);
+
+        return generator_add_symlink(dir, swap_unit, "requires", service);
+}
+
+int generator_hook_up_mkfs(
+                const char *dir,
+                const char *what,
+                const char *where,
+                const char *type) {
+
+        _cleanup_free_ char *udev_node = NULL, *service = NULL, *escaped_node = NULL, *mount_unit = NULL;
+        _cleanup_fclose_ FILE *out = NULL;
+        const char *service_path;
+        int r;
+
+        /* Writes a systemd-mkfs@ service instance for the device behind `what` into `dir`. The
+         * service runs SYSTEMD_MAKEFS_PATH with the file system `type` on that device; finally it
+         * is hooked up to the .mount unit for `where` through generator_add_symlink() with
+         * dependency type "requires". */
+
+        udev_node = fstab_node_to_udev_node(what);
+        if (!udev_node)
+                return log_oom();
+
+        /* Only device nodes can be formatted. */
+        if (!is_device_path(udev_node))
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Cannot format something that is not a device node: %s",
+                                       udev_node);
+
+        /* An explicit file system type is required; a missing type or "auto" is refused. */
+        if (!type || streq(type, "auto"))
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                                       "Cannot format partition %s, filesystem type is not specified",
+                                       udev_node);
+
+        r = unit_name_from_path_instance("systemd-mkfs", udev_node, ".service", &service);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+                                       udev_node);
+
+        service_path = strjoina(dir, "/", service);
+        log_debug("Creating %s", service_path);
+
+        escaped_node = cescape(udev_node);
+        if (!escaped_node)
+                return log_oom();
+
+        r = unit_name_from_path(where, ".mount", &mount_unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+                                       where);
+
+        /* "x" in the mode: creation fails if the unit file already exists. */
+        out = fopen(service_path, "wxe");
+        if (!out)
+                return log_error_errno(errno, "Failed to create unit file %s: %m",
+                                       service_path);
+
+        fprintf(out,
+                "# Automatically generated by %s\n\n"
+                "[Unit]\n"
+                "Description=Make File System on %%f\n"
+                "Documentation=man:systemd-mkfs@.service(8)\n"
+                "DefaultDependencies=no\n"
+                "BindsTo=%%i.device\n"
+                "Conflicts=shutdown.target\n"
+                "After=%%i.device\n"
+                /* Whether fsck gets used is not known here, so to be safe order ourselves
+                 * before both systemd-fsck@.service and the mount unit. */
+                "Before=shutdown.target systemd-fsck@%%i.service %s\n"
+                "\n"
+                "[Service]\n"
+                "Type=oneshot\n"
+                "RemainAfterExit=yes\n"
+                "ExecStart="SYSTEMD_MAKEFS_PATH " %s %s\n"
+                "TimeoutSec=0\n",
+                program_invocation_short_name,
+                mount_unit,
+                type,
+                escaped_node);
+        /* XXX: what about local-fs-pre.target? */
+
+        r = fflush_and_check(out);
+        if (r < 0)
+                return log_error_errno(r, "Failed to write unit file %s: %m", service_path);
+
+        return generator_add_symlink(dir, mount_unit, "requires", service);
+}
+
+int generator_hook_up_growfs(
+                const char *dir,
+                const char *where,
+                const char *target) {
+
+        _cleanup_free_ char *unit = NULL, *escaped = NULL, *where_unit = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        const char *unit_file;
+        int r;
+
+        /* Writes a systemd-growfs@ service instance for the mount point `where` into `dir`. The
+         * service runs SYSTEMD_GROWFS_PATH on the (C-escaped) mount point, is bound to and ordered
+         * after the mount unit, and is ordered before `target`. Finally it is hooked up to the
+         * .mount unit through generator_add_symlink() with dependency type "wants".
+         *
+         * Returns a negative errno-style value on failure, otherwise whatever
+         * generator_add_symlink() returns. */
+
+        escaped = cescape(where);
+        if (!escaped)
+                return log_oom();
+
+        r = unit_name_from_path_instance("systemd-growfs", where, ".service", &unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+                                       where);
+
+        r = unit_name_from_path(where, ".mount", &where_unit);
+        if (r < 0)
+                return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+                                       where);
+
+        unit_file = strjoina(dir, "/", unit);
+        log_debug("Creating %s", unit_file);
+
+        /* "x" in the mode: creation fails if the unit file already exists. */
+        f = fopen(unit_file, "wxe");
+        if (!f)
+                return log_error_errno(errno, "Failed to create unit file %s: %m",
+                                       unit_file);
+
+        fprintf(f,
+                "# Automatically generated by %s\n\n"
+                "[Unit]\n"
+                "Description=Grow File System on %%f\n"
+                "Documentation=man:systemd-growfs@.service(8)\n"
+                "DefaultDependencies=no\n"
+                "BindsTo=%%i.mount\n"
+                "Conflicts=shutdown.target\n"
+                "After=%%i.mount\n"
+                "Before=shutdown.target %s\n"
+                "\n"
+                "[Service]\n"
+                "Type=oneshot\n"
+                "RemainAfterExit=yes\n"
+                "ExecStart="SYSTEMD_GROWFS_PATH " %s\n"
+                "TimeoutSec=0\n",
+                program_invocation_short_name,
+                target,
+                escaped);
+
+        /* Flush and check for write errors before hooking the unit up, exactly like the mkswap
+         * and mkfs variants do; otherwise a truncated unit file would go unnoticed. */
+        r = fflush_and_check(f);
+        if (r < 0)
+                return log_error_errno(r, "Failed to write unit file %s: %m", unit_file);
+
+        return generator_add_symlink(dir, where_unit, "wants", unit);
+}
+
+/* Logging setup shared by generators: calls log_set_prohibit_ipc(true) first and then
+ * log_setup_service(). */
+void log_setup_generator(void) {
+ log_set_prohibit_ipc(true);
+ log_setup_service();
+}
diff --git a/src/shared/generator.h b/src/shared/generator.h
new file mode 100644
index 0000000..5a1c1e3
--- /dev/null
+++ b/src/shared/generator.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+#include "main-func.h"
+
+int generator_open_unit_file(
+ const char *dest,
+ const char *source,
+ const char *name,
+ FILE **file);
+
+int generator_add_symlink(const char *root, const char *dst, const char *dep_type, const char *src);
+
+int generator_write_fsck_deps(
+ FILE *f,
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+
+int generator_write_timeouts(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts,
+ char **filtered);
+
+int generator_write_device_deps(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts);
+
+int generator_write_initrd_root_device_deps(
+ const char *dir,
+ const char *what);
+
+int generator_hook_up_mkswap(
+ const char *dir,
+ const char *what);
+int generator_hook_up_mkfs(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+int generator_hook_up_growfs(
+ const char *dir,
+ const char *where,
+ const char *target);
+
+void log_setup_generator(void);
+
+/* Similar to DEFINE_MAIN_FUNCTION, but initializes logging and assigns positional arguments.
+ *
+ * The generated entry point accepts either no arguments or exactly three (argc == 1 or
+ * argc == 4); any other count is rejected with EINVAL. impl() is invoked with argv[1..3],
+ * or with "/tmp" for all three parameters when no arguments were given. The last macro
+ * argument maps a negative r to EXIT_FAILURE and anything else to EXIT_SUCCESS (r is
+ * presumably impl()'s result as stored by _DEFINE_MAIN_FUNCTION -- see main-func.h). */
+#define DEFINE_MAIN_GENERATOR_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION( \
+ ({ \
+ log_setup_generator(); \
+ if (argc > 1 && argc != 4) \
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), \
+ "This program takes zero or three arguments."); \
+ }), \
+ impl(argc > 1 ? argv[1] : "/tmp", \
+ argc > 1 ? argv[2] : "/tmp", \
+ argc > 1 ? argv[3] : "/tmp"), \
+ r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
diff --git a/src/shared/gpt.h b/src/shared/gpt.h
new file mode 100644
index 0000000..fd953fa
--- /dev/null
+++ b/src/shared/gpt.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <endian.h>
+
+#include "sd-id128.h"
+
+/* We only support root disk discovery for x86, x86-64, Itanium and ARM for
+ * now, since EFI for anything else doesn't really exist, and we only
+ * care for root partitions on the same disk as the EFI ESP. */
+
+#define GPT_ROOT_X86 SD_ID128_MAKE(44,47,95,40,f2,97,41,b2,9a,f7,d1,31,d5,f0,45,8a)
+#define GPT_ROOT_X86_64 SD_ID128_MAKE(4f,68,bc,e3,e8,cd,4d,b1,96,e7,fb,ca,f9,84,b7,09)
+#define GPT_ROOT_ARM SD_ID128_MAKE(69,da,d7,10,2c,e4,4e,3c,b1,6c,21,a1,d4,9a,be,d3)
+#define GPT_ROOT_ARM_64 SD_ID128_MAKE(b9,21,b0,45,1d,f0,41,c3,af,44,4c,6f,28,0d,3f,ae)
+#define GPT_ROOT_IA64 SD_ID128_MAKE(99,3d,8d,3d,f8,0e,42,25,85,5a,9d,af,8e,d7,ea,97)
+#define GPT_ESP SD_ID128_MAKE(c1,2a,73,28,f8,1f,11,d2,ba,4b,00,a0,c9,3e,c9,3b)
+#define GPT_SWAP SD_ID128_MAKE(06,57,fd,6d,a4,ab,43,c4,84,e5,09,33,c8,4b,4f,4f)
+#define GPT_HOME SD_ID128_MAKE(93,3a,c7,e1,2e,b4,4f,13,b8,44,0e,14,e2,ae,f9,15)
+#define GPT_SRV SD_ID128_MAKE(3b,8f,84,25,20,e0,4f,3b,90,7f,1a,25,a7,6f,98,e8)
+
+/* Verity partitions for the root partitions above (we only define them for the root partitions, because only they are
+ * commonly read-only and hence suitable for verity). */
+#define GPT_ROOT_X86_VERITY SD_ID128_MAKE(d1,3c,5d,3b,b5,d1,42,2a,b2,9f,94,54,fd,c8,9d,76)
+#define GPT_ROOT_X86_64_VERITY SD_ID128_MAKE(2c,73,57,ed,eb,d2,46,d9,ae,c1,23,d4,37,ec,2b,f5)
+#define GPT_ROOT_ARM_VERITY SD_ID128_MAKE(73,86,cd,f2,20,3c,47,a9,a4,98,f2,ec,ce,45,a2,d6)
+#define GPT_ROOT_ARM_64_VERITY SD_ID128_MAKE(df,33,00,ce,d6,9f,4c,92,97,8c,9b,fb,0f,38,d8,20)
+#define GPT_ROOT_IA64_VERITY SD_ID128_MAKE(86,ed,10,d5,b6,07,45,bb,89,57,d3,50,f2,3d,05,71)
+
+#if defined(__x86_64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_X86_VERITY
+#elif defined(__i386__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_VERITY
+#endif
+
+#if defined(__ia64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_IA64
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_IA64_VERITY
+#endif
+
+#if defined(__aarch64__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_ARM_VERITY
+#elif defined(__arm__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY
+#endif
+
+#define GPT_FLAG_NO_BLOCK_IO_PROTOCOL (1ULL << 1)
+
+/* Flags we recognize on the root, swap, home and srv partitions when
+ * doing auto-discovery. These happen to be identical to what
+ * Microsoft defines for its own Basic Data Partitions, but that's
+ * just because we saw no point in defining any other values here. */
+#define GPT_FLAG_READ_ONLY (1ULL << 60)
+#define GPT_FLAG_NO_AUTO (1ULL << 63)
+
+#define GPT_LINUX_GENERIC SD_ID128_MAKE(0f,c6,3d,af,84,83,47,72,8e,79,3d,69,d8,47,7d,e4)
diff --git a/src/shared/id128-print.c b/src/shared/id128-print.c
new file mode 100644
index 0000000..1b20b8f
--- /dev/null
+++ b/src/shared/id128-print.c
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "id128-print.h"
+#include "log.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+
+int id128_pretty_print(sd_id128_t id, bool pretty) {
+        _cleanup_free_ char *man_link = NULL, *mod_link = NULL;
+        const char *hl_on, *hl_off;
+        unsigned k;
+
+        /* Prints `id` to stdout. In non-pretty mode only the plain hexadecimal form plus a
+         * newline is emitted; in pretty mode the ID is shown as plain string, as UUID, as
+         * SD_ID128_MAKE() macro invocation and as Python uuid.UUID constant. */
+
+        if (!pretty) {
+                printf(SD_ID128_FORMAT_STR "\n",
+                       SD_ID128_FORMAT_VAL(id));
+                return 0;
+        }
+
+        hl_on = ansi_highlight();
+        hl_off = ansi_normal();
+
+        if (terminal_urlify("man:systemd-id128(1)", "systemd-id128(1)", &man_link) < 0)
+                return log_oom();
+
+        if (terminal_urlify("https://docs.python.org/3/library/uuid.html", "uuid", &mod_link) < 0)
+                return log_oom();
+
+        printf("As string:\n"
+               "%s" SD_ID128_FORMAT_STR "%s\n\n",
+               hl_on, SD_ID128_FORMAT_VAL(id), hl_off);
+
+        printf("As UUID:\n"
+               "%s%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x%s\n\n",
+               hl_on, SD_ID128_FORMAT_VAL(id), hl_off);
+
+        printf("As %s macro:\n"
+               "%s#define MESSAGE_XYZ SD_ID128_MAKE(",
+               man_link,
+               hl_on);
+
+        /* The 16 bytes, comma-separated, with no comma after the last one. */
+        for (k = 0; k < 16; k++)
+                printf("%02x%s", id.bytes[k], k == 15 ? "" : ",");
+        printf(")%s\n\n", hl_off);
+
+        printf("As Python constant:\n"
+               ">>> import %s\n"
+               ">>> %sMESSAGE_XYZ = uuid.UUID('" SD_ID128_FORMAT_STR "')%s\n",
+               mod_link,
+               hl_on, SD_ID128_FORMAT_VAL(id), hl_off);
+
+        return 0;
+}
+
+int id128_print_new(bool pretty) {
+        sd_id128_t fresh;
+        int r;
+
+        /* Obtains a new ID from sd_id128_randomize() and prints it via id128_pretty_print(). */
+        r = sd_id128_randomize(&fresh);
+        if (r >= 0)
+                return id128_pretty_print(fresh, pretty);
+
+        return log_error_errno(r, "Failed to generate ID: %m");
+}
diff --git a/src/shared/id128-print.h b/src/shared/id128-print.h
new file mode 100644
index 0000000..5d50de0
--- /dev/null
+++ b/src/shared/id128-print.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+int id128_pretty_print(sd_id128_t id, bool pretty);
+int id128_print_new(bool pretty);
diff --git a/src/shared/ima-util.c b/src/shared/ima-util.c
new file mode 100644
index 0000000..0d4ce3c
--- /dev/null
+++ b/src/shared/ima-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "ima-util.h"
+
+static int use_ima_cached = -1;
+
+bool use_ima(void) {
+
+        /* The IMA securityfs directory is probed on the first call only; the outcome (0 or 1)
+         * is kept in use_ima_cached, which is negative until then. */
+        if (use_ima_cached >= 0)
+                return use_ima_cached;
+
+        use_ima_cached = access("/sys/kernel/security/ima/", F_OK) >= 0;
+        return use_ima_cached;
+}
diff --git a/src/shared/ima-util.h b/src/shared/ima-util.h
new file mode 100644
index 0000000..8f20741
--- /dev/null
+++ b/src/shared/ima-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+bool use_ima(void);
diff --git a/src/shared/import-util.c b/src/shared/import-util.c
new file mode 100644
index 0000000..bcd6c0c
--- /dev/null
+++ b/src/shared/import-util.c
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "import-util.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "util.h"
+
+int import_url_last_component(const char *url, char **ret) {
+        const char *start, *end;
+        char *component;
+
+        /* Extracts the last path component of `url`: everything from the first '?' on is
+         * ignored, as are trailing slashes. Returns -EINVAL if no component is left. */
+
+        end = strchrnul(url, '?');
+        for (; end > url && end[-1] == '/'; end--)
+                ;
+
+        /* Walk back to just after the preceding slash, or to the start of the URL. */
+        for (start = end; start > url && start[-1] != '/'; start--)
+                ;
+
+        if (start >= end)
+                return -EINVAL;
+
+        component = strndup(start, end - start);
+        if (!component)
+                return -ENOMEM;
+
+        *ret = component;
+        return 0;
+}
+
+int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
+        const char *cut;
+        size_t keep;
+        char *result;
+
+        assert(url);
+        assert(ret);
+
+        /* Replaces the last path component of `url` by `suffix`: everything from the first '?'
+         * on is dropped, as are trailing slashes and the component in front of them. What is
+         * left (ending in a slash) is kept and `suffix` appended. Returns -EINVAL if nothing
+         * would be left to keep. */
+
+        cut = strchrnul(url, '?');
+
+        for (; cut > url && cut[-1] == '/'; cut--)
+                ;
+
+        for (; cut > url && cut[-1] != '/'; cut--)
+                ;
+
+        if (cut <= url)
+                return -EINVAL;
+
+        keep = cut - url;
+
+        result = new(char, keep + strlen(suffix) + 1);
+        if (!result)
+                return -ENOMEM;
+
+        memcpy(result, url, keep);
+        strcpy(result + keep, suffix);
+
+        *ret = result;
+        return 0;
+}
+
+/* String names of the ImportVerify values, indexed by enum value. */
+static const char* const import_verify_table[_IMPORT_VERIFY_MAX] = {
+ [IMPORT_VERIFY_NO] = "no",
+ [IMPORT_VERIFY_CHECKSUM] = "checksum",
+ [IMPORT_VERIFY_SIGNATURE] = "signature",
+};
+
+/* Generates the lookup helpers for the table above (presumably import_verify_to_string() and
+ * import_verify_from_string(), which import-util.h declares). */
+DEFINE_STRING_TABLE_LOOKUP(import_verify, ImportVerify);
+
+int tar_strip_suffixes(const char *name, char **ret) {
+
+        /* The tarball suffixes we know, probed in this order. */
+        static const char suffixes[] =
+                ".tar\0"
+                ".tar.xz\0"
+                ".tar.gz\0"
+                ".tar.bz2\0"
+                ".tgz\0";
+
+        const char *sfx, *end = NULL;
+        char *stripped;
+
+        /* Returns a copy of `name` with one known tarball suffix removed, or a full copy if
+         * none matches. Fails with -EINVAL if nothing would remain. */
+
+        NULSTR_FOREACH(sfx, suffixes) {
+                end = endswith(name, sfx);
+                if (end)
+                        break;
+        }
+
+        /* No known suffix: keep the whole name. */
+        if (!end)
+                end = strchr(name, 0);
+
+        if (end <= name)
+                return -EINVAL;
+
+        stripped = strndup(name, end - name);
+        if (!stripped)
+                return -ENOMEM;
+
+        *ret = stripped;
+        return 0;
+}
+
+int raw_strip_suffixes(const char *p, char **ret) {
+
+        static const char suffixes[] =
+                ".xz\0"
+                ".gz\0"
+                ".bz2\0"
+                ".raw\0"
+                ".qcow2\0"
+                ".img\0"
+                ".bin\0";
+
+        _cleanup_free_ char *copy = NULL;
+        bool stripped;
+
+        /* Returns a copy of `p` with all known compression/image suffixes removed. Suffixes may
+         * be stacked (e.g. ".raw.xz"), hence we keep making passes over the list until a full
+         * pass removes nothing. */
+
+        copy = strdup(p);
+        if (!copy)
+                return -ENOMEM;
+
+        do {
+                const char *sfx;
+
+                stripped = false;
+
+                NULSTR_FOREACH(sfx, suffixes) {
+                        char *tail;
+
+                        tail = endswith(copy, sfx);
+                        if (!tail)
+                                continue;
+
+                        /* Chop the suffix off in place. */
+                        *tail = 0;
+                        stripped = true;
+                }
+        } while (stripped);
+
+        *ret = TAKE_PTR(copy);
+
+        return 0;
+}
+
+int import_assign_pool_quota_and_warn(const char *path) {
+        int r;
+
+        /* Sets up the quota group hierarchy first for /var/lib/machines and then for `path`,
+         * logging the outcome. -ENOTTY from btrfs_subvol_auto_qgroup() is taken to mean "not on
+         * btrfs or not a subvolume" and ends the function early with success; any other error
+         * is logged and returned. */
+
+        r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+        if (r == -ENOTTY) {
+                log_debug_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, as directory is not on btrfs or not a subvolume. Ignoring.");
+                return 0;
+        } else if (r < 0)
+                return log_error_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines: %m");
+        else if (r > 0)
+                log_info("Set up default quota hierarchy for /var/lib/machines.");
+
+        r = btrfs_subvol_auto_qgroup(path, 0, true);
+        if (r == -ENOTTY) {
+                log_debug_errno(r, "Failed to set up quota hierarchy for %s, as directory is not on btrfs or not a subvolume. Ignoring.", path);
+                return 0;
+        } else if (r < 0)
+                return log_error_errno(r, "Failed to set up default quota hierarchy for %s: %m", path);
+        else if (r > 0)
+                log_debug("Set up default quota hierarchy for %s.", path);
+
+        return 0;
+}
diff --git a/src/shared/import-util.h b/src/shared/import-util.h
new file mode 100644
index 0000000..0f2a517
--- /dev/null
+++ b/src/shared/import-util.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum ImportVerify {
+ IMPORT_VERIFY_NO,
+ IMPORT_VERIFY_CHECKSUM,
+ IMPORT_VERIFY_SIGNATURE,
+ _IMPORT_VERIFY_MAX,
+ _IMPORT_VERIFY_INVALID = -1,
+} ImportVerify;
+
+int import_url_last_component(const char *url, char **ret);
+int import_url_change_last_component(const char *url, const char *suffix, char **ret);
+
+const char* import_verify_to_string(ImportVerify v) _const_;
+ImportVerify import_verify_from_string(const char *s) _pure_;
+
+int tar_strip_suffixes(const char *name, char **ret);
+int raw_strip_suffixes(const char *name, char **ret);
+
+int import_assign_pool_quota_and_warn(const char *path);
diff --git a/src/shared/initreq.h b/src/shared/initreq.h
new file mode 100644
index 0000000..1d7ff81
--- /dev/null
+++ b/src/shared/initreq.h
@@ -0,0 +1,73 @@
+/*
+ * initreq.h Interface to talk to init through /dev/initctl.
+ *
+ * Copyright (C) 1995-2004 Miquel van Smoorenburg
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * Version: @(#)initreq.h 1.28 31-Mar-2004 MvS
+ */
+
+#pragma once
+
+#include <sys/param.h>
+
+#if defined(__FreeBSD_kernel__)
+# define INIT_FIFO "/etc/.initctl"
+#else
+# define INIT_FIFO "/dev/initctl"
+#endif
+
+#define INIT_MAGIC 0x03091969
+#define INIT_CMD_START 0
+#define INIT_CMD_RUNLVL 1
+#define INIT_CMD_POWERFAIL 2
+#define INIT_CMD_POWERFAILNOW 3
+#define INIT_CMD_POWEROK 4
+#define INIT_CMD_BSD 5
+#define INIT_CMD_SETENV 6
+#define INIT_CMD_UNSETENV 7
+
+#define INIT_CMD_CHANGECONS 12345
+
+#ifdef MAXHOSTNAMELEN
+# define INITRQ_HLEN MAXHOSTNAMELEN
+#else
+# define INITRQ_HLEN 64
+#endif
+
+/*
+ * This is what BSD 4.4 uses when talking to init.
+ * Linux doesn't use this right now.
+ */
+struct init_request_bsd {
+ char gen_id[8]; /* Beats me.. telnetd uses "fe" */
+ char tty_id[16]; /* Tty name minus /dev/tty */
+ char host[INITRQ_HLEN]; /* Hostname */
+ char term_type[16]; /* Terminal type */
+ int signal; /* Signal to send */
+ int pid; /* Process to send to */
+ char exec_name[128]; /* Program to execute */
+ char reserved[128]; /* For future expansion. */
+};
+
+/*
+ * Because of legacy interfaces, "runlevel" and "sleeptime"
+ * aren't in a separate struct in the union.
+ *
+ * The weird sizes are because init expects the whole
+ * struct to be 384 bytes.
+ */
+struct init_request {
+ int magic; /* Magic number */
+ int cmd; /* What kind of request */
+ int runlevel; /* Runlevel to change to */
+ int sleeptime; /* Time between TERM and KILL */
+ union {
+ struct init_request_bsd bsd;
+ char data[368];
+ } i;
+};
diff --git a/src/shared/install-printf.c b/src/shared/install-printf.c
new file mode 100644
index 0000000..d2143be
--- /dev/null
+++ b/src/shared/install-printf.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "install-printf.h"
+#include "install.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) {
+        const UnitFileInstallInfo *info = userdata;
+        _cleanup_free_ char *prefix = NULL;
+        char *joined;
+        int r;
+
+        /* Resolves to the unit name without its type suffix. If that ends in "@" (i.e. no
+         * instance is set) and a default instance is known, the default instance is appended. */
+
+        assert(info);
+
+        r = unit_name_to_prefix_and_instance(info->name, &prefix);
+        if (r < 0)
+                return r;
+
+        if (!endswith(prefix, "@") || !info->default_instance) {
+                *ret = TAKE_PTR(prefix);
+                return 0;
+        }
+
+        joined = strjoin(prefix, info->default_instance);
+        if (!joined)
+                return -ENOMEM;
+
+        *ret = joined;
+        return 0;
+}
+
+static int specifier_name(char specifier, const void *data, const void *userdata, char **ret) {
+        const UnitFileInstallInfo *info = userdata;
+        char *copy;
+
+        /* Resolves to the full unit name. For a template unit with a known default instance,
+         * the default instance is filled in; otherwise the name is copied as is. */
+
+        assert(info);
+
+        if (!unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE) || !info->default_instance) {
+                copy = strdup(info->name);
+                if (!copy)
+                        return -ENOMEM;
+
+                *ret = copy;
+                return 0;
+        }
+
+        return unit_name_replace_instance(info->name, info->default_instance, ret);
+}
+
+static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) {
+        const UnitFileInstallInfo *info = userdata;
+        int r;
+
+        /* Resolves to the prefix of the unit name, as computed by unit_name_to_prefix(). */
+
+        assert(info);
+
+        r = unit_name_to_prefix(info->name, ret);
+        return r;
+}
+
+static int specifier_instance(char specifier, const void *data, const void *userdata, char **ret) {
+        const UnitFileInstallInfo *i = userdata;
+        _cleanup_free_ char *instance = NULL;
+        int r;
+
+        /* Resolves to the instance part of the unit name. If the name carries no instance (or
+         * an empty one), the default instance is used instead, or the empty string if there is
+         * none. On success ownership of the string is passed to the caller via `ret`.
+         *
+         * `instance` is freed automatically on every error path, so that a string already
+         * returned by unit_name_to_instance() is not leaked when free_and_strdup() fails. */
+
+        assert(i);
+
+        r = unit_name_to_instance(i->name, &instance);
+        if (r < 0)
+                return r;
+
+        if (isempty(instance)) {
+                r = free_and_strdup(&instance, strempty(i->default_instance));
+                if (r < 0)
+                        return r;
+        }
+
+        *ret = TAKE_PTR(instance);
+        return 0;
+}
+
+static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) {
+        _cleanup_free_ char *prefix = NULL;
+        const char *last_dash;
+        char *tail;
+        int r;
+
+        /* Resolves to whatever follows the last dash in the unit's prefix, or to the whole
+         * prefix if it contains no dash. */
+
+        r = specifier_prefix(specifier, data, userdata, &prefix);
+        if (r < 0)
+                return r;
+
+        last_dash = strrchr(prefix, '-');
+        if (!last_dash) {
+                *ret = TAKE_PTR(prefix);
+                return 0;
+        }
+
+        tail = strdup(last_dash + 1);
+        if (!tail)
+                return -ENOMEM;
+
+        *ret = tail;
+        return 0;
+}
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret) {
+        /* Like unit_full_printf(), minus everything that is path-related. Specifiers handled:
+         *
+         * %n: full id of the unit (foo@bar.waldo)
+         * %N: id of the unit without the suffix (foo@bar)
+         * %p: prefix (foo)
+         * %i: instance (bar)
+         * %j: what follows the last dash of the prefix (the whole prefix if there is no dash)
+         *
+         * %g: group name, %G: numeric GID (presumably of the running user, like %u/%U)
+         * %U: UID of the running user
+         * %u: username of the running user
+         *
+         * %m: machine ID of the running system
+         * %H: host name of the running system
+         * %b: boot ID of the running system
+         * %v: `uname -r` of the running system
+         */
+
+        assert(i);
+        assert(format);
+        assert(ret);
+
+        const Specifier table[] = {
+                { 'n', specifier_name,                NULL },
+                { 'N', specifier_prefix_and_instance, NULL },
+                { 'p', specifier_prefix,              NULL },
+                { 'i', specifier_instance,            NULL },
+                { 'j', specifier_last_component,      NULL },
+
+                { 'g', specifier_group_name,          NULL },
+                { 'G', specifier_group_id,            NULL },
+                { 'U', specifier_user_id,             NULL },
+                { 'u', specifier_user_name,           NULL },
+
+                { 'm', specifier_machine_id,          NULL },
+                { 'H', specifier_host_name,           NULL },
+                { 'b', specifier_boot_id,             NULL },
+                { 'v', specifier_kernel_release,      NULL },
+                {}
+        };
+
+        /* `i` is handed to every specifier callback as its userdata. */
+        return specifier_printf(format, table, i, ret);
+}
diff --git a/src/shared/install-printf.h b/src/shared/install-printf.h
new file mode 100644
index 0000000..fa8ea7e
--- /dev/null
+++ b/src/shared/install-printf.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "install.h"
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret);
diff --git a/src/shared/install.c b/src/shared/install.c
new file mode 100644
index 0000000..8629304
--- /dev/null
+++ b/src/shared/install.c
@@ -0,0 +1,3383 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "install-printf.h"
+#include "install.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+#define UNIT_FILE_FOLLOW_SYMLINK_MAX 64
+
+typedef enum SearchFlags {
+ SEARCH_LOAD = 1 << 0,
+ SEARCH_FOLLOW_CONFIG_SYMLINKS = 1 << 1,
+ SEARCH_DROPIN = 1 << 2,
+} SearchFlags;
+
+typedef struct {
+ OrderedHashmap *will_process;
+ OrderedHashmap *have_processed;
+} InstallContext;
+
+typedef enum {
+ PRESET_UNKNOWN,
+ PRESET_ENABLE,
+ PRESET_DISABLE,
+} PresetAction;
+
+typedef struct {
+ char *pattern;
+ PresetAction action;
+ char **instances;
+} PresetRule;
+
+typedef struct {
+ PresetRule *rules;
+ size_t n_rules;
+} Presets;
+
+static bool unit_file_install_info_has_rules(const UnitFileInstallInfo *i) {
+        assert(i);
+
+        /* True as soon as at least one of the alias, wanted-by and required-by lists is
+         * non-empty. */
+        return !(strv_isempty(i->aliases) &&
+                 strv_isempty(i->wanted_by) &&
+                 strv_isempty(i->required_by));
+}
+
+static bool unit_file_install_info_has_also(const UnitFileInstallInfo *i) {
+        bool no_also;
+
+        assert(i);
+
+        /* True if the "also" list has at least one entry. */
+        no_also = strv_isempty(i->also);
+        return !no_also;
+}
+
+static void presets_freep(Presets *p) {
+        size_t k;
+
+        /* Releases the rules array of `p` along with each rule's pattern and instance list, and
+         * resets the rule count. The Presets structure itself is not freed. */
+
+        if (!p)
+                return;
+
+        for (k = 0; k < p->n_rules; k++) {
+                PresetRule *rule = p->rules + k;
+
+                strv_free(rule->instances);
+                free(rule->pattern);
+        }
+
+        free(p->rules);
+        p->n_rules = 0;
+}
+
+bool unit_type_may_alias(UnitType type) {
+        /* Only these unit types are reported as permitting aliases. */
+        switch (type) {
+
+        case UNIT_SERVICE:
+        case UNIT_SOCKET:
+        case UNIT_TARGET:
+        case UNIT_DEVICE:
+        case UNIT_TIMER:
+        case UNIT_PATH:
+                return true;
+
+        default:
+                return false;
+        }
+}
+
+bool unit_type_may_template(UnitType type) {
+        /* Only these unit types are reported as permitting templates. Unlike for aliases,
+         * UNIT_DEVICE is not among them. */
+        switch (type) {
+
+        case UNIT_SERVICE:
+        case UNIT_SOCKET:
+        case UNIT_TARGET:
+        case UNIT_TIMER:
+        case UNIT_PATH:
+                return true;
+
+        default:
+                return false;
+        }
+}
+
+/* String names of the UnitFileType values, indexed by enum value. */
+static const char *unit_file_type_table[_UNIT_FILE_TYPE_MAX] = {
+ [UNIT_FILE_TYPE_REGULAR] = "regular",
+ [UNIT_FILE_TYPE_SYMLINK] = "symlink",
+ [UNIT_FILE_TYPE_MASKED] = "masked",
+};
+
+/* Generates a file-local to-string helper for the table above (presumably named
+ * unit_file_type_to_string() -- confirm against string-table.h). */
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(unit_file_type, UnitFileType);
+
+static int in_search_path(const LookupPaths *p, const char *path) {
+        _cleanup_free_ char *dir = NULL;
+        char **entry;
+
+        /* Checks whether the directory containing `path` is one of the entries of the search
+         * path. Returns true/false, or -ENOMEM. */
+
+        assert(path);
+
+        dir = dirname_malloc(path);
+        if (!dir)
+                return -ENOMEM;
+
+        STRV_FOREACH(entry, p->search_path) {
+                if (!path_equal(dir, *entry))
+                        continue;
+
+                return true;
+        }
+
+        return false;
+}
+
+static const char* skip_root(const LookupPaths *p, const char *path) {
+        char *rest;
+
+        /* Strips the root directory prefix (if one is configured) from `path`. Returns NULL if
+         * `path` is not below the root directory; otherwise the result starts with a slash. */
+
+        assert(p);
+        assert(path);
+
+        if (!p->root_dir)
+                return path;
+
+        rest = path_startswith(path, p->root_dir);
+        if (!rest)
+                return NULL;
+
+        if (rest[0] == '/')
+                return rest;
+
+        /* The remainder does not begin with a slash; step back onto the slash right in front of
+         * it, provided there is one. */
+        if (rest == path || rest[-1] != '/')
+                return NULL;
+
+        return rest - 1;
+}
+
+static int path_is_generator(const LookupPaths *p, const char *path) {
+        _cleanup_free_ char *dir = NULL;
+
+        /* Checks whether `path` is located directly in one of the three generator output
+         * directories. Returns true/false, or -ENOMEM. */
+
+        assert(p);
+        assert(path);
+
+        dir = dirname_malloc(path);
+        if (!dir)
+                return -ENOMEM;
+
+        if (path_equal_ptr(dir, p->generator))
+                return true;
+        if (path_equal_ptr(dir, p->generator_early))
+                return true;
+
+        return path_equal_ptr(dir, p->generator_late);
+}
+
+static int path_is_transient(const LookupPaths *p, const char *path) {
+        _cleanup_free_ char *dir = NULL;
+        bool match;
+
+        /* Checks whether `path` is located directly in the transient unit directory. Returns
+         * true/false, or -ENOMEM. */
+
+        assert(p);
+        assert(path);
+
+        dir = dirname_malloc(path);
+        if (!dir)
+                return -ENOMEM;
+
+        match = path_equal_ptr(dir, p->transient);
+        return match;
+}
+
+static int path_is_control(const LookupPaths *p, const char *path) {
+        _cleanup_free_ char *dir = NULL;
+
+        /* Checks whether `path` is located directly in the persistent or the runtime control
+         * directory. Returns true/false, or -ENOMEM. */
+
+        assert(p);
+        assert(path);
+
+        dir = dirname_malloc(path);
+        if (!dir)
+                return -ENOMEM;
+
+        if (path_equal_ptr(dir, p->persistent_control))
+                return true;
+
+        return path_equal_ptr(dir, p->runtime_control);
+}
+
+static int path_is_config(const LookupPaths *p, const char *path, bool check_parent) {
+        _cleanup_free_ char *parent = NULL;
+        const char *candidate = path;
+
+        /* Checks whether `path` (or its parent directory, if check_parent is set) is the
+         * persistent or the runtime configuration directory. Returns true/false, or -ENOMEM.
+         *
+         * There are deliberately *no* generic checks for /etc or /run here: with those,
+         * configuration could not be told apart from transient or generated units. */
+
+        assert(p);
+        assert(path);
+
+        if (check_parent) {
+                parent = dirname_malloc(path);
+                if (!parent)
+                        return -ENOMEM;
+
+                candidate = parent;
+        }
+
+        if (path_equal_ptr(candidate, p->persistent_config))
+                return true;
+
+        return path_equal_ptr(candidate, p->runtime_config);
+}
+
+static int path_is_runtime(const LookupPaths *p, const char *path, bool check_parent) {
+        _cleanup_free_ char *parent = NULL;
+        const char *unrooted, *candidate = path;
+
+        /* Checks whether `path` counts as runtime state. Returns true/false, or -ENOMEM.
+         *
+         * Anything below /run (relative to the root directory) qualifies. As a safety net the
+         * path (or its parent, if check_parent is set) is additionally compared against the
+         * individual runtime directories. */
+
+        assert(p);
+        assert(path);
+
+        unrooted = skip_root(p, path);
+        if (unrooted && path_startswith(unrooted, "/run"))
+                return true;
+
+        if (check_parent) {
+                parent = dirname_malloc(path);
+                if (!parent)
+                        return -ENOMEM;
+
+                candidate = parent;
+        }
+
+        return path_equal_ptr(candidate, p->runtime_config) ||
+               path_equal_ptr(candidate, p->generator) ||
+               path_equal_ptr(candidate, p->generator_early) ||
+               path_equal_ptr(candidate, p->generator_late) ||
+               path_equal_ptr(candidate, p->transient) ||
+               path_equal_ptr(candidate, p->runtime_control);
+}
+
+static int path_is_vendor(const LookupPaths *p, const char *path) {
+        const char *unrooted;
+
+        /* Checks whether `path` is in a vendor location: below /usr (or /lib on split-usr
+         * builds), or equal to SYSTEM_DATA_UNIT_PATH. Paths outside the root directory never
+         * qualify. */
+
+        assert(p);
+        assert(path);
+
+        unrooted = skip_root(p, path);
+        if (!unrooted)
+                return 0;
+
+#if HAVE_SPLIT_USR
+        if (path_startswith(unrooted, "/lib"))
+                return true;
+#endif
+
+        if (path_startswith(unrooted, "/usr"))
+                return true;
+
+        return path_equal(unrooted, SYSTEM_DATA_UNIT_PATH);
+}
+
+int unit_file_changes_add(
+                UnitFileChange **changes,
+                size_t *n_changes,
+                UnitFileChangeType type,
+                const char *path,
+                const char *source) {
+
+        _cleanup_free_ char *path_copy = NULL, *source_copy = NULL;
+        UnitFileChange *grown;
+
+        /* Appends one entry to the change list. `changes` and `n_changes` must either both be
+         * set or both be NULL; in the latter case nothing is recorded and 0 is returned. The
+         * entry gets its own, simplified copies of `path` and (if given) `source`. Returns 0 on
+         * success, -ENOMEM otherwise. */
+
+        assert(path);
+        assert(!changes == !n_changes);
+
+        if (!changes)
+                return 0;
+
+        grown = reallocarray(*changes, *n_changes + 1, sizeof(UnitFileChange));
+        if (!grown)
+                return -ENOMEM;
+        *changes = grown;
+
+        path_copy = strdup(path);
+        if (source)
+                source_copy = strdup(source);
+
+        if (!path_copy || (source && !source_copy))
+                return -ENOMEM;
+
+        path_simplify(path_copy, false);
+        if (source_copy)
+                path_simplify(source_copy, false);
+
+        /* The new entry takes over ownership of both copies. */
+        grown[*n_changes] = (UnitFileChange) {
+                .type = type,
+                .path = TAKE_PTR(path_copy),
+                .source = TAKE_PTR(source_copy),
+        };
+        *n_changes += 1;
+
+        return 0;
+}
+
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes) {
+        size_t k;
+
+        /* Frees the array of changes together with the path and source strings of each of its
+         * n_changes entries. */
+
+        assert(changes || n_changes == 0);
+
+        for (k = 0; k < n_changes; k++) {
+                UnitFileChange *entry = &changes[k];
+
+                free(entry->path);
+                free(entry->source);
+        }
+
+        free(changes);
+}
+
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet) {
+        bool error_logged = false;
+        size_t k;
+
+        /* Logs every entry of the change list: informational entries at info level (unless
+         * `quiet` is set), entries with a negative errno as type as errors. If `r` is negative
+         * and no entry produced an error message, a generic failure message is logged. */
+
+        assert(changes || n_changes == 0);
+        /* Without a verb there is nothing to name the failed operation with, hence errors are
+         * not allowed then. */
+        assert(verb || r >= 0);
+
+        for (k = 0; k < n_changes; k++) {
+                const UnitFileChange *c = changes + k;
+
+                assert(verb || c->type >= 0);
+
+                switch (c->type) {
+
+                case UNIT_FILE_SYMLINK:
+                        if (quiet)
+                                break;
+
+                        log_info("Created symlink %s %s %s.",
+                                 c->path,
+                                 special_glyph(SPECIAL_GLYPH_ARROW),
+                                 c->source);
+                        break;
+
+                case UNIT_FILE_UNLINK:
+                        if (quiet)
+                                break;
+
+                        log_info("Removed %s.", c->path);
+                        break;
+
+                case UNIT_FILE_IS_MASKED:
+                        if (quiet)
+                                break;
+
+                        log_info("Unit %s is masked, ignoring.", c->path);
+                        break;
+
+                case UNIT_FILE_IS_DANGLING:
+                        if (quiet)
+                                break;
+
+                        log_info("Unit %s is an alias to a unit that is not present, ignoring.",
+                                 c->path);
+                        break;
+
+                case -EEXIST:
+                        if (c->source)
+                                log_error_errno(c->type,
+                                                "Failed to %s unit, file %s already exists and is a symlink to %s.",
+                                                verb, c->path, c->source);
+                        else
+                                log_error_errno(c->type,
+                                                "Failed to %s unit, file %s already exists.",
+                                                verb, c->path);
+                        error_logged = true;
+                        break;
+
+                case -ERFKILL:
+                        log_error_errno(c->type, "Failed to %s unit, unit %s is masked.",
+                                        verb, c->path);
+                        error_logged = true;
+                        break;
+
+                case -EADDRNOTAVAIL:
+                        log_error_errno(c->type, "Failed to %s unit, unit %s is transient or generated.",
+                                        verb, c->path);
+                        error_logged = true;
+                        break;
+
+                case -ELOOP:
+                        log_error_errno(c->type, "Failed to %s unit, refusing to operate on linked unit file %s",
+                                        verb, c->path);
+                        error_logged = true;
+                        break;
+
+                case -ENOENT:
+                        log_error_errno(c->type, "Failed to %s unit, unit %s does not exist.", verb, c->path);
+                        error_logged = true;
+                        break;
+
+                default:
+                        /* Anything else must be a negative errno. */
+                        assert(c->type < 0);
+                        log_error_errno(c->type, "Failed to %s unit, file %s: %m.",
+                                        verb, c->path);
+                        error_logged = true;
+                        break;
+                }
+        }
+
+        if (r < 0 && !error_logged)
+                log_error_errno(r, "Failed to %s: %m.", verb);
+}
+
+/**
+ * Checks if two paths or symlinks from wd are the same, when root is the root of the filesystem.
+ * wd should be the full path in the host file system.
+ *
+ * Absolute paths are resolved below root, relative paths below wd; the two resulting paths are
+ * then compared with path_equal_or_files_same(). A NULL root is treated as "/".
+ */
+static bool chroot_symlinks_same(const char *root, const char *wd, const char *a, const char *b) {
+ assert(path_is_absolute(wd));
+
+ /* This will give incorrect results if the paths are relative and go outside
+ * of the chroot. False negatives are possible. */
+
+ if (!root)
+ root = "/";
+
+ /* Prefix each path with root (if absolute) or wd (if relative) before comparing */
+ a = strjoina(path_is_absolute(a) ? root : wd, "/", a);
+ b = strjoina(path_is_absolute(b) ? root : wd, "/", b);
+ return path_equal_or_files_same(a, b, 0);
+}
+
+/* Creates the symlink new_path -> old_path and records what happened in changes[].
+ *
+ * old_path has the root directory prefix stripped first (when skip_root() recognizes it), so that
+ * the link target is expressed relative to the root. If something already exists at new_path:
+ *   - a symlink that already points to the same place is accepted as is,
+ *   - anything else is an -EEXIST error unless 'force' is set, in which case the existing entry is
+ *     replaced via symlink_atomic().
+ *
+ * Returns 1 if a symlink was created or already exists and points to the right place, or a
+ * negative errno-style value on error. */
+static int create_symlink(
+                const LookupPaths *paths,
+                const char *old_path,
+                const char *new_path,
+                bool force,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        _cleanup_free_ char *dest = NULL, *dirname = NULL;
+        const char *rp;
+        int r;
+
+        assert(old_path);
+        assert(new_path);
+
+        rp = skip_root(paths, old_path);
+        if (rp)
+                old_path = rp;
+
+        /* Best effort: if the parent directories cannot be created, symlink() below fails and
+         * reports the actual error. */
+        (void) mkdir_parents_label(new_path, 0755);
+
+        if (symlink(old_path, new_path) >= 0) {
+                unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+                return 1;
+        }
+
+        if (errno != EEXIST) {
+                /* Save errno before calling unit_file_changes_add(): it allocates memory and may
+                 * overwrite errno, which would make us return the wrong error (or even 0). */
+                r = -errno;
+
+                unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+                return r;
+        }
+
+        /* Something is in the way, find out what it is */
+        r = readlink_malloc(new_path, &dest);
+        if (r < 0) {
+                /* translate EINVAL (non-symlink exists) to EEXIST */
+                if (r == -EINVAL)
+                        r = -EEXIST;
+
+                unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+                return r;
+        }
+
+        dirname = dirname_malloc(new_path);
+        if (!dirname)
+                return -ENOMEM;
+
+        if (chroot_symlinks_same(paths->root_dir, dirname, dest, old_path)) {
+                log_debug("Symlink %s → %s already exists", new_path, dest);
+                return 1;
+        }
+
+        if (!force) {
+                /* Report the destination of the conflicting symlink as the source of the change */
+                unit_file_changes_add(changes, n_changes, -EEXIST, new_path, dest);
+                return -EEXIST;
+        }
+
+        r = symlink_atomic(old_path, new_path);
+        if (r < 0) {
+                unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+                return r;
+        }
+
+        /* The old link was replaced: report it as a removal followed by a creation */
+        unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, new_path, NULL);
+        unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+
+        return 1;
+}
+
+/* Adds a simplified copy of path 'p' to the set *remove_symlinks_to, allocating the set on demand.
+ * Returns 1 if the path was newly added, 0 if set_consume() reported it as already present
+ * (-EEXIST), and a negative errno-style value on failure. */
+static int mark_symlink_for_removal(
+                Set **remove_symlinks_to,
+                const char *p) {
+
+        char *copy;
+        int r;
+
+        assert(p);
+
+        r = set_ensure_allocated(remove_symlinks_to, &path_hash_ops);
+        if (r < 0)
+                return r;
+
+        copy = strdup(p);
+        if (!copy)
+                return -ENOMEM;
+
+        path_simplify(copy, false);
+
+        /* The copy is handed over to set_consume() here and not touched again afterwards */
+        r = set_consume(*remove_symlinks_to, copy);
+        if (r == -EEXIST)
+                return 0;
+
+        return r < 0 ? r : 1;
+}
+
+/* Walks the directory open on 'fd' (located at 'path'), recursing into subdirectories, and removes
+ * every symlink with a valid unit name whose destination, destination basename or own name is in
+ * remove_symlinks_to. Takes possession of 'fd' and closes it.
+ *
+ * Each removal is recorded in changes[]; with 'dry_run' nothing is unlinked but the removal is
+ * still recorded. The path of each removed link (root prefix stripped if skip_root() recognizes it)
+ * is added to the set; when that adds a new entry and this is not a dry run, *restart is set.
+ *
+ * Allocation failures and directory read errors return immediately. Other errors are remembered
+ * (the first one wins) while the walk continues. Returns 0 if nothing failed. */
+static int remove_marked_symlinks_fd(
+ Set *remove_symlinks_to,
+ int fd,
+ const char *path,
+ const char *config_path,
+ const LookupPaths *lp,
+ bool dry_run,
+ bool *restart,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(remove_symlinks_to);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(lp);
+ assert(restart);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ /* NOTE(review): presumably needed because the caller hands in a duplicate of an fd that may
+ * already have been read through in an earlier pass — confirm. */
+ rewinddir(d);
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ /* A directory that vanished in the meantime is not an error */
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, dry_run, restart, changes, n_changes);
+ if (q < 0 && r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ const char *rp;
+ bool found;
+ int q;
+
+ /* Only symlinks named like units are candidates */
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+ path_simplify(p, false);
+
+ q = readlink_malloc(p, &dest);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* We remove all links pointing to a file or path that is marked, as well as all files sharing
+ * the same name as a file that is marked. */
+
+ found = set_contains(remove_symlinks_to, dest) ||
+ set_contains(remove_symlinks_to, basename(dest)) ||
+ set_contains(remove_symlinks_to, de->d_name);
+
+ if (!found)
+ continue;
+
+ if (!dry_run) {
+ if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) {
+ if (r == 0)
+ r = -errno;
+ unit_file_changes_add(changes, n_changes, -errno, p, NULL);
+ continue;
+ }
+
+ /* Best effort: the result of rmdir_parents() is deliberately ignored */
+ (void) rmdir_parents(p, config_path);
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, p, NULL);
+
+ /* Now, remember the full path (but with the root prefix removed) of
+ * the symlink we just removed, and remove any symlinks to it, too. */
+
+ rp = skip_root(lp, p);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: p);
+ if (q < 0)
+ return q;
+ /* A new entry in the set means links seen earlier may now match: ask for another pass */
+ if (q > 0 && !dry_run)
+ *restart = true;
+ }
+ }
+
+ return r;
+}
+
+/* Removes from the tree below config_path all symlinks matching the remove_symlinks_to set, by
+ * running remove_marked_symlinks_fd() repeatedly until a pass no longer requests a restart.
+ * Returns 0 if the set is empty or config_path does not exist, otherwise the first non-zero
+ * result of any pass (or 0 if all passes returned 0). */
+static int remove_marked_symlinks(
+                Set *remove_symlinks_to,
+                const char *config_path,
+                const LookupPaths *lp,
+                bool dry_run,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        _cleanup_close_ int fd = -1;
+        bool restart;
+        int r = 0;
+
+        assert(config_path);
+        assert(lp);
+
+        if (set_size(remove_symlinks_to) <= 0)
+                return 0;
+
+        fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+        if (fd < 0) {
+                if (errno == ENOENT)
+                        return 0;
+
+                return -errno;
+        }
+
+        do {
+                int dup_fd, q;
+
+                restart = false;
+
+                /* Each pass consumes its descriptor, hence work on a duplicate of the original */
+                dup_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+                if (dup_fd < 0)
+                        return -errno;
+
+                /* This takes possession of dup_fd and closes it */
+                q = remove_marked_symlinks_fd(remove_symlinks_to, dup_fd, config_path, config_path, lp, dry_run, &restart, changes, n_changes);
+                if (r == 0)
+                        r = q;
+        } while (restart);
+
+        return r;
+}
+
+/* Checks whether 'name' is one of the names install info 'i' is known under: its own name, one of
+ * its aliases, or (for a template with a default instance) the template instantiated with that
+ * default instance. Returns true/false, or a negative errno-style value if instantiating the
+ * template fails with anything other than -EINVAL. */
+static int is_symlink_with_known_name(const UnitFileInstallInfo *i, const char *name) {
+        _cleanup_free_ char *instantiated = NULL;
+        int r;
+
+        if (streq(name, i->name))
+                return true;
+
+        if (strv_contains(i->aliases, name))
+                return true;
+
+        /* Only templates with a DefaultInstance have one more candidate name */
+        if (!i->default_instance || !unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE))
+                return false;
+
+        /* Look for template symlink matching DefaultInstance */
+        r = unit_name_replace_instance(i->name, i->default_instance, &instantiated);
+        if (r >= 0)
+                return streq(name, instantiated) ? true : false;
+
+        /* -EINVAL merely means "no match", everything else is a real error */
+        if (r != -EINVAL)
+                return r;
+
+        return false;
+}
+
+/* Walks the directory open on 'fd' (located at 'path'), recursing into subdirectories, looking for
+ * symlinks that refer to install info 'i' either by their own name or by the basename of their
+ * destination. Takes possession of 'fd' and closes it.
+ *
+ * A symlink named like the unit, pointing to a file of the same name, and located directly at
+ * config_path/<name> is not counted as a match; it only sets *same_name_link. With 'match_aliases'
+ * a match additionally requires the symlink name to be one known to 'i' (see
+ * is_symlink_with_known_name()). With 'ignore_same_name' the symlink's own name is not considered.
+ *
+ * Returns 1 on the first match, 0 if nothing matched, negative on error. Non-fatal errors are
+ * remembered (the first one wins) while the walk continues. */
+static int find_symlinks_fd(
+ const char *root_dir,
+ const UnitFileInstallInfo *i,
+ bool match_aliases,
+ bool ignore_same_name,
+ int fd,
+ const char *path,
+ const char *config_path,
+ bool *same_name_link) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(same_name_link);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ /* A directory that vanished in the meantime is not an error */
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = find_symlinks_fd(root_dir, i, match_aliases, ignore_same_name, nfd,
+ p, config_path, same_name_link);
+ if (q > 0)
+ return 1;
+ if (r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ bool found_path = false, found_dest, b = false;
+ int q;
+
+ /* Acquire symlink name */
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+
+ /* Acquire symlink destination */
+ q = readlink_malloc(p, &dest);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* Make absolute */
+ if (!path_is_absolute(dest)) {
+ char *x;
+
+ x = prefix_root(root_dir, dest);
+ if (!x)
+ return -ENOMEM;
+
+ free_and_replace(dest, x);
+ }
+
+ assert(unit_name_is_valid(i->name, UNIT_NAME_ANY));
+ if (!ignore_same_name)
+ /* Check if the symlink itself matches what we are looking for.
+ *
+ * If ignore_same_name is specified, we are in one of the directories which
+ * have lower priority than the unit file, and even if a file or symlink with
+ * this name was found, we should ignore it. */
+ found_path = streq(de->d_name, i->name);
+
+ /* Check if what the symlink points to matches what we are looking for */
+ found_dest = streq(basename(dest), i->name);
+
+ if (found_path && found_dest) {
+ _cleanup_free_ char *t = NULL;
+
+ /* Filter out same name links in the main
+ * config path */
+ t = path_make_absolute(i->name, config_path);
+ if (!t)
+ return -ENOMEM;
+
+ b = path_equal(t, p);
+ }
+
+ if (b)
+ *same_name_link = true;
+ else if (found_path || found_dest) {
+ /* Without alias matching, any hit by name or by destination counts */
+ if (!match_aliases)
+ return 1;
+
+ /* Check if symlink name is in the set of names used by [Install] */
+ q = is_symlink_with_known_name(i, de->d_name);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return 1;
+ }
+ }
+ }
+
+ return r;
+}
+
+/* Opens the directory config_path and searches it with find_symlinks_fd() for symlinks referring
+ * to 'i'. A directory that cannot be opened because of ENOENT, ENOTDIR or EACCES is treated as
+ * containing no symlinks (returns 0); any other open failure is returned as negative errno. */
+static int find_symlinks(
+                const char *root_dir,
+                const UnitFileInstallInfo *i,
+                bool match_name,
+                bool ignore_same_name,
+                const char *config_path,
+                bool *same_name_link) {
+
+        int dir_fd;
+
+        assert(i);
+        assert(config_path);
+        assert(same_name_link);
+
+        dir_fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+        if (dir_fd >= 0)
+                /* This takes possession of dir_fd and closes it */
+                return find_symlinks_fd(root_dir, i, match_name, ignore_same_name, dir_fd,
+                                        config_path, config_path, same_name_link);
+
+        if (IN_SET(errno, ENOENT, ENOTDIR, EACCES))
+                return 0;
+
+        return -errno;
+}
+
+/* Searches all directories of paths->search_path for symlinks referring to 'i' and derives an
+ * enablement state from where they were found. Returns 1 and stores the state in *state if one
+ * could be determined, 0 if not (*state is then left untouched), negative on error.
+ *
+ * Precedence of the results: a match in the persistent config directory (or, for user scope, in a
+ * user config directory) yields UNIT_FILE_ENABLED right away; otherwise a match in a runtime
+ * directory yields UNIT_FILE_ENABLED_RUNTIME; otherwise a match elsewhere yields UNIT_FILE_STATIC,
+ * but only for instance names; otherwise a "same name" link yields UNIT_FILE_LINKED or
+ * UNIT_FILE_LINKED_RUNTIME. */
+static int find_symlinks_in_scope(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const UnitFileInstallInfo *i,
+ bool match_name,
+ UnitFileState *state) {
+
+ bool same_name_link_runtime = false, same_name_link_config = false;
+ bool enabled_in_runtime = false, enabled_at_all = false;
+ bool ignore_same_name = false;
+ char **p;
+ int r;
+
+ assert(paths);
+ assert(i);
+
+ /* As we iterate over the list of search paths in paths->search_path, we may encounter "same name"
+ * symlinks. The ones which are "below" (i.e. have lower priority) than the unit file itself are
+ * effectively masked, so we should ignore them. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ bool same_name_link = false;
+
+ r = find_symlinks(paths->root_dir, i, match_name, ignore_same_name, *p, &same_name_link);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* We found symlinks in this dir? Yay! Let's see where precisely it is enabled. */
+
+ if (path_equal_ptr(*p, paths->persistent_config)) {
+ /* This is the best outcome, let's return it immediately. */
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ /* look for global enablement of user units */
+ if (scope == UNIT_FILE_USER && path_is_user_config_dir(*p)) {
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ enabled_in_runtime = true;
+ else
+ enabled_at_all = true;
+
+ } else if (same_name_link) {
+ /* Only a same-name link was seen here; remember which kind of directory it was in */
+ if (path_equal_ptr(*p, paths->persistent_config))
+ same_name_link_config = true;
+ else {
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ same_name_link_runtime = true;
+ }
+ }
+
+ /* Check if next iteration will be "below" the unit file (either a regular file
+ * or a symlink), and hence should be ignored */
+ if (!ignore_same_name && path_startswith(i->path, *p))
+ ignore_same_name = true;
+ }
+
+ if (enabled_in_runtime) {
+ *state = UNIT_FILE_ENABLED_RUNTIME;
+ return 1;
+ }
+
+ /* Here's a special rule: if the unit we are looking for is an instance, and it symlinked in the search path
+ * outside of runtime and configuration directory, then we consider it statically enabled. Note we do that only
+ * for instance, not for regular names, as those are merely aliases, while instances explicitly instantiate
+ * something, and hence are a much stronger concept. */
+ if (enabled_at_all && unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) {
+ *state = UNIT_FILE_STATIC;
+ return 1;
+ }
+
+ /* Hmm, we didn't find it, but maybe we found the same name
+ * link? */
+ if (same_name_link_config) {
+ *state = UNIT_FILE_LINKED;
+ return 1;
+ }
+ if (same_name_link_runtime) {
+ *state = UNIT_FILE_LINKED_RUNTIME;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Frees a UnitFileInstallInfo object together with all strings and string lists it owns.
+ * Accepts NULL, in which case nothing happens. */
+static void install_info_free(UnitFileInstallInfo *i) {
+
+        if (!i)
+                return;
+
+        /* Plain strings first ... */
+        free(i->name);
+        free(i->path);
+        free(i->default_instance);
+        free(i->symlink_target);
+
+        /* ... then the string lists ... */
+        strv_free(i->aliases);
+        strv_free(i->wanted_by);
+        strv_free(i->required_by);
+        strv_free(i->also);
+
+        /* ... and finally the object itself */
+        free(i);
+}
+
+/* Releases both hashmaps of an InstallContext, freeing every UnitFileInstallInfo stored in them
+ * via install_info_free(). The map pointers are replaced by whatever the free helper returns. */
+static void install_context_done(InstallContext *c) {
+        assert(c);
+
+        c->have_processed = ordered_hashmap_free_with_destructor(c->have_processed, install_info_free);
+        c->will_process = ordered_hashmap_free_with_destructor(c->will_process, install_info_free);
+}
+
+/* Looks up the install info registered under 'name', checking the have_processed map first and
+ * the will_process map second. Returns NULL if neither contains it. */
+static UnitFileInstallInfo *install_info_find(InstallContext *c, const char *name) {
+        UnitFileInstallInfo *processed;
+
+        processed = ordered_hashmap_get(c->have_processed, name);
+
+        return processed ?: ordered_hashmap_get(c->will_process, name);
+}
+
+/* Decides whether the loaded unit file 'i' may be subject to enable/disable operations.
+ * Masked units are refused with -ERFKILL, units located in generator or transient directories
+ * with -EADDRNOTAVAIL; in both cases the refusal is also recorded in changes[]. Returns 0 if the
+ * unit may be processed. */
+static int install_info_may_process(
+                const UnitFileInstallInfo *i,
+                const LookupPaths *paths,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        int refusal;
+
+        assert(i);
+        assert(paths);
+
+        if (i->type == UNIT_FILE_TYPE_MASKED)
+                refusal = -ERFKILL;
+        else if (path_is_generator(paths, i->path) ||
+                 path_is_transient(paths, i->path))
+                refusal = -EADDRNOTAVAIL;
+        else
+                return 0;
+
+        unit_file_changes_add(changes, n_changes, refusal, i->path, NULL);
+        return refusal;
+}
+
+/**
+ * Adds a new UnitFileInstallInfo entry under name in the InstallContext.will_process
+ * hashmap, or retrieves the existing one if already present.
+ *
+ * At least one of name and path must be given; if name is NULL, the basename of path is used.
+ * The name must be a valid unit name, otherwise -EINVAL is returned. If ret is non-NULL, it
+ * receives the (new or existing) entry.
+ *
+ * Returns negative on error, 0 if the unit was already known, 1 otherwise.
+ */
+static int install_info_add(
+ InstallContext *c,
+ const char *name,
+ const char *path,
+ bool auxiliary,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i = NULL;
+ int r;
+
+ assert(c);
+ assert(name || path);
+
+ if (!name)
+ name = basename(path);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ i = install_info_find(c, name);
+ if (i) {
+ /* An existing entry stays auxiliary only if every request for it was auxiliary */
+ i->auxiliary = i->auxiliary && auxiliary;
+
+ if (ret)
+ *ret = i;
+ return 0;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&c->will_process, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ i = new(UnitFileInstallInfo, 1);
+ if (!i)
+ return -ENOMEM;
+
+ /* All fields not listed here start out zeroed; the type marks the entry as not loaded yet */
+ *i = (UnitFileInstallInfo) {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ .auxiliary = auxiliary,
+ };
+
+ i->name = strdup(name);
+ if (!i->name) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (path) {
+ i->path = strdup(path);
+ if (!i->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ /* The map key is the entry's own name string, so it lives exactly as long as the entry */
+ r = ordered_hashmap_put(c->will_process, i->name, i);
+ if (r < 0)
+ goto fail;
+
+ if (ret)
+ *ret = i;
+
+ return 1;
+
+fail:
+ install_info_free(i);
+ return r;
+}
+
+/* Config parser callback for Alias= in the [Install] section. For unit types that may have
+ * aliases the value is handed to config_parse_strv() unchanged; for all others a warning is
+ * logged via log_syntax() and the setting is ignored. */
+static int config_parse_alias(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        UnitType unit_type;
+
+        assert(unit);
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        unit_type = unit_name_to_type(unit);
+        if (unit_type_may_alias(unit_type))
+                return config_parse_strv(unit, filename, line, section, section_line,
+                                         lvalue, ltype, rvalue, data, userdata);
+
+        return log_syntax(unit, LOG_WARNING, filename, line, 0,
+                          "Alias= is not allowed for %s units, ignoring.",
+                          unit_type_to_string(unit_type));
+}
+
+/* Config parser callback for Also= in the [Install] section. 'data' is the InstallContext and
+ * 'userdata' the UnitFileInstallInfo being loaded. Each word of the value is expanded with
+ * install_full_printf(), validated as a unit name, registered in the context as an auxiliary
+ * unit and appended to info->also. Returns 0 on success, negative on error (-EINVAL for an
+ * invalid unit name). */
+static int config_parse_also(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ /* alsoinfo only receives the result of install_info_add() and is not used otherwise */
+ UnitFileInstallInfo *info = userdata, *alsoinfo = NULL;
+ InstallContext *c = data;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *printed = NULL;
+
+ /* r == 0 means there are no more words to extract */
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = install_full_printf(info, word, &printed);
+ if (r < 0)
+ return r;
+
+ if (!unit_name_is_valid(printed, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_add(c, printed, NULL, true, &alsoinfo);
+ if (r < 0)
+ return r;
+
+ r = strv_push(&info->also, printed);
+ if (r < 0)
+ return r;
+
+ /* The string now belongs to info->also, keep the cleanup handler from freeing it */
+ printed = NULL;
+ }
+
+ return 0;
+}
+
+/* Config parser callback for DefaultInstance= in the [Install] section. 'data' is the
+ * UnitFileInstallInfo being loaded. The setting is silently ignored for instance units, and
+ * ignored with a warning for anything that is not a template. Otherwise the value is expanded
+ * with install_full_printf(), validated as an instance name (-EINVAL if invalid) and stored in
+ * i->default_instance, replacing any previous value. */
+static int config_parse_default_instance(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitFileInstallInfo *i = data;
+ _cleanup_free_ char *printed = NULL;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (unit_name_is_valid(unit, UNIT_NAME_INSTANCE))
+ /* When enabling an instance, we might be using a template unit file,
+ * but we should ignore DefaultInstance silently. */
+ return 0;
+ if (!unit_name_is_valid(unit, UNIT_NAME_TEMPLATE))
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DefaultInstance= only makes sense for template units, ignoring.");
+
+ r = install_full_printf(i, rvalue, &printed);
+ if (r < 0)
+ return r;
+
+ if (!unit_instance_is_valid(printed))
+ return -EINVAL;
+
+ /* Hands 'printed' over to i->default_instance */
+ return free_and_replace(i->default_instance, printed);
+}
+
+/* Loads (or merely classifies) the unit file or drop-in at 'path' into 'info'.
+ *
+ * For a main unit file (SEARCH_DROPIN not set) the unit name's type is validated first. Without
+ * SEARCH_LOAD the file is only lstat()ed and info->type is set to MASKED or REGULAR; a symlink
+ * yields -ELOOP, a directory -EISDIR, anything else -ENOTTY. With SEARCH_LOAD the file is opened
+ * with O_NOFOLLOW and its [Install] section is parsed into 'info'.
+ *
+ * For a drop-in (SEARCH_DROPIN set) nothing happens without SEARCH_LOAD; otherwise the file is
+ * opened via chase_symlinks_and_open() relative to root_dir and parsed. info->type is never
+ * changed for drop-ins.
+ *
+ * Returns negative on error, 0 when nothing was parsed (not loading, or an empty/masked file),
+ * and after a successful parse the total number of Alias=, WantedBy= and RequiredBy= entries
+ * accumulated in 'info'. 'c' may be NULL only if the file is not actually parsed. */
+static int unit_file_load(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+
+ /* Maps the [Install] settings to their parsers and to the fields they fill in */
+ const ConfigTableItem items[] = {
+ { "Install", "Alias", config_parse_alias, 0, &info->aliases },
+ { "Install", "WantedBy", config_parse_strv, 0, &info->wanted_by },
+ { "Install", "RequiredBy", config_parse_strv, 0, &info->required_by },
+ { "Install", "DefaultInstance", config_parse_default_instance, 0, info },
+ { "Install", "Also", config_parse_also, 0, c },
+ {}
+ };
+
+ UnitType type;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int r;
+
+ assert(info);
+ assert(path);
+
+ if (!(flags & SEARCH_DROPIN)) {
+ /* Loading or checking for the main unit file… */
+
+ type = unit_name_to_type(info->name);
+ if (type < 0)
+ return -EINVAL;
+ if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE) && !unit_type_may_template(type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unit type %s cannot be templated.", unit_type_to_string(type));
+
+ if (!(flags & SEARCH_LOAD)) {
+ /* Only classify the file, without following symlinks and without reading it */
+ r = lstat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ if (null_or_empty(&st))
+ info->type = UNIT_FILE_TYPE_MASKED;
+ else if (S_ISREG(st.st_mode))
+ info->type = UNIT_FILE_TYPE_REGULAR;
+ else if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ else if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ else
+ return -ENOTTY;
+
+ return 0;
+ }
+
+ /* NOTE(review): with O_NOFOLLOW a symlink presumably fails with ELOOP here, which
+ * unit_file_load_or_readlink() relies on — confirm against open(2). */
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+ } else {
+ /* Operating on a drop-in file. If we aren't supposed to load the unit file drop-ins don't matter, let's hence shortcut this. */
+
+ if (!(flags & SEARCH_LOAD))
+ return 0;
+
+ fd = chase_symlinks_and_open(path, root_dir, 0, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd < 0)
+ return fd;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (null_or_empty(&st)) {
+ /* An empty drop-in does not mask the unit, only an empty main unit file does */
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ return 0;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return -errno;
+ /* The stream owns the descriptor now, keep the fd cleanup handler from closing it too */
+ fd = -1;
+
+ /* c is only needed if we actually load the file (it's referenced from items[] btw, in case you wonder.) */
+ assert(c);
+
+ r = config_parse(info->name, path, f,
+ NULL,
+ config_item_table_lookup, items,
+ CONFIG_PARSE_RELAXED|CONFIG_PARSE_ALLOW_INCLUDE, info);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse %s: %m", info->name);
+
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_REGULAR;
+
+ return
+ (int) strv_length(info->aliases) +
+ (int) strv_length(info->wanted_by) +
+ (int) strv_length(info->required_by);
+}
+
+/* Like unit_file_load(), but additionally handles main unit files that are symlinks: when
+ * unit_file_load() reports -ELOOP (and this is not a drop-in), the link is read instead.
+ * A link to /dev/null marks the unit as MASKED. Any other link must point to a name of a
+ * compatible kind (plain/instance/template) and of the same unit type, otherwise -EINVAL is
+ * returned; info->symlink_target is then set to the fully qualified destination and info->type to
+ * UNIT_FILE_TYPE_SYMLINK. Returns 0 when a symlink was processed, otherwise whatever
+ * unit_file_load() returned. */
+static int unit_file_load_or_readlink(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+
+ _cleanup_free_ char *target = NULL;
+ int r;
+
+ r = unit_file_load(c, info, path, root_dir, flags);
+ if (r != -ELOOP || (flags & SEARCH_DROPIN))
+ return r;
+
+ /* This is a symlink, let's read it. */
+
+ r = readlink_malloc(path, &target);
+ if (r < 0)
+ return r;
+
+ if (path_equal(target, "/dev/null"))
+ info->type = UNIT_FILE_TYPE_MASKED;
+ else {
+ const char *bn;
+ UnitType a, b;
+
+ bn = basename(target);
+
+ /* The kind of name the link points to must fit the kind of name of the link itself */
+ if (unit_name_is_valid(info->name, UNIT_NAME_PLAIN)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ /* Enforce that the symlink destination does not
+ * change the unit file type. */
+
+ a = unit_name_to_type(info->name);
+ b = unit_name_to_type(bn);
+ if (a < 0 || b < 0 || a != b)
+ return -EINVAL;
+
+ if (path_is_absolute(target))
+ /* This is an absolute path, prefix the root so that we always deal with fully qualified paths */
+ info->symlink_target = prefix_root(root_dir, target);
+ else
+ /* This is a relative path, take it relative to the dir the symlink is located in. */
+ info->symlink_target = file_in_same_dir(path, target);
+ if (!info->symlink_target)
+ return -ENOMEM;
+
+ info->type = UNIT_FILE_TYPE_SYMLINK;
+ }
+
+ return 0;
+}
+
+/* Locates and loads the unit file for 'info'.
+ *
+ * Does nothing if the info was loaded before (type already set). If info->path is set, exactly
+ * that file is loaded. Otherwise each directory of paths->search_path is tried for a file named
+ * info->name; if none is found and the name is an instance, the search is repeated for the
+ * corresponding template. The first hit is stored in info->path. -ENOENT, -ENOTDIR and -EACCES
+ * from a directory are skipped, other errors abort the search.
+ *
+ * Unless the unit turned out to be masked, all ".conf" drop-ins from the "<name>.d" (and, for
+ * instances, "<template>.d") directories of all search paths are loaded on top.
+ *
+ * Returns negative on error (-ENOENT if no unit file was found), otherwise the result of loading
+ * the main unit file. */
+static int unit_file_search(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const LookupPaths *paths,
+ SearchFlags flags) {
+
+ const char *dropin_dir_name = NULL, *dropin_template_dir_name = NULL;
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL;
+ _cleanup_free_ char *template = NULL;
+ bool found_unit = false;
+ int r, result;
+ char **p;
+
+ assert(info);
+ assert(paths);
+
+ /* Was this unit already loaded? */
+ if (info->type != _UNIT_FILE_TYPE_INVALID)
+ return 0;
+
+ if (info->path)
+ return unit_file_load_or_readlink(c, info, info->path, paths->root_dir, flags);
+
+ assert(info->name);
+
+ /* For instances, remember the template name as a fallback */
+ if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+ r = unit_name_template(info->name, &template);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = strjoin(*p, "/", info->name);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+
+ if (!found_unit && template) {
+
+ /* Unit file doesn't exist, however instance
+ * enablement was requested. We will check if it is
+ * possible to load template unit file. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = strjoin(*p, "/", template);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+ }
+
+ if (!found_unit)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Cannot find unit %s%s%s.",
+ info->name, template ? " or " : "", strempty(template));
+
+ /* Drop-ins are not looked at for masked units */
+ if (info->type == UNIT_FILE_TYPE_MASKED)
+ return result;
+
+ /* Search for drop-in directories */
+
+ dropin_dir_name = strjoina(info->name, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+
+ if (template) {
+ dropin_template_dir_name = strjoina(template, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_template_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Load drop-in conf files */
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) dirs);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get list of conf files: %m");
+
+ STRV_FOREACH(p, files) {
+ r = unit_file_load_or_readlink(c, info, *p, paths->root_dir, flags | SEARCH_DROPIN);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load conf file %s: %m", *p);
+ }
+
+ /* The return value reflects the main unit file only, not the drop-ins */
+ return result;
+}
+
+/* Follows the symlink recorded in install info 'i' by one step: the symlink target becomes the
+ * new path of 'i', the type is reset and the target is loaded with unit_file_load_or_readlink().
+ * Returns -EINVAL if 'i' is not a symlink or has no target recorded, and -EXDEV if the target's
+ * basename differs from the unit name (unless ignore_different_name is set); 'i' is left
+ * untouched in those cases. */
+static int install_info_follow(
+                InstallContext *c,
+                UnitFileInstallInfo *i,
+                const char *root_dir,
+                SearchFlags flags,
+                bool ignore_different_name) {
+
+        assert(c);
+        assert(i);
+
+        if (i->type != UNIT_FILE_TYPE_SYMLINK || !i->symlink_target)
+                return -EINVAL;
+
+        /* If the basename doesn't match, the caller should add a
+         * complete new entry for this. */
+        if (!ignore_different_name) {
+                if (!streq(basename(i->symlink_target), i->name))
+                        return -EXDEV;
+        }
+
+        /* Continue with the link destination as the unit's path, and mark it as not loaded */
+        free_and_replace(i->path, i->symlink_target);
+        i->type = _UNIT_FILE_TYPE_INVALID;
+
+        return unit_file_load_or_readlink(c, i, i->path, root_dir, flags);
+}
+
+/**
+ * Search for the unit file. If the unit name is a symlink, follow the symlink to the
+ * target, maybe more than once. Propagate the instance name if present.
+ *
+ * At most UNIT_FILE_FOLLOW_SYMLINK_MAX links are followed before giving up with -ELOOP. Unless
+ * SEARCH_FOLLOW_CONFIG_SYMLINKS is set, a symlink for which path_is_config() is true is refused
+ * with -ELOOP as well. If a link points to a differently named unit, a new install info entry is
+ * added for that name and the traversal continues with it; if that unit cannot be found,
+ * -ENOLINK is returned.
+ *
+ * On success returns 0 and, if ret is non-NULL, stores the install info the traversal ended at.
+ */
+static int install_info_traverse(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ UnitFileInstallInfo *start,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i;
+ unsigned k = 0;
+ int r;
+
+ assert(paths);
+ assert(start);
+ assert(c);
+
+ r = unit_file_search(c, start, paths, flags);
+ if (r < 0)
+ return r;
+
+ i = start;
+ while (i->type == UNIT_FILE_TYPE_SYMLINK) {
+ /* Follow the symlink */
+
+ if (++k > UNIT_FILE_FOLLOW_SYMLINK_MAX)
+ return -ELOOP;
+
+ if (!(flags & SEARCH_FOLLOW_CONFIG_SYMLINKS)) {
+ r = path_is_config(paths, i->path, true);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -ELOOP;
+ }
+
+ r = install_info_follow(c, i, paths->root_dir, flags, false);
+ if (r == -EXDEV) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *bn;
+
+ /* Target has a different name, create a new
+ * install info object for that, and continue
+ * with that. */
+
+ bn = basename(i->symlink_target);
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_INSTANCE) &&
+ unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) {
+
+ _cleanup_free_ char *instance = NULL;
+
+ /* An instance linked to a template: carry our instance over to the target name */
+ r = unit_name_to_instance(i->name, &instance);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(bn, instance, &buffer);
+ if (r < 0)
+ return r;
+
+ if (streq(buffer, i->name)) {
+
+ /* We filled in the instance, and the target stayed the same? If so, then let's
+ * honour the link as it is. */
+
+ r = install_info_follow(c, i, paths->root_dir, flags, true);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ bn = buffer;
+ }
+
+ /* From here on 'i' refers to the entry for the link target's name */
+ r = install_info_add(c, bn, NULL, false, &i);
+ if (r < 0)
+ return r;
+
+ /* Try again, with the new target we found. */
+ r = unit_file_search(c, i, paths, flags);
+ if (r == -ENOENT)
+ /* Translate error code to highlight this specific case */
+ return -ENOLINK;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = i;
+
+ return 0;
+}
+
+/**
+ * Registers name_or_path with install_info_add(): an absolute path is passed as the unit's path
+ * (with paths->root_dir prepended, the name then being derived from the path), anything else is
+ * passed as the unit's name. The entry is added as non-auxiliary.
+ */
+static int install_info_add_auto(
+                InstallContext *c,
+                const LookupPaths *paths,
+                const char *name_or_path,
+                UnitFileInstallInfo **ret) {
+
+        const char *prefixed;
+
+        assert(c);
+        assert(name_or_path);
+
+        if (!path_is_absolute(name_or_path))
+                return install_info_add(c, name_or_path, NULL, false, ret);
+
+        prefixed = prefix_roota(paths->root_dir, name_or_path);
+
+        return install_info_add(c, NULL, prefixed, false, ret);
+}
+
+/* Registers 'name' (a unit name or an absolute path) in the install context and traverses to the
+ * unit file it finally refers to, which is returned in *ret if ret is non-NULL. On failure the
+ * error is recorded in changes[] under 'name' and returned; on success the result of
+ * install_info_traverse() is returned. */
+static int install_info_discover(
+                UnitFileScope scope,
+                InstallContext *c,
+                const LookupPaths *paths,
+                const char *name,
+                SearchFlags flags,
+                UnitFileInstallInfo **ret,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        UnitFileInstallInfo *info;
+        int r;
+
+        assert(c);
+        assert(paths);
+        assert(name);
+
+        r = install_info_add_auto(c, paths, name, &info);
+        if (r < 0)
+                goto fail;
+
+        r = install_info_traverse(scope, c, paths, info, flags, ret);
+        if (r < 0)
+                goto fail;
+
+        return r;
+
+fail:
+        unit_file_changes_add(changes, n_changes, r, name, NULL);
+        return r;
+}
+
+/* Like install_info_discover(), but additionally verifies with install_info_may_process() that
+ * the discovered unit may be subject to enable/disable operations (i.e. is not masked, transient
+ * or generated).
+ *
+ * 'ret' is optional: if non-NULL it receives the discovered install info once discovery succeeds
+ * (also when the subsequent check refuses the unit). Returns a negative errno-style value on
+ * failure, otherwise the result of install_info_may_process(). */
+static int install_info_discover_and_check(
+                UnitFileScope scope,
+                InstallContext *c,
+                const LookupPaths *paths,
+                const char *name,
+                SearchFlags flags,
+                UnitFileInstallInfo **ret,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        UnitFileInstallInfo *info = NULL;
+        int r;
+
+        /* Always discover into a local variable: the check below needs the install info even when
+         * the caller is not interested in it and passed ret == NULL. Passing NULL on to
+         * install_info_may_process() would trip its assertion. */
+        r = install_info_discover(scope, c, paths, name, flags, &info, changes, n_changes);
+        if (r < 0)
+                return r;
+
+        if (ret)
+                *ret = info;
+
+        return install_info_may_process(info, paths, changes, n_changes);
+}
+
+/* Creates, below config_path, one symlink to the unit file of 'i' for every entry of i->aliases
+ * (each expanded with install_full_printf() first). Expansion and allocation failures abort
+ * immediately; otherwise all aliases are attempted and the first non-zero create_symlink() result
+ * is returned (0 if there are no aliases). */
+static int install_info_symlink_alias(
+                UnitFileInstallInfo *i,
+                const LookupPaths *paths,
+                const char *config_path,
+                bool force,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        char **alias;
+        int result = 0;
+
+        assert(i);
+        assert(paths);
+        assert(config_path);
+
+        STRV_FOREACH(alias, i->aliases) {
+                _cleanup_free_ char *link_path = NULL, *expanded = NULL;
+                int q;
+
+                q = install_full_printf(i, *alias, &expanded);
+                if (q < 0)
+                        return q;
+
+                link_path = path_make_absolute(expanded, config_path);
+                if (!link_path)
+                        return -ENOMEM;
+
+                q = create_symlink(paths, i->path, link_path, force, changes, n_changes);
+
+                /* Keep going, but remember the first non-zero outcome */
+                if (result == 0)
+                        result = q;
+        }
+
+        return result;
+}
+
+/* Creates, for every unit listed in 'list' (each entry expanded with install_full_printf()), a
+ * symlink "<config_path>/<entry><suffix><name>" pointing to the unit file of 'i'. <name> is the
+ * unit's own name, or for templates the template instantiated with its default instance.
+ *
+ * Returns 0 if the list is empty, and 1 for a template without a default instance (nothing is
+ * done then). If the instantiated default instance turns out to be masked, -ERFKILL is recorded
+ * in changes[] and returned. List entries that are not valid unit names are skipped and set the
+ * result to -EINVAL. Otherwise the first non-zero create_symlink() result is returned. */
+static int install_info_symlink_wants(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ char **list,
+ const char *suffix,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *buf = NULL;
+ const char *n;
+ char **s;
+ int r = 0, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ if (strv_isempty(list))
+ return 0;
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) {
+ /* Temporary install info on the stack, only used to look up the default instance */
+ UnitFileInstallInfo instance = {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ };
+ _cleanup_free_ char *path = NULL;
+
+ /* If this is a template, and we have no instance, don't do anything */
+ if (!i->default_instance)
+ return 1;
+
+ r = unit_name_replace_instance(i->name, i->default_instance, &buf);
+ if (r < 0)
+ return r;
+
+ instance.name = buf;
+ /* No SEARCH_LOAD here, so the file is only classified and no InstallContext is needed */
+ r = unit_file_search(NULL, &instance, paths, SEARCH_FOLLOW_CONFIG_SYMLINKS);
+ if (r < 0)
+ return r;
+
+ /* Take over the path the search allocated, so that it is freed when leaving this scope */
+ path = TAKE_PTR(instance.path);
+
+ if (instance.type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, -ERFKILL, path, NULL);
+ return -ERFKILL;
+ }
+
+ n = buf;
+ } else
+ n = i->name;
+
+ STRV_FOREACH(s, list) {
+ _cleanup_free_ char *path = NULL, *dst = NULL;
+
+ q = install_full_printf(i, *s, &dst);
+ if (q < 0)
+ return q;
+
+ if (!unit_name_is_valid(dst, UNIT_NAME_ANY)) {
+ r = -EINVAL;
+ continue;
+ }
+
+ path = strjoin(config_path, "/", dst, suffix, n);
+ if (!path)
+ return -ENOMEM;
+
+ /* Note that these links are always created with force enabled */
+ q = create_symlink(paths, i->path, path, true, changes, n_changes);
+ if (r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+ /* Links a unit file that lives outside of the search path into 'config_path'
+  * under its own name. Returns 0 without doing anything when in_search_path()
+  * reports that i->path already is inside the search path; otherwise returns
+  * the result of create_symlink(). */
+ static int install_info_symlink_link(
+                 UnitFileInstallInfo *i,
+                 const LookupPaths *paths,
+                 const char *config_path,
+                 bool force,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_free_ char *link_path = NULL;
+         int r;
+
+         assert(i);
+         assert(paths);
+         assert(config_path);
+         assert(i->path);
+
+         r = in_search_path(paths, i->path);
+         if (r != 0)
+                 return r < 0 ? r : 0;
+
+         link_path = strjoin(config_path, "/", i->name);
+         if (!link_path)
+                 return -ENOMEM;
+
+         return create_symlink(paths, i->path, link_path, force, changes, n_changes);
+ }
+
+ /* Creates all symlinks requested by one install info entry: aliases, the
+  * ".wants/" and ".requires/" links, and finally the link to the unit file
+  * itself. Entries that are not of type UNIT_FILE_TYPE_REGULAR are ignored.
+  *
+  * All four steps are always attempted, in that order. The first non-zero
+  * result of the first three steps is returned. The last step only
+  * contributes when it failed and nothing else was reported before. */
+ static int install_info_apply(
+                 UnitFileInstallInfo *i,
+                 const LookupPaths *paths,
+                 const char *config_path,
+                 bool force,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         int result, q;
+
+         assert(i);
+         assert(paths);
+         assert(config_path);
+
+         if (i->type != UNIT_FILE_TYPE_REGULAR)
+                 return 0;
+
+         result = install_info_symlink_alias(i, paths, config_path, force, changes, n_changes);
+
+         q = install_info_symlink_wants(i, paths, config_path, i->wanted_by, ".wants/", changes, n_changes);
+         result = result != 0 ? result : q;
+
+         q = install_info_symlink_wants(i, paths, config_path, i->required_by, ".requires/", changes, n_changes);
+         result = result != 0 ? result : q;
+
+         /* A successfully created link to the unit file must not count towards the
+          * "carries_install_info" total, hence only errors are propagated here. */
+         q = install_info_symlink_link(i, paths, config_path, force, changes, n_changes);
+         if (q < 0 && result == 0)
+                 result = q;
+
+         return result;
+ }
+
+ /* Processes every entry queued in c->will_process: each one is moved to
+  * c->have_processed, traversed with install_info_traverse() and, if it is a
+  * regular unit file, has its symlinks created through install_info_apply().
+  *
+  * Returns a negative errno-style code on failure. Otherwise returns the sum
+  * of the install_info_apply() results, plus one for every masked unit that
+  * was encountered.
+  *
+  * A failing move or traversal aborts the loop at once. A failing
+  * install_info_apply() is remembered as the result while the remaining
+  * entries are still processed. */
+ static int install_context_apply(
+                 UnitFileScope scope,
+                 InstallContext *c,
+                 const LookupPaths *paths,
+                 const char *config_path,
+                 bool force,
+                 SearchFlags flags,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         UnitFileInstallInfo *i;
+         int r;
+
+         assert(c);
+         assert(paths);
+         assert(config_path);
+
+         if (ordered_hashmap_isempty(c->will_process))
+                 return 0;
+
+         r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+         if (r < 0)
+                 return r;
+
+         r = 0;
+         while ((i = ordered_hashmap_first(c->will_process))) {
+                 int q;
+
+                 q = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+                 if (q < 0)
+                         return q;
+
+                 q = install_info_traverse(scope, c, paths, i, flags, NULL);
+                 if (q < 0) {
+                         /* Record the error that actually happened (q), not the running
+                          * total kept in r, which at this point is a count or an
+                          * unrelated earlier error. */
+                         unit_file_changes_add(changes, n_changes, q, i->name, NULL);
+                         return q;
+                 }
+
+                 /* We can attempt to process a masked unit when a different unit
+                  * that we were processing specifies it in Also=. */
+                 if (i->type == UNIT_FILE_TYPE_MASKED) {
+                         unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path, NULL);
+                         if (r >= 0)
+                                 /* Assume that something *could* have been enabled here,
+                                  * avoid "empty [Install] section" warning. */
+                                 r += 1;
+                         continue;
+                 }
+
+                 if (i->type != UNIT_FILE_TYPE_REGULAR)
+                         continue;
+
+                 q = install_info_apply(i, paths, config_path, force, changes, n_changes);
+                 if (r >= 0) {
+                         if (q < 0)
+                                 r = q;
+                         else
+                                 r += q;
+                 }
+         }
+
+         return r;
+ }
+
+ /* Walks all entries queued in c->will_process, moves each to
+  * c->have_processed and adds its name to 'remove_symlinks_to' via
+  * mark_symlink_for_removal().
+  *
+  * Traversal errors do not stop an entry from being marked: they are logged
+  * and, except for a missing auxiliary unit, recorded in 'changes'. Units that
+  * are masked or otherwise not regular files are skipped without being
+  * marked.
+  *
+  * Returns 0 on success, or a negative errno-style code if moving or marking
+  * an entry fails. */
+ static int install_context_mark_for_removal(
+                 UnitFileScope scope,
+                 InstallContext *c,
+                 const LookupPaths *paths,
+                 Set **remove_symlinks_to,
+                 const char *config_path,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         UnitFileInstallInfo *i;
+         int r;
+
+         assert(c);
+         assert(paths);
+         assert(config_path);
+
+         if (ordered_hashmap_isempty(c->will_process))
+                 return 0;
+
+         r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+         if (r < 0)
+                 return r;
+
+         for (;;) {
+                 i = ordered_hashmap_first(c->will_process);
+                 if (!i)
+                         break;
+
+                 r = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+                 if (r < 0)
+                         return r;
+
+                 r = install_info_traverse(scope, c, paths, i, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, NULL);
+                 if (r < 0) {
+                         /* Prefer the resolved path for reporting, fall back to the name. */
+                         const char *what = i->path ?: i->name;
+
+                         switch (r) {
+
+                         case -ENOLINK:
+                                 log_debug_errno(r, "Name %s leads to a dangling symlink, removing name.", i->name);
+                                 unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_DANGLING, what, NULL);
+                                 break;
+
+                         case -ENOENT:
+                                 /* A missing unit pulled in via Also= or similar is not worth reporting. */
+                                 if (i->auxiliary)
+                                         log_debug_errno(r, "Auxiliary unit of %s not found, removing name.", i->name);
+                                 else {
+                                         log_debug_errno(r, "Unit %s not found, removing name.", i->name);
+                                         unit_file_changes_add(changes, n_changes, r, what, NULL);
+                                 }
+                                 break;
+
+                         default:
+                                 log_debug_errno(r, "Failed to find unit %s, removing name: %m", i->name);
+                                 unit_file_changes_add(changes, n_changes, r, what, NULL);
+                                 break;
+                         }
+                 } else if (i->type == UNIT_FILE_TYPE_MASKED) {
+                         log_debug("Unit file %s is masked, ignoring.", i->name);
+                         unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path ?: i->name, NULL);
+                         continue;
+                 } else if (i->type != UNIT_FILE_TYPE_REGULAR) {
+                         log_debug("Unit %s has type %s, ignoring.", i->name, unit_file_type_to_string(i->type) ?: "invalid");
+                         continue;
+                 }
+
+                 r = mark_symlink_for_removal(remove_symlinks_to, i->name);
+                 if (r < 0)
+                         return r;
+         }
+
+         return 0;
+ }
+
+ /* Masks the units listed in 'files' by symlinking "<config dir>/<name>" to
+  * /dev/null. The runtime configuration directory is used when
+  * UNIT_FILE_RUNTIME is set in 'flags', the persistent one otherwise.
+  * UNIT_FILE_FORCE is forwarded to create_symlink().
+  *
+  * Invalid unit names and failing symlinks do not stop the remaining names
+  * from being processed. The first such error is what is returned. */
+ int unit_file_mask(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 char **files,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         const char *config_path;
+         char **name;
+         bool force;
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         if (flags & UNIT_FILE_RUNTIME)
+                 config_path = paths.runtime_config;
+         else
+                 config_path = paths.persistent_config;
+         if (!config_path)
+                 return -ENXIO;
+
+         force = !!(flags & UNIT_FILE_FORCE);
+
+         STRV_FOREACH(name, files) {
+                 _cleanup_free_ char *mask_path = NULL;
+                 int q;
+
+                 if (!unit_name_is_valid(*name, UNIT_NAME_ANY)) {
+                         if (r == 0)
+                                 r = -EINVAL;
+                         continue;
+                 }
+
+                 mask_path = path_make_absolute(*name, config_path);
+                 if (!mask_path)
+                         return -ENOMEM;
+
+                 q = create_symlink(&paths, "/dev/null", mask_path, force, changes, n_changes);
+                 if (r >= 0 && q < 0)
+                         r = q;
+         }
+
+         return r;
+ }
+
+ /* Removes the masks of the units listed in 'files'.
+  *
+  * First pass: every name is validated and only those whose path in the
+  * selected configuration directory passes null_or_empty_path() are queued.
+  * Second pass: the queued paths are unlinked (unless UNIT_FILE_DRY_RUN is
+  * set), recorded in 'changes' and marked so that remove_marked_symlinks()
+  * can drop symlinks pointing to them.
+  *
+  * Returns the first unlink error if there was one, otherwise the result of
+  * remove_marked_symlinks(). Invalid names and allocation failures abort
+  * immediately. */
+ int unit_file_unmask(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 char **files,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+         _cleanup_strv_free_ char **todo = NULL;
+         size_t n_todo = 0, n_allocated = 0;
+         const char *config_path;
+         char **name;
+         bool dry_run;
+         int r, q;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         if (flags & UNIT_FILE_RUNTIME)
+                 config_path = paths.runtime_config;
+         else
+                 config_path = paths.persistent_config;
+         if (!config_path)
+                 return -ENXIO;
+
+         dry_run = !!(flags & UNIT_FILE_DRY_RUN);
+
+         /* Pass 1: collect the names that really are masked in this directory. */
+         STRV_FOREACH(name, files) {
+                 _cleanup_free_ char *candidate = NULL;
+
+                 if (!unit_name_is_valid(*name, UNIT_NAME_ANY))
+                         return -EINVAL;
+
+                 candidate = path_make_absolute(*name, config_path);
+                 if (!candidate)
+                         return -ENOMEM;
+
+                 r = null_or_empty_path(candidate);
+                 if (r == -ENOENT || r == 0)
+                         continue;
+                 if (r < 0)
+                         return r;
+
+                 /* Room for the new entry plus the terminating NULL. */
+                 if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+                         return -ENOMEM;
+
+                 todo[n_todo] = strdup(*name);
+                 if (!todo[n_todo])
+                         return -ENOMEM;
+
+                 n_todo++;
+         }
+
+         strv_uniq(todo);
+
+         /* Pass 2: remove the masks. */
+         r = 0;
+         STRV_FOREACH(name, todo) {
+                 _cleanup_free_ char *mask_path = NULL;
+                 const char *rp;
+
+                 mask_path = path_make_absolute(*name, config_path);
+                 if (!mask_path)
+                         return -ENOMEM;
+
+                 if (!dry_run && unlink(mask_path) < 0) {
+                         int e = errno;
+
+                         /* A mask that vanished in the meantime is not an error. */
+                         if (e == ENOENT)
+                                 continue;
+
+                         if (r >= 0)
+                                 r = -e;
+                         unit_file_changes_add(changes, n_changes, -e, mask_path, NULL);
+                         continue;
+                 }
+
+                 unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, mask_path, NULL);
+
+                 rp = skip_root(&paths, mask_path);
+                 q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: mask_path);
+                 if (q < 0)
+                         return q;
+         }
+
+         q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes);
+         if (r >= 0)
+                 r = q;
+
+         return r;
+ }
+
+ /* Links the unit files given by absolute path in 'files' into the selected
+  * configuration directory under their base names.
+  *
+  * Every path must be absolute, have a valid unit name as its last component
+  * and, after prefixing the root directory, refer to a regular file.
+  * Violations abort the whole call. Files already inside the search path are
+  * skipped.
+  *
+  * Returns 0 on success, or the first create_symlink() error. */
+ int unit_file_link(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 char **files,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_strv_free_ char **todo = NULL;
+         size_t n_todo = 0, n_allocated = 0;
+         const char *config_path;
+         char **f;
+         bool force;
+         int r, q;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         if (flags & UNIT_FILE_RUNTIME)
+                 config_path = paths.runtime_config;
+         else
+                 config_path = paths.persistent_config;
+         if (!config_path)
+                 return -ENXIO;
+
+         /* Validate everything first, nothing is created in this loop. */
+         STRV_FOREACH(f, files) {
+                 _cleanup_free_ char *rooted = NULL;
+                 struct stat st;
+
+                 if (!path_is_absolute(*f))
+                         return -EINVAL;
+
+                 if (!unit_name_is_valid(basename(*f), UNIT_NAME_ANY))
+                         return -EINVAL;
+
+                 rooted = prefix_root(paths.root_dir, *f);
+                 if (!rooted)
+                         return -ENOMEM;
+
+                 if (lstat(rooted, &st) < 0)
+                         return -errno;
+
+                 r = stat_verify_regular(&st);
+                 if (r < 0)
+                         return r;
+
+                 q = in_search_path(&paths, *f);
+                 if (q < 0)
+                         return q;
+                 if (q > 0)
+                         continue;
+
+                 /* Room for the new entry plus the terminating NULL. */
+                 if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+                         return -ENOMEM;
+
+                 todo[n_todo] = strdup(*f);
+                 if (!todo[n_todo])
+                         return -ENOMEM;
+
+                 n_todo++;
+         }
+
+         strv_uniq(todo);
+
+         force = !!(flags & UNIT_FILE_FORCE);
+
+         r = 0;
+         STRV_FOREACH(f, todo) {
+                 _cleanup_free_ char *link_path = NULL;
+
+                 link_path = path_make_absolute(basename(*f), config_path);
+                 if (!link_path)
+                         return -ENOMEM;
+
+                 q = create_symlink(&paths, *f, link_path, force, changes, n_changes);
+                 if (r >= 0 && q < 0)
+                         r = q;
+         }
+
+         return r;
+ }
+
+ /* Tells whether 'path' is in a location whose drop-in directories are to be
+  * removed on revert. The config, control and transient checks are tried in
+  * that order, and the first non-zero answer (positive or error) wins. */
+ static int path_shall_revert(const LookupPaths *paths, const char *path) {
+         int verdict;
+
+         assert(paths);
+         assert(path);
+
+         verdict = path_is_config(paths, path, true);
+         if (verdict == 0)
+                 verdict = path_is_control(paths, path);
+         if (verdict == 0)
+                 verdict = path_is_transient(paths, path);
+
+         return verdict;
+ }
+
+ /* Puts the unit files listed in 'files' back into vendor state. This means:
+  *
+  * a) all drop-in directories added by the user ("config"), added to
+  *    transient units ("transient") or added via "systemctl set-property"
+  *    ("control") are removed, but not generated ones ("generated");
+  *
+  * b) if there is a vendor unit file (i.e. one in /usr), any configured
+  *    overriding unit file is removed (i.e. in "config", but not in
+  *    "transient", "control" or "generated").
+  *
+  * All of this applies to both the runtime and the persistent directories.
+  *
+  * The removals are collected first and executed afterwards. Every removed
+  * path is recorded in 'changes' and symlinks pointing to it are cleaned up
+  * in both configuration directories. */
+ int unit_file_revert(
+                 UnitFileScope scope,
+                 const char *root_dir,
+                 char **files,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_strv_free_ char **todo = NULL;
+         size_t n_todo = 0, n_allocated = 0;
+         char **name, **victim;
+         int r, q;
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         STRV_FOREACH(name, files) {
+                 bool has_vendor = false;
+                 char **dir;
+
+                 if (!unit_name_is_valid(*name, UNIT_NAME_ANY))
+                         return -EINVAL;
+
+                 /* Scan 1: look for a vendor version and queue revertible drop-in directories. */
+                 STRV_FOREACH(dir, paths.search_path) {
+                         _cleanup_free_ char *unit_path = NULL, *dropin_dir = NULL;
+                         struct stat st;
+
+                         unit_path = path_make_absolute(*name, *dir);
+                         if (!unit_path)
+                                 return -ENOMEM;
+
+                         if (lstat(unit_path, &st) < 0) {
+                                 if (errno != ENOENT)
+                                         return -errno;
+                         } else if (S_ISREG(st.st_mode)) {
+                                 r = path_is_vendor(&paths, unit_path);
+                                 if (r < 0)
+                                         return r;
+                                 if (r > 0)
+                                         has_vendor = true;
+                         }
+
+                         dropin_dir = strappend(unit_path, ".d");
+                         if (!dropin_dir)
+                                 return -ENOMEM;
+
+                         if (lstat(dropin_dir, &st) < 0) {
+                                 if (errno != ENOENT)
+                                         return -errno;
+                                 continue;
+                         }
+                         if (!S_ISDIR(st.st_mode))
+                                 continue;
+
+                         r = path_shall_revert(&paths, dropin_dir);
+                         if (r < 0)
+                                 return r;
+                         if (r == 0)
+                                 continue;
+
+                         /* Room for the new entry plus the terminating NULL. */
+                         if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+                                 return -ENOMEM;
+
+                         todo[n_todo++] = TAKE_PTR(dropin_dir);
+                 }
+
+                 /* Without a vendor version the configured unit file is all there is, keep it. */
+                 if (!has_vendor)
+                         continue;
+
+                 /* Scan 2: queue every configured unit file or symlink overriding the vendor version. */
+                 STRV_FOREACH(dir, paths.search_path) {
+                         _cleanup_free_ char *unit_path = NULL;
+                         struct stat st;
+
+                         unit_path = path_make_absolute(*name, *dir);
+                         if (!unit_path)
+                                 return -ENOMEM;
+
+                         if (lstat(unit_path, &st) < 0) {
+                                 if (errno != ENOENT)
+                                         return -errno;
+                                 continue;
+                         }
+                         if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode))
+                                 continue;
+
+                         r = path_is_config(&paths, unit_path, true);
+                         if (r < 0)
+                                 return r;
+                         if (r == 0)
+                                 continue;
+
+                         if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+                                 return -ENOMEM;
+
+                         todo[n_todo++] = TAKE_PTR(unit_path);
+                 }
+         }
+
+         strv_uniq(todo);
+
+         r = 0;
+         STRV_FOREACH(victim, todo) {
+                 _cleanup_strv_free_ char **contents = NULL;
+                 const char *rp;
+                 char **entry;
+
+                 /* Best effort: remember what is inside so it can be reported after removal. */
+                 (void) get_files_in_directory(*victim, &contents);
+
+                 q = rm_rf(*victim, REMOVE_ROOT|REMOVE_PHYSICAL);
+                 if (q < 0 && q != -ENOENT && r >= 0) {
+                         r = q;
+                         continue;
+                 }
+
+                 STRV_FOREACH(entry, contents) {
+                         _cleanup_free_ char *removed = NULL;
+
+                         removed = strjoin(*victim, "/", *entry);
+                         if (!removed)
+                                 return -ENOMEM;
+
+                         unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, removed, NULL);
+                 }
+
+                 unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, *victim, NULL);
+
+                 rp = skip_root(&paths, *victim);
+                 q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: *victim);
+                 if (q < 0)
+                         return q;
+         }
+
+         q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, false, changes, n_changes);
+         if (r >= 0)
+                 r = q;
+
+         q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, false, changes, n_changes);
+         if (r >= 0)
+                 r = q;
+
+         return r;
+ }
+
+ /* Makes 'target' want or require (depending on 'dep', which must be
+  * UNIT_WANTS or UNIT_REQUIRES) each unit listed in 'files' by creating the
+  * matching symlinks in the selected configuration directory.
+  *
+  * The target and all files are discovered and checked first. For every file
+  * the corresponding wanted_by/required_by list is replaced by a list holding
+  * just the target, and install_context_apply() then creates the symlinks. */
+ int unit_file_add_dependency(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 char **files,
+                 const char *target,
+                 UnitDependency dep,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_(install_context_done) InstallContext c = {};
+         UnitFileInstallInfo *info, *target_info;
+         const char *config_path;
+         char **f;
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+         assert(target);
+
+         if (!IN_SET(dep, UNIT_WANTS, UNIT_REQUIRES))
+                 return -EINVAL;
+
+         if (!unit_name_is_valid(target, UNIT_NAME_ANY))
+                 return -EINVAL;
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         if (flags & UNIT_FILE_RUNTIME)
+                 config_path = paths.runtime_config;
+         else
+                 config_path = paths.persistent_config;
+         if (!config_path)
+                 return -ENXIO;
+
+         r = install_info_discover_and_check(scope, &c, &paths, target, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                             &target_info, changes, n_changes);
+         if (r < 0)
+                 return r;
+
+         assert(target_info->type == UNIT_FILE_TYPE_REGULAR);
+
+         STRV_FOREACH(f, files) {
+                 char ***list;
+
+                 r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                                     &info, changes, n_changes);
+                 if (r < 0)
+                         return r;
+
+                 assert(info->type == UNIT_FILE_TYPE_REGULAR);
+
+                 /* Nothing was loaded from the unit file (no SEARCH_LOAD), we merely
+                  * inject the one symlink that is to be created. */
+                 list = dep == UNIT_WANTS ? &info->wanted_by : &info->required_by;
+
+                 strv_free(*list);
+                 *list = strv_new(target_info->name);
+                 if (!*list)
+                         return -ENOMEM;
+         }
+
+         return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes);
+ }
+
+ /* Enables the units listed in 'files': each one is discovered, loaded and
+  * checked, then install_context_apply() creates the symlinks in the selected
+  * configuration directory.
+  *
+  * On success the return value is the number of symlink rules that were
+  * supposed to be created, not the number actually created. Callers can use
+  * it to tell whether the files carried any installation data at all. */
+ int unit_file_enable(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 char **files,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_(install_context_done) InstallContext c = {};
+         const char *config_path;
+         UnitFileInstallInfo *info;
+         char **f;
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         if (flags & UNIT_FILE_RUNTIME)
+                 config_path = paths.runtime_config;
+         else
+                 config_path = paths.persistent_config;
+         if (!config_path)
+                 return -ENXIO;
+
+         STRV_FOREACH(f, files) {
+                 r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                                     &info, changes, n_changes);
+                 if (r < 0)
+                         return r;
+
+                 assert(info->type == UNIT_FILE_TYPE_REGULAR);
+         }
+
+         return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_LOAD, changes, n_changes);
+ }
+
+ /* Disables the units listed in 'files': every (validated) name is added to an
+  * install context, the context is marked for removal, and the symlinks
+  * pointing to the marked names are removed from the selected configuration
+  * directory. UNIT_FILE_DRY_RUN is forwarded to remove_marked_symlinks(). */
+ int unit_file_disable(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 char **files,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_(install_context_done) InstallContext c = {};
+         _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+         const char *config_path;
+         char **name;
+         bool dry_run;
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         if (flags & UNIT_FILE_RUNTIME)
+                 config_path = paths.runtime_config;
+         else
+                 config_path = paths.persistent_config;
+         if (!config_path)
+                 return -ENXIO;
+
+         STRV_FOREACH(name, files) {
+                 if (!unit_name_is_valid(*name, UNIT_NAME_ANY))
+                         return -EINVAL;
+
+                 r = install_info_add(&c, *name, NULL, false, NULL);
+                 if (r < 0)
+                         return r;
+         }
+
+         r = install_context_mark_for_removal(scope, &c, &paths, &remove_symlinks_to, config_path, changes, n_changes);
+         if (r < 0)
+                 return r;
+
+         dry_run = !!(flags & UNIT_FILE_DRY_RUN);
+
+         return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes);
+ }
+
+ /* Disables and then enables the given units. The disable step operates on
+  * the base names of 'files' only, the enable step on the entries as passed
+  * in. A failure to disable skips the enable step. */
+ int unit_file_reenable(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 char **files,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         char **names;
+         size_t count, k;
+         int r;
+
+         /* Build a NULL-terminated array of base names; the strings themselves still
+          * belong to 'files'. */
+         count = strv_length(files);
+         names = newa(char*, count+1);
+         for (k = 0; k < count; k++)
+                 names[k] = basename(files[k]);
+         names[count] = NULL;
+
+         r = unit_file_disable(scope, flags, root_dir, names, changes, n_changes);
+         if (r < 0)
+                 return r;
+
+         return unit_file_enable(scope, flags, root_dir, files, changes, n_changes);
+ }
+
+ /* Makes 'name' the default target by symlinking SPECIAL_DEFAULT_TARGET in
+  * the persistent configuration directory to the unit file of 'name'.
+  *
+  * 'name' must be a target unit and must not be SPECIAL_DEFAULT_TARGET
+  * itself, otherwise -EINVAL is returned. UNIT_FILE_FORCE is forwarded to
+  * create_symlink(). */
+ int unit_file_set_default(
+                 UnitFileScope scope,
+                 UnitFileFlags flags,
+                 const char *root_dir,
+                 const char *name,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_(install_context_done) InstallContext c = {};
+         UnitFileInstallInfo *info;
+         const char *link_path;
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+         assert(name);
+
+         /* The type check doubles as validation of the name. */
+         if (unit_name_to_type(name) != UNIT_TARGET || streq(name, SPECIAL_DEFAULT_TARGET))
+                 return -EINVAL;
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         r = install_info_discover_and_check(scope, &c, &paths, name, 0, &info, changes, n_changes);
+         if (r < 0)
+                 return r;
+
+         link_path = strjoina(paths.persistent_config, "/" SPECIAL_DEFAULT_TARGET);
+
+         return create_symlink(&paths, info->path, link_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+ }
+
+ /* Resolves SPECIAL_DEFAULT_TARGET (following configuration symlinks) and
+  * returns a newly allocated copy of the resulting unit name in '*name'. The
+  * caller owns the string. Returns 0 on success, a negative errno-style code
+  * otherwise. */
+ int unit_file_get_default(
+                 UnitFileScope scope,
+                 const char *root_dir,
+                 char **name) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         _cleanup_(install_context_done) InstallContext c = {};
+         UnitFileInstallInfo *info;
+         char *copy;
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+         assert(name);
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r < 0)
+                 return r;
+
+         r = install_info_discover(scope, &c, &paths, SPECIAL_DEFAULT_TARGET, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                   &info, NULL, NULL);
+         if (r < 0)
+                 return r;
+
+         /* No change list is kept here, so none is passed to the check either. */
+         r = install_info_may_process(info, &paths, NULL, NULL);
+         if (r < 0)
+                 return r;
+
+         copy = strdup(info->name);
+         if (!copy)
+                 return -ENOMEM;
+
+         *name = copy;
+         return 0;
+ }
+
+ /* Determines the enablement state of the unit 'name' and stores it in '*ret'.
+  * 'ret' may be NULL if the caller only wants to know whether the unit can be
+  * discovered.
+  *
+  * Masked units report UNIT_FILE_MASKED_RUNTIME or UNIT_FILE_MASKED depending
+  * on path_is_runtime(). Regular units are checked in this order: generated,
+  * transient, state found by find_symlinks_in_scope() for the unit's own
+  * Alias= links, known under other names (indirect), and finally disabled,
+  * indirect or static depending on what the [Install] data contains.
+  *
+  * Returns 0 on success, a negative errno-style code otherwise. */
+ int unit_file_lookup_state(
+                 UnitFileScope scope,
+                 const LookupPaths *paths,
+                 const char *name,
+                 UnitFileState *ret) {
+
+         _cleanup_(install_context_done) InstallContext c = {};
+         UnitFileInstallInfo *info;
+         UnitFileState state;
+         int r;
+
+         assert(paths);
+         assert(name);
+
+         if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+                 return -EINVAL;
+
+         r = install_info_discover(scope, &c, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                   &info, NULL, NULL);
+         if (r < 0)
+                 return log_debug_errno(r, "Failed to discover unit %s: %m", name);
+
+         assert(IN_SET(info->type, UNIT_FILE_TYPE_REGULAR, UNIT_FILE_TYPE_MASKED));
+         log_debug("Found unit %s at %s (%s)", name, strna(info->path),
+                   info->type == UNIT_FILE_TYPE_REGULAR ? "regular file" : "mask");
+
+         /* Existence is all the caller asked about. */
+         if (!ret)
+                 return 0;
+
+         switch (info->type) {
+
+         case UNIT_FILE_TYPE_MASKED:
+                 r = path_is_runtime(paths, info->path, true);
+                 if (r < 0)
+                         return r;
+
+                 if (r > 0)
+                         state = UNIT_FILE_MASKED_RUNTIME;
+                 else
+                         state = UNIT_FILE_MASKED;
+                 break;
+
+         case UNIT_FILE_TYPE_REGULAR:
+                 r = path_is_generator(paths, info->path);
+                 if (r < 0)
+                         return r;
+                 if (r > 0) {
+                         state = UNIT_FILE_GENERATED;
+                         break;
+                 }
+
+                 r = path_is_transient(paths, info->path);
+                 if (r < 0)
+                         return r;
+                 if (r > 0) {
+                         state = UNIT_FILE_TRANSIENT;
+                         break;
+                 }
+
+                 /* First look only for the Alias= symlinks "systemctl enable" would
+                  * create for this very unit; other aliases are ignored here. If some
+                  * are found, 'state' has been filled in for us. */
+                 r = find_symlinks_in_scope(scope, paths, info, true, &state);
+                 if (r < 0)
+                         return r;
+                 if (r > 0)
+                         break;
+
+                 /* Then check whether the file is known under other names. If so it
+                  * might be in use, which is reported as UNIT_FILE_INDIRECT. */
+                 r = find_symlinks_in_scope(scope, paths, info, false, &state);
+                 if (r < 0)
+                         return r;
+                 if (r > 0) {
+                         state = UNIT_FILE_INDIRECT;
+                         break;
+                 }
+
+                 if (unit_file_install_info_has_rules(info))
+                         state = UNIT_FILE_DISABLED;
+                 else if (unit_file_install_info_has_also(info))
+                         state = UNIT_FILE_INDIRECT;
+                 else
+                         state = UNIT_FILE_STATIC;
+                 break;
+
+         default:
+                 assert_not_reached("Unexpect unit file type.");
+         }
+
+         *ret = state;
+         return 0;
+ }
+
+ /* Convenience wrapper around unit_file_lookup_state() that sets up the lookup
+  * paths for 'scope' and 'root_dir' itself and releases them again on return. */
+ int unit_file_get_state(
+                 UnitFileScope scope,
+                 const char *root_dir,
+                 const char *name,
+                 UnitFileState *ret) {
+
+         _cleanup_(lookup_paths_free) LookupPaths paths = {};
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+         assert(name);
+
+         r = lookup_paths_init(&paths, scope, 0, root_dir);
+         if (r >= 0)
+                 r = unit_file_lookup_state(scope, &paths, name, ret);
+
+         return r;
+ }
+
+ /* Returns 1 if the unit 'name' can be discovered, 0 if discovery fails with
+  * -ENOENT, and a negative errno-style code for invalid names or any other
+  * failure. */
+ int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name) {
+         _cleanup_(install_context_done) InstallContext c = {};
+         int r;
+
+         assert(paths);
+         assert(name);
+
+         if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+                 return -EINVAL;
+
+         r = install_info_discover(scope, &c, paths, name, 0, NULL, NULL, NULL);
+         if (r == -ENOENT)
+                 return 0;
+
+         return r < 0 ? r : 1;
+ }
+
+ /* Splits the argument of an "enable" preset line into the unit name (first
+  * word) and an optional list of instance names (remaining words).
+  *
+  * On success '*out_unit_name' receives the newly allocated unit name. If
+  * further words follow, the unit name must be a template, and
+  * '*out_instances' receives them as a newly allocated string list; otherwise
+  * '*out_instances' is left untouched. The caller owns both results.
+  *
+  * Returns 0 on success, -EINVAL if no unit name is present or if instances
+  * are given for a non-template name, -ENOMEM on allocation failure, or the
+  * error returned by extract_first_word(). */
+ static int split_pattern_into_name_and_instances(const char *pattern, char **out_unit_name, char ***out_instances) {
+         _cleanup_strv_free_ char **instances = NULL;
+         _cleanup_free_ char *unit_name = NULL;
+         int r;
+
+         assert(pattern);
+         assert(out_instances);
+         assert(out_unit_name);
+
+         r = extract_first_word(&pattern, &unit_name, NULL, 0);
+         if (r < 0)
+                 return r;
+
+         /* A line that carries the keyword but no unit name must not produce a rule:
+          * the NULL name would become the rule's pattern and later be handed to
+          * streq()/fnmatch() when the presets are queried. */
+         if (!unit_name)
+                 return -EINVAL;
+
+         /* We handle the instances logic when unit name is extracted */
+         if (pattern) {
+                 /* We only create instances when a rule of templated unit
+                  * is seen. A rule like enable foo@.service a b c will
+                  * result in an array of (a, b, c) as instance names */
+                 if (!unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE))
+                         return -EINVAL;
+
+                 instances = strv_split(pattern, WHITESPACE);
+                 if (!instances)
+                         return -ENOMEM;
+
+                 *out_instances = TAKE_PTR(instances);
+         }
+
+         *out_unit_name = TAKE_PTR(unit_name);
+
+         return 0;
+ }
+
+ /* Reads all "*.preset" files that apply to 'scope' below 'root_dir' and
+  * stores the rules found, in file order, in '*presets'.
+  *
+  * Each non-empty, non-comment line must start with "enable" or "disable".
+  * "enable" lines may list instance names after a template unit name (see
+  * split_pattern_into_name_and_instances()); for "disable" lines the rest of
+  * the line is the pattern. Lines that cannot be parsed are logged and
+  * skipped.
+  *
+  * Returns 0 on success, in which case the caller owns the contents of
+  * '*presets'. On failure a negative errno-style code is returned and
+  * '*presets' is not modified. */
+ static int read_presets(UnitFileScope scope, const char *root_dir, Presets *presets) {
+         _cleanup_(presets_freep) Presets ps = {};
+         size_t n_allocated = 0;
+         _cleanup_strv_free_ char **files = NULL;
+         char **p;
+         int r;
+
+         assert(scope >= 0);
+         assert(scope < _UNIT_FILE_SCOPE_MAX);
+         assert(presets);
+
+         switch (scope) {
+         case UNIT_FILE_SYSTEM:
+                 r = conf_files_list(&files, ".preset", root_dir, 0,
+                                     "/etc/systemd/system-preset",
+                                     "/run/systemd/system-preset",
+                                     "/usr/local/lib/systemd/system-preset",
+                                     "/usr/lib/systemd/system-preset",
+ #if HAVE_SPLIT_USR
+                                     "/lib/systemd/system-preset",
+ #endif
+                                     NULL);
+                 break;
+
+         case UNIT_FILE_GLOBAL:
+         case UNIT_FILE_USER:
+                 r = conf_files_list(&files, ".preset", root_dir, 0,
+                                     "/etc/systemd/user-preset",
+                                     "/run/systemd/user-preset",
+                                     "/usr/local/lib/systemd/user-preset",
+                                     "/usr/lib/systemd/user-preset",
+                                     NULL);
+                 break;
+
+         default:
+                 assert_not_reached("Invalid unit file scope");
+         }
+
+         if (r < 0)
+                 return r;
+
+         STRV_FOREACH(p, files) {
+                 _cleanup_fclose_ FILE *f = NULL;
+                 int n = 0; /* current line number, for diagnostics */
+
+                 f = fopen(*p, "re");
+                 if (!f) {
+                         /* A file that vanished since it was listed is simply skipped. */
+                         if (errno == ENOENT)
+                                 continue;
+
+                         return -errno;
+                 }
+
+                 for (;;) {
+                         _cleanup_free_ char *line = NULL;
+                         PresetRule rule = {};
+                         const char *parameter;
+                         char *l;
+
+                         r = read_line(f, LONG_LINE_MAX, &line);
+                         if (r < 0)
+                                 return r;
+                         if (r == 0)
+                                 break;
+
+                         l = strstrip(line);
+                         n++;
+
+                         if (isempty(l))
+                                 continue;
+                         if (strchr(COMMENTS, *l))
+                                 continue;
+
+                         parameter = first_word(l, "enable");
+                         if (parameter) {
+                                 char *unit_name = NULL;
+                                 char **instances = NULL;
+
+                                 /* Unit_name will remain the same as parameter when no instances are specified */
+                                 r = split_pattern_into_name_and_instances(parameter, &unit_name, &instances);
+                                 if (r < 0) {
+                                         log_syntax(NULL, LOG_WARNING, *p, n, r, "Couldn't parse line '%s'. Ignoring.", line);
+                                         continue;
+                                 }
+
+                                 rule = (PresetRule) {
+                                         .pattern = unit_name,
+                                         .action = PRESET_ENABLE,
+                                         .instances = instances,
+                                 };
+                         }
+
+                         parameter = first_word(l, "disable");
+                         if (parameter) {
+                                 char *pattern;
+
+                                 pattern = strdup(parameter);
+                                 if (!pattern)
+                                         return -ENOMEM;
+
+                                 rule = (PresetRule) {
+                                         .pattern = pattern,
+                                         .action = PRESET_DISABLE,
+                                 };
+                         }
+
+                         if (rule.action) {
+                                 if (!GREEDY_REALLOC(ps.rules, n_allocated, ps.n_rules + 1)) {
+                                         /* The rule is not owned by 'ps' yet, so its strings have
+                                          * to be released here or they would leak. */
+                                         free(rule.pattern);
+                                         strv_free(rule.instances);
+                                         return -ENOMEM;
+                                 }
+
+                                 ps.rules[ps.n_rules++] = rule;
+                                 continue;
+                         }
+
+                         log_syntax(NULL, LOG_WARNING, *p, n, 0, "Couldn't parse line '%s'. Ignoring.", line);
+                 }
+         }
+
+         /* Hand everything over to the caller and disarm our own cleanup. */
+         *presets = ps;
+         ps = (Presets){};
+
+         return 0;
+ }
+
+ /* Checks whether 'unit_name' is covered by a preset rule that lists explicit
+  * instances of a template.
+  *
+  * The rule only applies if its pattern equals the template form of
+  * 'unit_name'. Then:
+  *  - if 'unit_name' is the template itself, '*ret' receives a newly
+  *    allocated list with one instantiated unit name per instance of the
+  *    rule, and 1 is returned;
+  *  - if 'unit_name' is an instance, 1 is returned when that instance is
+  *    listed in the rule ('*ret' is not touched), 0 otherwise.
+  *
+  * Returns 0 when the rule does not apply (also when 'ret' is NULL or the
+  * rule has no instances), or a negative errno-style code on failure. */
+ static int pattern_match_multiple_instances(
+                 const PresetRule rule,
+                 const char *unit_name,
+                 char ***ret) {
+
+         _cleanup_free_ char *templated_name = NULL;
+         int r;
+
+         /* If no ret is needed or the rule itself does not have instances
+          * initialized, we return not matching */
+         if (!ret || !rule.instances)
+                 return 0;
+
+         r = unit_name_template(unit_name, &templated_name);
+         if (r < 0)
+                 return r;
+         if (!streq(rule.pattern, templated_name))
+                 return 0;
+
+         /* Compose a list of specified instances when unit name is a template */
+         if (unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+                 _cleanup_strv_free_ char **out_strv = NULL;
+                 char **iter;
+
+                 STRV_FOREACH(iter, rule.instances) {
+                         _cleanup_free_ char *name = NULL;
+
+                         /* Instantiate the template itself so that the unit type suffix
+                          * is kept, rather than assuming every template is a ".service". */
+                         r = unit_name_replace_instance(unit_name, *iter, &name);
+                         if (r < 0)
+                                 return r;
+
+                         r = strv_extend(&out_strv, name);
+                         if (r < 0)
+                                 return r;
+                 }
+
+                 *ret = TAKE_PTR(out_strv);
+                 return 1;
+         } else {
+                 /* We now know the input unit name is an instance name */
+                 _cleanup_free_ char *instance_name = NULL;
+
+                 r = unit_name_to_instance(unit_name, &instance_name);
+                 if (r < 0)
+                         return r;
+
+                 if (strv_find(rule.instances, instance_name))
+                         return 1;
+         }
+
+         return 0;
+ }
+
+ /* Looks up the preset action for the unit 'name'. The first rule that either
+  * matches through pattern_match_multiple_instances() or whose pattern matches
+  * 'name' via fnmatch() decides.
+  *
+  * Returns 1 if the unit shall be enabled (also when no rule matches), 0 if
+  * it shall be disabled, -EINVAL for an invalid unit name. When
+  * 'instance_name_list' is non-NULL it may be filled in by
+  * pattern_match_multiple_instances(). */
+ static int query_presets(const char *name, const Presets presets, char ***instance_name_list) {
+         PresetAction action = PRESET_UNKNOWN;
+         size_t idx;
+         char **s;
+
+         if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+                 return -EINVAL;
+
+         for (idx = 0; idx < presets.n_rules; idx++) {
+                 const PresetRule *rule = &presets.rules[idx];
+
+                 if (pattern_match_multiple_instances(*rule, name, instance_name_list) <= 0 &&
+                     fnmatch(rule->pattern, name, FNM_NOESCAPE) != 0)
+                         continue;
+
+                 action = rule->action;
+                 break;
+         }
+
+         if (action == PRESET_UNKNOWN) {
+                 log_debug("Preset files don't specify rule for %s. Enabling.", name);
+                 return 1;
+         }
+
+         if (action == PRESET_DISABLE) {
+                 log_debug("Preset files say disable %s.", name);
+                 return 0;
+         }
+
+         if (action == PRESET_ENABLE) {
+                 if (instance_name_list && *instance_name_list) {
+                         STRV_FOREACH(s, *instance_name_list)
+                                 log_debug("Preset files say enable %s.", *s);
+                 } else
+                         log_debug("Preset files say enable %s.", name);
+
+                 return 1;
+         }
+
+         assert_not_reached("invalid preset action");
+ }
+
+ /* Reads the preset files for 'scope' and 'root_dir' and reports what they say
+  * about the unit 'name'; see query_presets() for the meaning of the result. */
+ int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name) {
+         _cleanup_(presets_freep) Presets presets = {};
+         int r;
+
+         r = read_presets(scope, root_dir, &presets);
+         if (r >= 0)
+                 r = query_presets(name, presets, NULL);
+
+         return r;
+ }
+
+ /* Carries out a prepared preset operation: unless 'mode' is
+  * UNIT_FILE_PRESET_ENABLE_ONLY the units collected in 'minus' have their
+  * symlinks removed, and unless it is UNIT_FILE_PRESET_DISABLE_ONLY the units
+  * collected in 'plus' are installed.
+  *
+  * The installation step is attempted even if the removal step failed, but an
+  * earlier error takes precedence in the return value. On success the result
+  * of the removal step plus the result of install_context_apply() is
+  * returned. */
+ static int execute_preset(
+                 UnitFileScope scope,
+                 InstallContext *plus,
+                 InstallContext *minus,
+                 const LookupPaths *paths,
+                 const char *config_path,
+                 char **files,
+                 UnitFilePresetMode mode,
+                 bool force,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         int r = 0, q;
+
+         assert(plus);
+         assert(minus);
+         assert(paths);
+         assert(config_path);
+
+         if (mode != UNIT_FILE_PRESET_ENABLE_ONLY) {
+                 _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+
+                 r = install_context_mark_for_removal(scope, minus, paths, &remove_symlinks_to, config_path, changes, n_changes);
+                 if (r < 0)
+                         return r;
+
+                 r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, false, changes, n_changes);
+         }
+
+         if (mode == UNIT_FILE_PRESET_DISABLE_ONLY)
+                 return r;
+
+         /* Yields the number of symlinks that were supposed to be installed. */
+         q = install_context_apply(scope, plus, paths, config_path, force, SEARCH_LOAD, changes, n_changes);
+         if (r < 0)
+                 return r;
+
+         return q < 0 ? q : r + q;
+ }
+
+ /* Sorts one unit into the 'plus' (to be enabled) or 'minus' (to be disabled)
+  * context according to the preset rules.
+  *
+  * Units already present in either context, and names that turn out to be
+  * aliases of a differently named unit, are skipped with 0. If the presets
+  * name explicit instances for a template, each instance is added to 'plus'
+  * instead of the unit itself. */
+ static int preset_prepare_one(
+                 UnitFileScope scope,
+                 InstallContext *plus,
+                 InstallContext *minus,
+                 LookupPaths *paths,
+                 const char *name,
+                 Presets presets,
+                 UnitFileChange **changes,
+                 size_t *n_changes) {
+
+         _cleanup_(install_context_done) InstallContext tmp = {};
+         _cleanup_strv_free_ char **instance_name_list = NULL;
+         UnitFileInstallInfo *info;
+         char **inst;
+         int r;
+
+         if (install_info_find(plus, name) || install_info_find(minus, name))
+                 return 0;
+
+         /* Resolve the name in a scratch context first to find out what it really refers to. */
+         r = install_info_discover(scope, &tmp, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                   &info, changes, n_changes);
+         if (r < 0)
+                 return r;
+         if (!streq(name, info->name)) {
+                 log_debug("Skipping %s because it is an alias for %s.", name, info->name);
+                 return 0;
+         }
+
+         r = query_presets(name, presets, &instance_name_list);
+         if (r < 0)
+                 return r;
+
+         if (r == 0)
+                 return install_info_discover(scope, minus, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                              &info, changes, n_changes);
+
+         if (!instance_name_list)
+                 return install_info_discover_and_check(scope, plus, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                                        &info, changes, n_changes);
+
+         STRV_FOREACH(inst, instance_name_list) {
+                 r = install_info_discover_and_check(scope, plus, paths, *inst, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+                                                     &info, changes, n_changes);
+                 if (r < 0)
+                         return r;
+         }
+
+         return r;
+ }
+
+int unit_file_preset(
+                UnitFileScope scope,
+                UnitFileFlags flags,
+                const char *root_dir,
+                char **files,
+                UnitFilePresetMode mode,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+        _cleanup_(lookup_paths_free) LookupPaths paths = {};
+        _cleanup_(presets_freep) Presets presets = {};
+        const char *config_path;
+        char **name;
+        bool force;
+        int r;
+
+        /* Applies the preset policy to the units named in 'files': every name is first sorted into the
+         * 'plus' or 'minus' context by preset_prepare_one(), then execute_preset() acts on both contexts
+         * according to 'mode'. Returns the result of execute_preset(), or the first negative error. */
+
+        assert(scope >= 0);
+        assert(scope < _UNIT_FILE_SCOPE_MAX);
+        assert(mode < _UNIT_FILE_PRESET_MAX);
+
+        r = lookup_paths_init(&paths, scope, 0, root_dir);
+        if (r < 0)
+                return r;
+
+        /* UNIT_FILE_RUNTIME selects the runtime configuration directory over the persistent one. */
+        if (flags & UNIT_FILE_RUNTIME)
+                config_path = paths.runtime_config;
+        else
+                config_path = paths.persistent_config;
+        if (!config_path)
+                return -ENXIO;
+
+        r = read_presets(scope, root_dir, &presets);
+        if (r < 0)
+                return r;
+
+        STRV_FOREACH(name, files) {
+                r = preset_prepare_one(scope, &plus, &minus, &paths, *name, presets, changes, n_changes);
+                if (r < 0)
+                        return r;
+        }
+
+        force = !!(flags & UNIT_FILE_FORCE);
+
+        return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, force, changes, n_changes);
+}
+
+int unit_file_preset_all(
+                UnitFileScope scope,
+                UnitFileFlags flags,
+                const char *root_dir,
+                UnitFilePresetMode mode,
+                UnitFileChange **changes,
+                size_t *n_changes) {
+
+        _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+        _cleanup_(lookup_paths_free) LookupPaths paths = {};
+        _cleanup_(presets_freep) Presets presets = {};
+        const char *config_path = NULL;
+        char **i;
+        int r;
+
+        /* Applies the preset policy to every unit file found in the directories of the search path.
+         *
+         * Each regular file or symlink with a valid unit name is sorted into the 'plus' or 'minus'
+         * context by preset_prepare_one(); afterwards execute_preset() acts on both contexts according
+         * to 'mode'. Masked and dangling units do not abort the run, they are recorded in 'changes'
+         * instead. Returns the result of execute_preset(), or the first negative error. */
+
+        assert(scope >= 0);
+        assert(scope < _UNIT_FILE_SCOPE_MAX);
+        assert(mode < _UNIT_FILE_PRESET_MAX);
+
+        r = lookup_paths_init(&paths, scope, 0, root_dir);
+        if (r < 0)
+                return r;
+
+        /* UNIT_FILE_RUNTIME selects the runtime configuration directory over the persistent one. */
+        config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+        if (!config_path)
+                return -ENXIO;
+
+        r = read_presets(scope, root_dir, &presets);
+        if (r < 0)
+                return r;
+
+        STRV_FOREACH(i, paths.search_path) {
+                _cleanup_closedir_ DIR *d = NULL;
+                struct dirent *de;
+
+                d = opendir(*i);
+                if (!d) {
+                        /* A search path entry that does not exist is simply skipped. */
+                        if (errno == ENOENT)
+                                continue;
+
+                        return -errno;
+                }
+
+                FOREACH_DIRENT(de, d, return -errno) {
+
+                        if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+                                continue;
+
+                        dirent_ensure_type(d, de);
+
+                        if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+                                continue;
+
+                        /* We don't pass changes[] in, because we want to handle errors on our own. Both
+                         * trailing arguments are pointers, hence NULL for each of them. */
+                        r = preset_prepare_one(scope, &plus, &minus, &paths, de->d_name, presets, NULL, NULL);
+                        if (r == -ERFKILL)
+                                r = unit_file_changes_add(changes, n_changes,
+                                                          UNIT_FILE_IS_MASKED, de->d_name, NULL);
+                        else if (r == -ENOLINK)
+                                r = unit_file_changes_add(changes, n_changes,
+                                                          UNIT_FILE_IS_DANGLING, de->d_name, NULL);
+                        else if (r == -EADDRNOTAVAIL) /* Ignore generated/transient units when applying preset */
+                                continue;
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+static void unit_file_list_free_one(UnitFileList *f) {
+        /* Releases one list entry together with the path string it owns; NULL is accepted. */
+        if (f) {
+                free(f->path);
+                free(f);
+        }
+}
+
+Hashmap* unit_file_list_free(Hashmap *h) { /* frees the hashmap, passing unit_file_list_free_one as destructor for its values */
+ return hashmap_free_with_destructor(h, unit_file_list_free_one);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UnitFileList*, unit_file_list_free_one);
+
+int unit_file_get_list(
+ UnitFileScope scope,
+ const char *root_dir,
+ Hashmap *h,
+ char **states,
+ char **patterns) {
+ /* Fills the caller-provided hashmap 'h' with one UnitFileList entry per unit file found in the
+ * directories of the search path, keyed by the file name. 'patterns' is matched against the file
+ * name with fnmatch semantics; 'states' lists state names an entry must have to be kept (an empty
+ * list keeps every state). Returns 0 on success or a negative error. */
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(h);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*i);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+ if (IN_SET(errno, ENOTDIR, EACCES)) { /* not fatal either, but worth a debug message */
+ log_debug_errno(errno, "Failed to open \"%s\": %m", *i);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_(unit_file_list_free_onep) UnitFileList *f = NULL;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ if (!strv_fnmatch_or_empty(patterns, de->d_name, FNM_NOESCAPE))
+ continue;
+ /* An entry of this name is already in the map (put there by the caller or found in an
+ * earlier directory): keep that one. */
+ if (hashmap_get(h, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ f = new0(UnitFileList, 1);
+ if (!f)
+ return -ENOMEM;
+
+ f->path = path_make_absolute(de->d_name, *i);
+ if (!f->path)
+ return -ENOMEM;
+ /* A failing state lookup does not abort the listing, the entry is reported as "bad". */
+ r = unit_file_lookup_state(scope, &paths, de->d_name, &f->state);
+ if (r < 0)
+ f->state = UNIT_FILE_BAD;
+
+ if (!strv_isempty(states) &&
+ !strv_contains(states, unit_file_state_to_string(f->state)))
+ continue;
+ /* The key is derived from f->path (presumably it points into that string -- confirm), so
+ * it must not outlive the entry. */
+ r = hashmap_put(h, basename(f->path), f);
+ if (r < 0)
+ return r;
+
+ f = NULL; /* prevent cleanup */
+ }
+ }
+
+ return 0;
+}
+
+static const char* const unit_file_state_table[_UNIT_FILE_STATE_MAX] = { /* string name of each UnitFileState value */
+ [UNIT_FILE_ENABLED] = "enabled",
+ [UNIT_FILE_ENABLED_RUNTIME] = "enabled-runtime",
+ [UNIT_FILE_LINKED] = "linked",
+ [UNIT_FILE_LINKED_RUNTIME] = "linked-runtime",
+ [UNIT_FILE_MASKED] = "masked",
+ [UNIT_FILE_MASKED_RUNTIME] = "masked-runtime",
+ [UNIT_FILE_STATIC] = "static",
+ [UNIT_FILE_DISABLED] = "disabled",
+ [UNIT_FILE_INDIRECT] = "indirect",
+ [UNIT_FILE_GENERATED] = "generated",
+ [UNIT_FILE_TRANSIENT] = "transient",
+ [UNIT_FILE_BAD] = "bad",
+};
+ /* Provides unit_file_state_to_string() and unit_file_state_from_string() as declared in install.h. */
+DEFINE_STRING_TABLE_LOOKUP(unit_file_state, UnitFileState);
+
+static const char* const unit_file_change_type_table[_UNIT_FILE_CHANGE_TYPE_MAX] = { /* string name of each UnitFileChangeType value */
+ [UNIT_FILE_SYMLINK] = "symlink",
+ [UNIT_FILE_UNLINK] = "unlink",
+ [UNIT_FILE_IS_MASKED] = "masked",
+ [UNIT_FILE_IS_DANGLING] = "dangling",
+};
+ /* Provides unit_file_change_type_to_string() and unit_file_change_type_from_string(). */
+DEFINE_STRING_TABLE_LOOKUP(unit_file_change_type, UnitFileChangeType);
+
+static const char* const unit_file_preset_mode_table[_UNIT_FILE_PRESET_MAX] = { /* string name of each UnitFilePresetMode value */
+ [UNIT_FILE_PRESET_FULL] = "full",
+ [UNIT_FILE_PRESET_ENABLE_ONLY] = "enable-only",
+ [UNIT_FILE_PRESET_DISABLE_ONLY] = "disable-only",
+};
+ /* Provides unit_file_preset_mode_to_string() and unit_file_preset_mode_from_string(). */
+DEFINE_STRING_TABLE_LOOKUP(unit_file_preset_mode, UnitFilePresetMode);
diff --git a/src/shared/install.h b/src/shared/install.h
new file mode 100644
index 0000000..e452940
--- /dev/null
+++ b/src/shared/install.h
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+typedef enum UnitFileScope UnitFileScope;
+typedef enum UnitFileState UnitFileState;
+typedef enum UnitFilePresetMode UnitFilePresetMode;
+typedef enum UnitFileChangeType UnitFileChangeType;
+typedef enum UnitFileFlags UnitFileFlags;
+typedef enum UnitFileType UnitFileType;
+typedef struct UnitFileChange UnitFileChange;
+typedef struct UnitFileList UnitFileList;
+typedef struct UnitFileInstallInfo UnitFileInstallInfo;
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "strv.h"
+#include "unit-name.h"
+
+enum UnitFileScope { /* scope argument of the unit_file_*() calls */
+ UNIT_FILE_SYSTEM,
+ UNIT_FILE_GLOBAL,
+ UNIT_FILE_USER,
+ _UNIT_FILE_SCOPE_MAX, /* number of valid values; callers assert scope < _UNIT_FILE_SCOPE_MAX */
+ _UNIT_FILE_SCOPE_INVALID = -1
+};
+
+enum UnitFileState { /* state reported by unit_file_lookup_state(); string names live in unit_file_state_table */
+ UNIT_FILE_ENABLED,
+ UNIT_FILE_ENABLED_RUNTIME,
+ UNIT_FILE_LINKED,
+ UNIT_FILE_LINKED_RUNTIME,
+ UNIT_FILE_MASKED,
+ UNIT_FILE_MASKED_RUNTIME,
+ UNIT_FILE_STATIC,
+ UNIT_FILE_DISABLED,
+ UNIT_FILE_INDIRECT,
+ UNIT_FILE_GENERATED,
+ UNIT_FILE_TRANSIENT,
+ UNIT_FILE_BAD, /* also used by unit_file_get_list() when the state lookup itself fails */
+ _UNIT_FILE_STATE_MAX, /* number of valid values, sizes the string table */
+ _UNIT_FILE_STATE_INVALID = -1
+};
+
+enum UnitFilePresetMode { /* selects which half of the preset policy execute_preset() carries out */
+ UNIT_FILE_PRESET_FULL, /* both remove symlinks of units to disable and install units to enable */
+ UNIT_FILE_PRESET_ENABLE_ONLY, /* skips the symlink removal step */
+ UNIT_FILE_PRESET_DISABLE_ONLY, /* skips the installation step */
+ _UNIT_FILE_PRESET_MAX, /* number of valid values, sizes the string table */
+ _UNIT_FILE_PRESET_INVALID = -1
+};
+
+enum UnitFileChangeType { /* non-negative values of UnitFileChange.type */
+ UNIT_FILE_SYMLINK, /* counted as a modification by unit_file_changes_have_modification() */
+ UNIT_FILE_UNLINK, /* counted as a modification by unit_file_changes_have_modification() */
+ UNIT_FILE_IS_MASKED,
+ UNIT_FILE_IS_DANGLING,
+ _UNIT_FILE_CHANGE_TYPE_MAX, /* number of valid values, sizes the string table */
+ _UNIT_FILE_CHANGE_TYPE_INVALID = INT_MIN /* NOTE(review): presumably not -1 because UnitFileChange.type may also carry negative errors -- confirm */
+};
+
+enum UnitFileFlags { /* bit flags, combined with | in the 'flags' argument of the unit_file_*() calls */
+ UNIT_FILE_RUNTIME = 1 << 0, /* operate on the runtime instead of the persistent configuration directory */
+ UNIT_FILE_FORCE = 1 << 1, /* handed on as the 'force' argument when symlinks are installed */
+ UNIT_FILE_DRY_RUN = 1 << 2,
+};
+
+/* type can be either one of the UnitFileChangeTypes listed above, or a negative error.
+ * If source is specified, it should be the contents of the path symlink.
+ * In case of an error, source should be the existing symlink contents or NULL.
+ */
+struct UnitFileChange {
+ int type; /* UnitFileChangeType or bust */
+ char *path;
+ char *source;
+};
+
+static inline bool unit_file_changes_have_modification(const UnitFileChange* changes, size_t n_changes) {
+        size_t idx;
+
+        /* Tells whether at least one of the first n_changes entries records a symlink creation or removal,
+         * as opposed to a purely informational entry or an error. */
+        for (idx = 0; idx < n_changes; idx++) {
+                int type = changes[idx].type;
+
+                if (type == UNIT_FILE_SYMLINK || type == UNIT_FILE_UNLINK)
+                        return true;
+        }
+
+        return false;
+}
+
+struct UnitFileList { /* one entry of the hashmap filled by unit_file_get_list() */
+ char *path; /* absolute path of the unit file; owned by the entry and freed together with it */
+ UnitFileState state; /* result of unit_file_lookup_state(), or UNIT_FILE_BAD if that lookup failed */
+};
+
+enum UnitFileType { /* value of the 'type' member of UnitFileInstallInfo */
+ UNIT_FILE_TYPE_REGULAR,
+ UNIT_FILE_TYPE_SYMLINK,
+ UNIT_FILE_TYPE_MASKED,
+ _UNIT_FILE_TYPE_MAX, /* number of valid values */
+ _UNIT_FILE_TYPE_INVALID = -1,
+};
+
+struct UnitFileInstallInfo { /* what the install logic knows about one unit file */
+ char *name; /* name the unit was resolved to; it differs from the requested name for aliases */
+ char *path;
+ /* NOTE(review): the four lists below presumably mirror the [Install] section settings of the same
+ * names -- confirm against the parser in install.c. */
+ char **aliases;
+ char **wanted_by;
+ char **required_by;
+ char **also;
+
+ char *default_instance;
+ char *symlink_target;
+
+ UnitFileType type;
+ bool auxiliary;
+};
+
+bool unit_type_may_alias(UnitType type) _const_;
+bool unit_type_may_template(UnitType type) _const_;
+
+int unit_file_enable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_disable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_reenable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset_all(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_mask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_unmask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_link(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_revert(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_set_default(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ const char *file,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_get_default(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **name);
+int unit_file_add_dependency(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ const char *target,
+ UnitDependency dep,
+ UnitFileChange **changes,
+ size_t *n_changes);
+
+int unit_file_lookup_state(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const char *name,
+ UnitFileState *ret);
+
+int unit_file_get_state(UnitFileScope scope, const char *root_dir, const char *filename, UnitFileState *ret);
+int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name);
+
+int unit_file_get_list(UnitFileScope scope, const char *root_dir, Hashmap *h, char **states, char **patterns);
+Hashmap* unit_file_list_free(Hashmap *h);
+
+int unit_file_changes_add(UnitFileChange **changes, size_t *n_changes, UnitFileChangeType type, const char *path, const char *source);
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes);
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet);
+
+int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name);
+
+const char *unit_file_state_to_string(UnitFileState s) _const_;
+UnitFileState unit_file_state_from_string(const char *s) _pure_;
+/* from_string conversion is unreliable because of the overlap between -EPERM and -1 for error. */
+
+const char *unit_file_change_type_to_string(UnitFileChangeType s) _const_;
+UnitFileChangeType unit_file_change_type_from_string(const char *s) _pure_;
+
+const char *unit_file_preset_mode_to_string(UnitFilePresetMode m) _const_;
+UnitFilePresetMode unit_file_preset_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/ip-protocol-list.c b/src/shared/ip-protocol-list.c
new file mode 100644
index 0000000..aa675ea
--- /dev/null
+++ b/src/shared/ip-protocol-list.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+
+#include "alloc-util.h"
+#include "ip-protocol-list.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static const struct ip_protocol_name* lookup_ip_protocol(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "ip-protocol-from-name.h"
+#include "ip-protocol-to-name.h"
+
+const char *ip_protocol_to_name(int id) {
+
+        /* Looks up the name stored for protocol number 'id' in the generated ip_protocol_names[] table.
+         * Returns NULL for numbers outside of the table, and whatever the table holds (possibly NULL)
+         * otherwise. */
+        if (id < 0 || (size_t) id >= ELEMENTSOF(ip_protocol_names))
+                return NULL;
+
+        return ip_protocol_names[id];
+}
+
+int ip_protocol_from_name(const char *name) {
+        const struct ip_protocol_name *entry;
+
+        assert(name);
+
+        /* Maps a protocol name to its number via the generated lookup function; -EINVAL if unknown. */
+        entry = lookup_ip_protocol(name, strlen(name));
+
+        return entry ? entry->id : -EINVAL;
+}
+
+int parse_ip_protocol(const char *s) {
+        _cleanup_free_ char *lowered = NULL;
+        int proto, r;
+
+        assert(s);
+
+        /* Parses an IP protocol given either by name (matched after lower-casing) or by number. An empty
+         * string yields IPPROTO_IP. Returns the protocol number, or a negative error. */
+
+        if (isempty(s))
+                return IPPROTO_IP;
+
+        /* The input may come from the command line or from config files, hence work on a heap copy
+         * rather than on a strdupa() stack copy. */
+        lowered = strdup(s);
+        if (!lowered)
+                return -ENOMEM;
+
+        proto = ip_protocol_from_name(ascii_strlower(lowered));
+        if (proto >= 0)
+                return proto;
+
+        /* Not a known name, so it has to be a number that has a name in the table. */
+        r = safe_atoi(lowered, &proto);
+        if (r < 0)
+                return r;
+
+        return ip_protocol_to_name(proto) ? proto : -EINVAL;
+}
diff --git a/src/shared/ip-protocol-list.h b/src/shared/ip-protocol-list.h
new file mode 100644
index 0000000..5c94969
--- /dev/null
+++ b/src/shared/ip-protocol-list.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+const char *ip_protocol_to_name(int id);
+int ip_protocol_from_name(const char *name);
+int parse_ip_protocol(const char *s);
diff --git a/src/shared/ip-protocol-to-name.awk b/src/shared/ip-protocol-to-name.awk
new file mode 100644
index 0000000..824f811
--- /dev/null
+++ b/src/shared/ip-protocol-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{ # emits the opening of the C array that ip_protocol_to_name() indexes by protocol number
+ print "static const char* const ip_protocol_names[] = { "
+}
+!/HOPOPTS/ { # one designated initializer per input line; NOTE(review): HOPOPTS presumably skipped because it shares its number with IP -- confirm
+ printf " [IPPROTO_%s] = \"%s\",\n", $1, tolower($1)
+}
+END{ # closes the array
+ print "};"
+}
diff --git a/src/shared/journal-importer.c b/src/shared/journal-importer.c
new file mode 100644
index 0000000..8638cd3
--- /dev/null
+++ b/src/shared/journal-importer.c
@@ -0,0 +1,504 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-importer.h"
+#include "journal-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "unaligned.h"
+
+enum { /* values of JournalImporter.state, advanced by journal_importer_process_data() */
+ IMPORTER_STATE_LINE = 0, /* waiting to read, or reading line */
+ IMPORTER_STATE_DATA_START, /* reading binary data header */
+ IMPORTER_STATE_DATA, /* reading binary data */
+ IMPORTER_STATE_DATA_FINISH, /* expecting newline */
+ IMPORTER_STATE_EOF, /* done */
+};
+
+static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) {
+        struct iovec *slot;
+
+        /* Appends one (data, len) pair to the vector. The memory is referenced, not copied. Fails with
+         * -E2BIG once ENTRY_FIELD_COUNT_MAX entries are stored, and with the result of log_oom() if
+         * the array cannot be enlarged. */
+        if (iovw->count >= ENTRY_FIELD_COUNT_MAX)
+                return -E2BIG;
+
+        if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
+                return log_oom();
+
+        slot = &iovw->iovec[iovw->count];
+        *slot = IOVEC_MAKE(data, len);
+        iovw->count++;
+
+        return 0;
+}
+
+static void iovw_free_contents(struct iovec_wrapper *iovw) {
+        /* Releases the array itself (not the memory its entries point at) and resets the bookkeeping. */
+        iovw->iovec = mfree(iovw->iovec);
+        iovw->count = 0;
+        iovw->size_bytes = 0;
+}
+
+static void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new) {
+        size_t idx;
+
+        /* Re-targets every entry from the buffer that started at 'old' to the one starting at 'new',
+         * keeping each entry's offset into the buffer. */
+        for (idx = 0; idx < iovw->count; idx++) {
+                struct iovec *v = &iovw->iovec[idx];
+
+                v->iov_base = (char*) v->iov_base - old + new;
+        }
+}
+
+size_t iovw_size(struct iovec_wrapper *iovw) {
+        size_t total = 0, left = iovw->count;
+
+        /* Adds up the lengths of all entries. */
+        while (left > 0)
+                total += iovw->iovec[--left].iov_len;
+
+        return total;
+}
+
+void journal_importer_cleanup(JournalImporter *imp) {
+        bool owns_fd = imp->fd >= 0 && !imp->passive_fd;
+
+        /* Frees everything the importer allocated. The file descriptor is only closed if it is valid
+         * and not marked as passive. */
+        if (owns_fd) {
+                log_debug("Closing %s (fd=%d)", imp->name ?: "importer", imp->fd);
+                safe_close(imp->fd);
+        }
+
+        iovw_free_contents(&imp->iovw);
+        free(imp->buf);
+        free(imp->name);
+}
+
+static char* realloc_buffer(JournalImporter *imp, size_t size) {
+ char *b, *old = imp->buf;
+ /* Enlarges imp->buf via GREEDY_REALLOC() for a request of 'size' bytes. As the buffer may move, all
+ * iovec entries recorded so far are rebased from the old start address (captured above, before the
+ * reallocation) to the new one. Returns NULL on failure, in which case nothing is rebased. */
+ b = GREEDY_REALLOC(imp->buf, imp->size, size);
+ if (!b)
+ return NULL;
+
+ iovw_rebase(&imp->iovw, old, imp->buf);
+
+ return b;
+}
+
+static int get_line(JournalImporter *imp, char **line, size_t *size) {
+ ssize_t n;
+ char *c = NULL;
+ /* Extracts the next newline-terminated line from the buffer, reading more input from imp->fd when
+ * the buffered data holds no newline yet. On success returns 1, points *line at the start of the
+ * line, stores its length including the '\n' in *size and advances imp->offset past it. Returns 0
+ * when read() reports end of input, -EAGAIN for a passive fd without a complete line in the buffer,
+ * or another negative error. */
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_LINE);
+ assert(imp->offset <= imp->filled);
+ assert(imp->filled <= imp->size);
+ assert(!imp->buf || imp->size > 0);
+ assert(imp->fd >= 0);
+
+ for (;;) {
+ if (imp->buf) {
+ size_t start = MAX(imp->scanned, imp->offset); /* skip what was already searched for '\n' */
+
+ c = memchr(imp->buf + start, '\n',
+ imp->filled - start);
+ if (c != NULL)
+ break;
+ }
+ /* No newline so far: remember how far we searched, so that the next pass does not rescan it. */
+ imp->scanned = imp->filled;
+ if (imp->scanned >= DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "Entry is bigger than %u bytes.",
+ DATA_SIZE_MAX);
+
+ if (imp->passive_fd)
+ /* we have to wait for some data to come to us */
+ return -EAGAIN;
+
+ /* We know that imp->filled is at most DATA_SIZE_MAX, so if
+ we reallocate it, we'll increase the size at least a bit. */
+ assert_cc(DATA_SIZE_MAX < ENTRY_SIZE_MAX);
+ if (imp->size - imp->filled < LINE_CHUNK &&
+ !realloc_buffer(imp, MIN(imp->filled + LINE_CHUNK, ENTRY_SIZE_MAX)))
+ return log_oom();
+
+ assert(imp->buf);
+ assert(imp->size - imp->filled >= LINE_CHUNK ||
+ imp->size == ENTRY_SIZE_MAX);
+
+ n = read(imp->fd,
+ imp->buf + imp->filled,
+ imp->size - imp->filled);
+ if (n < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "read(%d, ..., %zu): %m",
+ imp->fd,
+ imp->size - imp->filled);
+ return -errno;
+ } else if (n == 0)
+ return 0;
+
+ imp->filled += n;
+ }
+ /* c points at the newline; the line runs from the current offset up to and including it. */
+ *line = imp->buf + imp->offset;
+ *size = c + 1 - imp->buf - imp->offset;
+ imp->offset += *size;
+
+ return 1;
+}
+
+static int fill_fixed_size(JournalImporter *imp, void **data, size_t size) {
+
+        /* Makes sure that at least 'size' bytes are buffered beyond imp->offset, reading from imp->fd
+         * as long as that is not yet the case.
+         *
+         * Returns 1 on success; *data then points at the bytes and imp->offset has been advanced past
+         * them. Returns 0 if read() reports end of input before enough data arrived, -EAGAIN for a
+         * passive fd whose buffer does not hold enough data yet, or another negative error. */
+
+        assert(imp);
+        assert(IN_SET(imp->state, IMPORTER_STATE_DATA_START, IMPORTER_STATE_DATA, IMPORTER_STATE_DATA_FINISH));
+        assert(size <= DATA_SIZE_MAX);
+        assert(imp->offset <= imp->filled);
+        assert(imp->filled <= imp->size);
+        assert(imp->buf || imp->size == 0);
+        assert(!imp->buf || imp->size > 0);
+        assert(imp->fd >= 0);
+        assert(data);
+
+        while (imp->filled - imp->offset < size) {
+                ssize_t n; /* read() returns ssize_t; do not narrow it to int */
+
+                if (imp->passive_fd)
+                        /* we have to wait for some data to come to us */
+                        return -EAGAIN;
+
+                if (!realloc_buffer(imp, imp->offset + size))
+                        return log_oom();
+
+                n = read(imp->fd, imp->buf + imp->filled,
+                         imp->size - imp->filled);
+                if (n < 0) {
+                        if (errno != EAGAIN)
+                                log_error_errno(errno, "read(%d, ..., %zu): %m", imp->fd,
+                                                imp->size - imp->filled);
+                        return -errno;
+                } else if (n == 0)
+                        return 0;
+
+                imp->filled += n;
+        }
+
+        *data = imp->buf + imp->offset;
+        imp->offset += size;
+
+        return 1;
+}
+
+static int get_data_size(JournalImporter *imp) {
+ int r;
+ void *data;
+ /* Reads the 64-bit little-endian length that precedes the payload of a binary field and stores it
+ * in imp->data_size. Returns 1 on success, passes on any result <= 0 of fill_fixed_size(), and
+ * fails with a logged error if the declared length exceeds DATA_SIZE_MAX. */
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_DATA_START);
+ assert(imp->data_size == 0);
+
+ r = fill_fixed_size(imp, &data, sizeof(uint64_t));
+ if (r <= 0)
+ return r;
+
+ imp->data_size = unaligned_read_le64(data);
+ if (imp->data_size > DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Stream declares field with size %zu > DATA_SIZE_MAX = %u",
+ imp->data_size, DATA_SIZE_MAX);
+ if (imp->data_size == 0) /* only warned about, not treated as an error */
+ log_warning("Binary field with zero length");
+
+ return 1;
+}
+
+static int get_data_data(JournalImporter *imp, void **data) {
+        int r;
+
+        assert(imp);
+        assert(data);
+        assert(imp->state == IMPORTER_STATE_DATA);
+
+        /* Fetches the payload of a binary field, whose length was stored in imp->data_size before.
+         * Returns 1 once the payload is complete, otherwise the result of fill_fixed_size(). */
+        r = fill_fixed_size(imp, data, imp->data_size);
+
+        return r > 0 ? 1 : r;
+}
+
+static int get_data_newline(JournalImporter *imp) {
+        char escaped[4], *data;
+        int r, len;
+
+        assert(imp);
+        assert(imp->state == IMPORTER_STATE_DATA_FINISH);
+
+        /* Consumes the single byte that follows a binary payload and insists that it is a newline.
+         * Returns 1 if so, the result of fill_fixed_size() if the byte is not available, and a logged
+         * error for any other character. */
+        r = fill_fixed_size(imp, (void**) &data, 1);
+        if (r <= 0)
+                return r;
+
+        assert(data);
+        if (*data == '\n')
+                return 1;
+
+        len = cescape_char(*data, escaped);
+
+        return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+                               "Expected newline, got '%.*s'", len, escaped);
+}
+
+static int process_special_field(JournalImporter *imp, char *line) {
+ const char *value;
+ char buf[CELLESCAPE_DEFAULT_LENGTH];
+ int r;
+ /* Handles the fields that are not simply stored. Returns 1 if the line was consumed here and must
+ * not be stored as a regular field, 0 if the caller shall store it in the usual fashion, or a
+ * negative error if a value could not be parsed or is out of range. */
+ assert(line);
+
+ value = startswith(line, "__CURSOR=");
+ if (value)
+ /* ignore __CURSOR */
+ return 1;
+
+ value = startswith(line, "__REALTIME_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __REALTIME_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_REALTIME(x)) {
+ log_warning("__REALTIME_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.realtime = x;
+ return 1;
+ }
+
+ value = startswith(line, "__MONOTONIC_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __MONOTONIC_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_MONOTONIC(x)) {
+ log_warning("__MONOTONIC_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.monotonic = x;
+ return 1;
+ }
+
+ /* Just a single underline, but it needs special treatment too. */
+ value = startswith(line, "_BOOT_ID=");
+ if (value) {
+ r = sd_id128_from_string(value, &imp->boot_id);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse _BOOT_ID '%s': %m",
+ cellescape(buf, sizeof buf, value));
+
+ /* store the field in the usual fashion too */
+ return 0;
+ }
+ /* Any other field with two leading underscores is dropped with a notice. */
+ value = startswith(line, "__");
+ if (value) {
+ log_notice("Unknown dunder line __%s, ignoring.", cellescape(buf, sizeof buf, value));
+ return 1;
+ }
+
+ /* no dunder */
+ return 0;
+}
+
+int journal_importer_process_data(JournalImporter *imp) {
+ int r;
+ /* Advances the parser by one step of its state machine. Returns 1 when an empty line completes the
+ * current entry (its fields are then referenced by imp->iovw), 0 when the caller should simply call
+ * again (this includes reaching end of input, which is recorded in imp->state), or a negative
+ * error. */
+ switch(imp->state) {
+ case IMPORTER_STATE_LINE: {
+ char *line, *sep;
+ size_t n = 0;
+
+ assert(imp->data_size == 0);
+
+ r = get_line(imp, &line, &n);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+ assert(n > 0);
+ assert(line[n-1] == '\n');
+
+ if (n == 1) {
+ log_trace("Received empty line, event is ready");
+ return 1;
+ }
+
+ /* MESSAGE=xxx\n
+ or
+ COREDUMP\n
+ LLLLLLLL0011223344...\n
+ */
+ sep = memchr(line, '=', n);
+ if (sep) {
+ /* chomp newline */
+ n--;
+
+ if (!journal_field_valid(line, sep - line, true)) {
+ char buf[64], *t;
+
+ t = strndupa(line, sep - line);
+ log_debug("Ignoring invalid field: \"%s\"",
+ cellescape(buf, sizeof buf, t));
+
+ return 0;
+ }
+
+ line[n] = '\0';
+ r = process_special_field(imp, line);
+ if (r != 0) /* consumed as special field (r > 0) or failed (r < 0): do not store it */
+ return r < 0 ? r : 0;
+
+ r = iovw_put(&imp->iovw, line, n);
+ if (r < 0)
+ return r;
+ } else {
+ /* replace \n with = */
+ line[n-1] = '=';
+ /* n still counts the replaced newline, i.e. field_len covers the name plus the '='. */
+ imp->field_len = n;
+ imp->state = IMPORTER_STATE_DATA_START;
+
+ /* we cannot put the field in iovec until we have all data */
+ }
+
+ log_trace("Received: %.*s (%s)", (int) n, line, sep ? "text" : "binary");
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_START:
+ assert(imp->data_size == 0);
+
+ r = get_data_size(imp);
+ // log_debug("get_data_size() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+ /* A zero-length payload skips the data state; only the trailing newline is expected then. */
+ imp->state = imp->data_size > 0 ?
+ IMPORTER_STATE_DATA : IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+
+ case IMPORTER_STATE_DATA: {
+ void *data;
+ char *field;
+
+ assert(imp->data_size > 0);
+
+ r = get_data_data(imp, &data);
+ // log_debug("get_data_data() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ assert(data);
+ /* The buffer holds "NAME=" followed by the 8 size bytes and then the payload. Move the name
+ * forward over the size bytes, so that name and payload form one contiguous region. */
+ field = (char*) data - sizeof(uint64_t) - imp->field_len;
+ memmove(field + sizeof(uint64_t), field, imp->field_len);
+
+ r = iovw_put(&imp->iovw, field + sizeof(uint64_t), imp->field_len + imp->data_size);
+ if (r < 0)
+ return r;
+
+ imp->state = IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_FINISH:
+ r = get_data_newline(imp);
+ // log_debug("get_data_newline() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ imp->data_size = 0;
+ imp->state = IMPORTER_STATE_LINE;
+
+ return 0; /* continue */
+ default: /* also reached for IMPORTER_STATE_EOF; NOTE(review): callers presumably check journal_importer_eof() first -- confirm */
+ assert_not_reached("wtf?");
+ }
+}
+
+int journal_importer_push_data(JournalImporter *imp, const char *data, size_t size) {
+ assert(imp);
+ assert(imp->state != IMPORTER_STATE_EOF);
+ /* Appends 'size' bytes from 'data' behind the data already buffered, enlarging the buffer first.
+ * Returns 0 on success, or a logged error if the buffer could not be enlarged. */
+ if (!realloc_buffer(imp, imp->filled + size))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOMEM),
+ "Failed to store received data of size %zu "
+ "(in addition to existing %zu bytes with %zu filled): %s",
+ size, imp->size, imp->filled,
+ strerror(ENOMEM));
+
+ memcpy(imp->buf + imp->filled, data, size);
+ imp->filled += size;
+
+ return 0;
+}
+
+void journal_importer_drop_iovw(JournalImporter *imp) {
+ size_t remain, target;
+
+ /* This function drops processed data along with the iovw that points at it */
+
+ iovw_free_contents(&imp->iovw);
+
+ /* possibly reset buffer position */
+ remain = imp->filled - imp->offset;
+
+ if (remain == 0) /* no brainer */
+ imp->offset = imp->scanned = imp->filled = 0;
+ else if (imp->offset > imp->size - imp->filled &&
+ imp->offset > remain) { /* offset > remain also means source and destination cannot overlap */
+ memcpy(imp->buf, imp->buf + imp->offset, remain);
+ imp->offset = imp->scanned = 0;
+ imp->filled = remain;
+ }
+ /* Shrink an oversized buffer: halve the target while it stays above 16 * LINE_CHUNK and the
+ * buffered data fills less than half of it. Failing to shrink is only warned about. */
+ target = imp->size;
+ while (target > 16 * LINE_CHUNK && imp->filled < target / 2)
+ target /= 2;
+ if (target < imp->size) {
+ char *tmp;
+
+ tmp = realloc(imp->buf, target);
+ if (!tmp)
+ log_warning("Failed to reallocate buffer to (smaller) size %zu",
+ target);
+ else {
+ log_debug("Reallocated buffer from %zu to %zu bytes",
+ imp->size, target);
+ imp->buf = tmp;
+ imp->size = target;
+ }
+ }
+}
+
+bool journal_importer_eof(const JournalImporter *imp) { /* true once the state machine recorded end of input */
+ return imp->state == IMPORTER_STATE_EOF;
+}
diff --git a/src/shared/journal-importer.h b/src/shared/journal-importer.h
new file mode 100644
index 0000000..7914c0c
--- /dev/null
+++ b/src/shared/journal-importer.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/uio.h>
+
+#include "sd-id128.h"
+
+#include "time-util.h"
+
+/* Make sure not to make this smaller than the maximum coredump size.
+ * See JOURNAL_SIZE_MAX in coredump.c */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#define ENTRY_SIZE_MAX (1024*1024*770u)
+#define DATA_SIZE_MAX (1024*1024*768u)
+#else
+#define ENTRY_SIZE_MAX (1024*1024*13u)
+#define DATA_SIZE_MAX (1024*1024*11u)
+#endif
+/* Step in which the line buffer is enlarged. Parenthesized like the size limits above, so that the
+ * macro acts as a single operand in any expression it is used in (e.g. as divisor). */
+#define LINE_CHUNK (8*1024u)
+
+/* The maximum number of fields in an entry */
+#define ENTRY_FIELD_COUNT_MAX 1024
+
+struct iovec_wrapper { /* growable array of iovec entries */
+ struct iovec *iovec; /* the array itself, enlarged with GREEDY_REALLOC() */
+ size_t size_bytes; /* allocation bookkeeping handed to GREEDY_REALLOC() */
+ size_t count; /* number of entries in use */
+};
+
+size_t iovw_size(struct iovec_wrapper *iovw);
+
+typedef struct JournalImporter { /* state of one import stream being parsed */
+ int fd; /* input descriptor; closed by journal_importer_cleanup() unless passive_fd is set */
+ bool passive_fd; /* if set the importer never read()s from fd itself, data has to be pushed in */
+ char *name; /* used in log messages, freed on cleanup */
+
+ char *buf;
+ size_t size; /* total size of the buffer */
+ size_t offset; /* offset to the beginning of live data in the buffer */
+ size_t scanned; /* number of bytes since the beginning of data without a newline */
+ size_t filled; /* total number of bytes in the buffer */
+
+ size_t field_len; /* used for binary fields: the field name length */
+ size_t data_size; /* and the size of the binary data chunk being processed */
+
+ struct iovec_wrapper iovw; /* the fields of the entry being assembled; entries point into buf */
+
+ int state; /* one of the IMPORTER_STATE_* values */
+ dual_timestamp ts; /* filled from the __REALTIME_TIMESTAMP= and __MONOTONIC_TIMESTAMP= fields */
+ sd_id128_t boot_id; /* filled from the _BOOT_ID= field */
+} JournalImporter;
+
+void journal_importer_cleanup(JournalImporter *);
+int journal_importer_process_data(JournalImporter *);
+int journal_importer_push_data(JournalImporter *, const char *data, size_t size);
+void journal_importer_drop_iovw(JournalImporter *);
+bool journal_importer_eof(const JournalImporter *);
+
+static inline size_t journal_importer_bytes_remaining(const JournalImporter *imp) {
+ return imp->filled; /* NOTE(review): this is the total fill level, not filled - offset -- confirm that callers expect that */
+}
diff --git a/src/shared/journal-util.c b/src/shared/journal-util.c
new file mode 100644
index 0000000..89b76af
--- /dev/null
+++ b/src/shared/journal-util.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "acl-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "log.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int access_check_var_log_journal(sd_journal *j, bool want_other_users) {
+#if HAVE_ACL
+ _cleanup_strv_free_ char **g = NULL;
+ const char* dir;
+#endif
+ int r;
+ /* Tells an unprivileged user why not all messages are shown. Returns 0 without printing anything
+ * if the caller is root, is in the 'systemd-journal' group, or (with ACL support) acl_search_groups()
+ * returns > 0. Returns 1 after printing a hint, and a negative value on error. */
+ assert(j);
+
+ /* If we are root, we should have access, don't warn. */
+ if (getuid() == 0)
+ return 0;
+
+ /* If we are in the 'systemd-journal' group, we should have
+ * access too. */
+ r = in_group("systemd-journal");
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if we are in the 'systemd-journal' group: %m");
+ if (r > 0)
+ return 0;
+
+#if HAVE_ACL
+ if (laccess("/run/log/journal", F_OK) >= 0)
+ dir = "/run/log/journal";
+ else
+ dir = "/var/log/journal";
+
+ /* If we are in any of the groups listed in the journal ACLs,
+ * then all is good, too. Let's enumerate all groups from the
+ * default ACL of the directory, which generally should allow
+ * access to most journal files too. */
+ r = acl_search_groups(dir, &g);
+ if (r < 0)
+ return log_error_errno(r, "Failed to search journal ACL: %m");
+ if (r > 0)
+ return 0;
+
+ /* Print a pretty list, if there were ACLs set. */
+ if (!strv_isempty(g)) {
+ _cleanup_free_ char *s = NULL;
+
+ /* There are groups in the ACL, let's list them */
+ r = strv_extend(&g, "systemd-journal");
+ if (r < 0)
+ return log_oom();
+
+ strv_sort(g);
+ strv_uniq(g);
+
+ s = strv_join(g, "', '");
+ if (!s)
+ return log_oom();
+
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in groups '%s' can see all messages.\n"
+ " Pass -q to turn off this notice.",
+ want_other_users ? "other users and the system" : "the system",
+ s);
+ return 1;
+ }
+#endif
+
+ /* If no ACLs were found, print a short version of the message. */
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in the 'systemd-journal' group can see all messages. Pass -q to\n"
+ " turn off this notice.",
+ want_other_users ? "other users and the system" : "the system");
+
+ return 1;
+}
+
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users) {
+ Iterator it;
+ void *code;
+ char *path;
+ int r = 0;
+ /* Reports the errors collected in j->errors. Permission errors are summarized once (with a hint
+ * unless 'quiet' is set), every other error is logged per path as a warning. Returns 0, or the
+ * result of log_error_errno(EACCES, ...) if permission errors occurred and no file is open at all. */
+ assert(j);
+
+ if (hashmap_isempty(j->errors)) {
+ if (ordered_hashmap_isempty(j->files) && !quiet)
+ log_notice("No journal files were found.");
+
+ return 0;
+ }
+
+ if (hashmap_contains(j->errors, INT_TO_PTR(-EACCES))) {
+ if (!quiet)
+ (void) access_check_var_log_journal(j, want_other_users);
+
+ if (ordered_hashmap_isempty(j->files))
+ r = log_error_errno(EACCES, "No journal files were opened due to insufficient permissions.");
+ }
+
+ HASHMAP_FOREACH_KEY(path, code, j->errors, it) {
+ int err;
+ /* The map is looked up with negative error values above; switch on the absolute value. */
+ err = abs(PTR_TO_INT(code));
+
+ switch (err) {
+ case EACCES: /* already dealt with above */
+ continue;
+
+ case ENODATA:
+ log_warning_errno(err, "Journal file %s is truncated, ignoring file.", path);
+ break;
+
+ case EPROTONOSUPPORT:
+ log_warning_errno(err, "Journal file %1$s uses an unsupported feature, ignoring file.\n"
+ "Use SYSTEMD_LOG_LEVEL=debug journalctl --file=%1$s to see the details.",
+ path);
+ break;
+
+ case EBADMSG:
+ log_warning_errno(err, "Journal file %s corrupted, ignoring file.", path);
+ break;
+
+ default:
+ log_warning_errno(err, "An error was encountered while opening journal file or directory %s, ignoring file: %m", path);
+ break;
+ }
+ }
+
+ return r;
+}
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected) {
+        size_t k;
+
+        /* Checks whether the first l bytes of p form an acceptable journal field name; l may be
+         * (size_t) -1 to use the whole NUL-terminated string. This roughly follows the POSIX syntax
+         * recommendations for environment variable names, with a few extra restrictions:
+         *
+         * http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html
+         *
+         * A name is valid if it is 1 to 64 characters long, consists only of A-Z, 0-9 and '_', and
+         * does not start with a digit. A leading underscore marks a protected field and is only
+         * accepted if allow_protected is set. */
+
+        if (l == (size_t) -1)
+                l = strlen(p);
+
+        if (l == 0 || l > 64)
+                return false;
+
+        if (p[0] == '_' && !allow_protected)
+                return false;
+
+        if (p[0] >= '0' && p[0] <= '9')
+                return false;
+
+        for (k = 0; k < l; k++) {
+                char c = p[k];
+                bool upper = c >= 'A' && c <= 'Z';
+                bool digit = c >= '0' && c <= '9';
+
+                if (!upper && !digit && c != '_')
+                        return false;
+        }
+
+        return true;
+}
diff --git a/src/shared/journal-util.h b/src/shared/journal-util.h
new file mode 100644
index 0000000..da86434
--- /dev/null
+++ b/src/shared/journal-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+bool journal_field_valid(const char *p, size_t l, bool allow_protected);
+
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users);
diff --git a/src/shared/json-internal.h b/src/shared/json-internal.h
new file mode 100644
index 0000000..bf158bf
--- /dev/null
+++ b/src/shared/json-internal.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#pragma once
+
+#include "json.h"
+
+/* This header should include all prototypes only the JSON parser itself and
+ * its tests need access to. Normal code consuming the JSON parser should not
+ * interface with this. */
+
+/* Storage for the payload of a simple (scalar) JSON value. All members overlay each other, so only one of them
+ * carries meaning at a time. */
+typedef union JsonValue {
+ /* Encodes a simple value. On x86-64 this structure is 16 bytes wide (as long double is 128bit). */
+ bool boolean;
+ long double real;
+ intmax_t integer;
+ uintmax_t unsig;
+} JsonValue;
+
+/* Let's protect us against accidental structure size changes on our most relevant arch */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonValue) == 16U);
+#endif
+
+#define JSON_VALUE_NULL ((JsonValue) {})
+
+/* We use fake JsonVariant objects for some special values, in order to avoid memory allocations for them. Note that
+ * effectively this means that there are multiple ways to encode the same objects: via these magic values or as
+ * properly allocated JsonVariant. We convert between both on-the-fly as necessary. */
+#define JSON_VARIANT_MAGIC_TRUE ((JsonVariant*) 1)
+#define JSON_VARIANT_MAGIC_FALSE ((JsonVariant*) 2)
+#define JSON_VARIANT_MAGIC_NULL ((JsonVariant*) 3)
+#define JSON_VARIANT_MAGIC_ZERO_INTEGER ((JsonVariant*) 4)
+#define JSON_VARIANT_MAGIC_ZERO_UNSIGNED ((JsonVariant*) 5)
+#define JSON_VARIANT_MAGIC_ZERO_REAL ((JsonVariant*) 6)
+#define JSON_VARIANT_MAGIC_EMPTY_STRING ((JsonVariant*) 7)
+#define JSON_VARIANT_MAGIC_EMPTY_ARRAY ((JsonVariant*) 8)
+#define JSON_VARIANT_MAGIC_EMPTY_OBJECT ((JsonVariant*) 9)
+#define _JSON_VARIANT_MAGIC_MAX ((JsonVariant*) 10)
+
+/* This is only safe as long as we don't define more than 4K magic pointers, i.e. the page size of the simplest
+ * architectures we support. That's because we rely on the fact that malloc() will never allocate from the first memory
+ * page, as it is a faulting page for catching NULL pointer dereferences. */
+assert_cc((uintptr_t) _JSON_VARIANT_MAGIC_MAX < 4096U);
+
+/* Token identifiers for the JSON tokenizer — presumably what json_tokenize() hands back; its definition is not
+ * visible here, confirm. _JSON_TOKEN_MAX follows the last real token, and _JSON_TOKEN_INVALID is the negative
+ * "no token" marker. */
+enum { /* JSON tokens */
+ JSON_TOKEN_END,
+ JSON_TOKEN_COLON,
+ JSON_TOKEN_COMMA,
+ JSON_TOKEN_OBJECT_OPEN,
+ JSON_TOKEN_OBJECT_CLOSE,
+ JSON_TOKEN_ARRAY_OPEN,
+ JSON_TOKEN_ARRAY_CLOSE,
+ JSON_TOKEN_STRING,
+ JSON_TOKEN_REAL,
+ JSON_TOKEN_INTEGER,
+ JSON_TOKEN_UNSIGNED,
+ JSON_TOKEN_BOOLEAN,
+ JSON_TOKEN_NULL,
+ _JSON_TOKEN_MAX,
+ _JSON_TOKEN_INVALID = -1,
+};
+
+int json_tokenize(const char **p, char **ret_string, JsonValue *ret_value, unsigned *ret_line, unsigned *ret_column, void **state, unsigned *line, unsigned *column);
diff --git a/src/shared/json.c b/src/shared/json.c
new file mode 100644
index 0000000..3786ff1
--- /dev/null
+++ b/src/shared/json.c
@@ -0,0 +1,3480 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "float.h"
+#include "hexdecoct.h"
+#include "json-internal.h"
+#include "json.h"
+#include "macro.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "utf8.h"
+
+/* Refuse putting together variants with a larger depth than 4K by default (as a protection against overflowing stacks
+ * if code processes JSON objects recursively). Note that we store the depth in an uint16_t, hence make sure this
+ * remains under 2^16.
+ * The value was 16k, but it was discovered to be too high on llvm/x86-64. See also the issue #10738. */
+#define DEPTH_MAX (4U*1024U)
+assert_cc(DEPTH_MAX <= UINT16_MAX);
+
+/* Reference-counted record naming the origin (e.g. a file name) of parsed JSON variants. */
+typedef struct JsonSource {
+ /* When we parse from a file or similar, encodes the filename, to indicate the source of a json variant */
+ size_t n_ref; /* reference counter, set to 1 by json_source_new() */
+ unsigned max_line; /* NOTE(review): presumably the highest line number seen in this source — confirm against the parser */
+ unsigned max_column; /* NOTE(review): presumably the highest column number seen in this source — confirm against the parser */
+ char name[]; /* NUL-terminated copy of the source name, allocated together with the structure */
+} JsonSource;
+
+/* On x86-64 this whole structure should have a size of 6 * 64 bit = 48 bytes */
+struct JsonVariant {
+ union {
+ /* We either maintain a reference counter for this variant itself, or we are embedded into an
+ * array/object, in which case only that surrounding object is ref-counted. (If 'is_embedded' is false,
+ * see below.) */
+ size_t n_ref;
+
+ /* If this JsonVariant is part of an array/object, then this field points to the surrounding
+ * JSON_VARIANT_ARRAY/JSON_VARIANT_OBJECT object. (If 'is_embedded' is true, see below.) */
+ JsonVariant *parent;
+ };
+
+ /* If this was parsed from some file or buffer, this stores where from, as well as the source line/column */
+ JsonSource *source;
+ unsigned line, column;
+
+ JsonVariantType type:5;
+
+ /* A marker whether this variant is embedded into an array/object or not. If true, the 'parent' pointer above
+ * is valid. If false, the 'n_ref' field above is valid instead. */
+ bool is_embedded:1;
+
+ /* In some conditions (for example, if this object is part of an array of strings or objects), we don't store
+ * any data inline, but instead simply reference an external object and act as surrogate of it. In that case
+ * this bool is set, and the external object is referenced through the .reference field below. */
+ bool is_reference:1;
+
+ /* While comparing two arrays, we use this for marking what we already have seen */
+ bool is_marked:1;
+
+ /* The current 'depth' of the JsonVariant, i.e. how many levels of member variants this has */
+ uint16_t depth;
+
+ union {
+ /* For simple types we store the value in-line. */
+ JsonValue value;
+
+ /* For objects and arrays we store the number of elements immediately following */
+ size_t n_elements;
+
+ /* If is_reference as indicated above is set, this is where the reference object is actually stored. */
+ JsonVariant *reference;
+
+ /* Strings are placed immediately after the structure. Note that when this is a JsonVariant embedded
+ * into an array we might encode strings up to INLINE_STRING_MAX characters directly inside the
+ * element, while longer strings are stored as references. When this object is not embedded into an
+ * array, but stand-alone we allocate the right size for the whole structure, i.e. the array might be
+ * much larger than INLINE_STRING_MAX.
+ *
+ * Note that because we want to allocate arrays of the JsonVariant structure we specify [0] here,
+ * rather than the prettier []. If we wouldn't, then this char array would have undefined size, and so
+ * would the union and then the struct this is included in. And of structures with undefined size we
+ * can't allocate arrays (at least not easily). */
+ char string[0];
+ };
+};
+
+/* Inside string arrays we have a series of JsonVariant structures one after the other. In this case, strings longer
+ * than INLINE_STRING_MAX are stored as references, and all shorter ones inline. (This means — on x86-64 — strings up
+ * to 15 chars are stored within the array elements, and all others in separate allocations) */
+#define INLINE_STRING_MAX (sizeof(JsonVariant) - offsetof(JsonVariant, string) - 1U)
+
+/* Let's make sure this structure isn't increased in size accidentally. This check is only for our most relevant arch
+ * (x86-64). */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonVariant) == 48U);
+assert_cc(INLINE_STRING_MAX == 15U);
+#endif
+
+static JsonSource* json_source_new(const char *name) {
+        JsonSource *source;
+        size_t sz;
+
+        /* Allocates a new source record carrying a copy of 'name', with a reference count of 1. Returns NULL if
+         * we ran out of memory. */
+
+        assert(name);
+
+        sz = strlen(name) + 1; /* include the trailing NUL */
+
+        source = malloc(offsetof(JsonSource, name) + sz);
+        if (!source)
+                return NULL;
+
+        *source = (JsonSource) {
+                .n_ref = 1,
+        };
+        memcpy(source->name, name, sz);
+
+        return source;
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(JsonSource, json_source, mfree);
+
+static bool json_source_equal(JsonSource *a, JsonSource *b) {
+        /* Two sources are equal if they are the very same object (which includes both being NULL), or if both
+         * are set and carry the same name. */
+
+        if (a == b)
+                return true;
+
+        return a && b && streq(a->name, b->name);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonSource*, json_source_unref);
+
+/* There are four kinds of JsonVariant* pointers:
+ *
+ * 1. NULL
+ * 2. A 'regular' one, i.e. pointing to malloc() memory
+ * 3. A 'magic' one, i.e. one of the special JSON_VARIANT_MAGIC_XYZ values, that encode a few very basic values directly in the pointer.
+ * 4. A 'const string' one, i.e. a pointer to a const string.
+ *
+ * The four kinds of pointers can be discerned like this:
+ *
+ * Detecting #1 is easy, just compare with NULL. Detecting #3 is similarly easy: all magic pointers are below
+ * _JSON_VARIANT_MAGIC_MAX (which is pretty low, within the first memory page, which is special on Linux and other
+ * OSes, as it is a faulting page). In order to discern #2 and #4 we check the lowest bit. If it's off it's #2,
+ * otherwise #4. This makes use of the fact that malloc() will return "maximum aligned" memory, which definitely
+ * means the pointer is even. This means we can use the uneven pointers to reference static strings, as long as we
+ * make sure that all static strings used like this are aligned to 2 (or higher), and that we mask the bit on
+ * access. The JSON_VARIANT_STRING_CONST() macro encodes strings as JsonVariant* pointers, with the bit set. */
+
+static bool json_variant_is_magic(const JsonVariant *v) {
+        /* Magic pointers are the non-NULL ones below _JSON_VARIANT_MAGIC_MAX */
+        return v && v < _JSON_VARIANT_MAGIC_MAX;
+}
+
+static bool json_variant_is_const_string(const JsonVariant *v) {
+        uintptr_t bits = (uintptr_t) v;
+
+        /* A proper JsonVariant is aligned to whatever malloc() aligns things to, which is definitely not uneven.
+         * We hence treat every uneven pointer outside of the NULL/magic range as an indicator for a const
+         * string. */
+
+        return v >= _JSON_VARIANT_MAGIC_MAX && (bits & 1) != 0;
+}
+
+static bool json_variant_is_regular(const JsonVariant *v) {
+        uintptr_t bits = (uintptr_t) v;
+
+        /* Regular variants are the even pointers outside of the NULL/magic range, i.e. the ones that may
+         * actually be dereferenced as a JsonVariant structure. */
+
+        return v >= _JSON_VARIANT_MAGIC_MAX && (bits & 1) == 0;
+}
+
+static JsonVariant *json_variant_dereference(JsonVariant *v) {
+
+        /* Follows chains of reference variants until we hit NULL, a magic/const-string pointer, or a regular
+         * variant that is not a reference itself, and returns that. */
+
+        while (v && json_variant_is_regular(v) && v->is_reference)
+                v = v->reference;
+
+        return v;
+}
+
+static uint16_t json_variant_depth(JsonVariant *v) {
+        JsonVariant *target;
+
+        /* Returns the nesting depth recorded in the (dereferenced) variant. NULL, magic and const-string
+         * variants have no members and hence depth 0. */
+
+        target = json_variant_dereference(v);
+        if (target && json_variant_is_regular(target))
+                return target->depth;
+
+        return 0;
+}
+
+/* Returns the canonical representation of 'v' (NULL stays NULL). No reference is taken on the returned
+ * pointer. */
+static JsonVariant *json_variant_normalize(JsonVariant *v) {
+
+ /* Converts json variants to their normalized form, i.e. fully dereferenced and wherever possible converted to
+ * the "magic" version if there is one */
+
+ if (!v)
+ return NULL;
+
+ v = json_variant_dereference(v);
+
+ switch (json_variant_type(v)) {
+
+ case JSON_VARIANT_BOOLEAN:
+ return json_variant_boolean(v) ? JSON_VARIANT_MAGIC_TRUE : JSON_VARIANT_MAGIC_FALSE;
+
+ case JSON_VARIANT_NULL:
+ return JSON_VARIANT_MAGIC_NULL;
+
+ /* For the remaining types only the zero/empty value has a magic equivalent, everything else is returned as is */
+ case JSON_VARIANT_INTEGER:
+ return json_variant_integer(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_INTEGER : v;
+
+ case JSON_VARIANT_UNSIGNED:
+ return json_variant_unsigned(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_UNSIGNED : v;
+
+ case JSON_VARIANT_REAL:
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ return json_variant_real(v) == 0.0 ? JSON_VARIANT_MAGIC_ZERO_REAL : v;
+#pragma GCC diagnostic pop
+
+ case JSON_VARIANT_STRING:
+ return isempty(json_variant_string(v)) ? JSON_VARIANT_MAGIC_EMPTY_STRING : v;
+
+ case JSON_VARIANT_ARRAY:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_ARRAY : v;
+
+ case JSON_VARIANT_OBJECT:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_OBJECT : v;
+
+ default:
+ return v;
+ }
+}
+
+static JsonVariant *json_variant_conservative_normalize(JsonVariant *v) {
+
+        /* Like json_variant_normalize(), but leaves alone any variant that carries source/line/column
+         * information, so that this context isn't lost. Non-regular variants (magic ones, const strings) are
+         * returned unmodified, too. */
+
+        if (!v)
+                return NULL;
+
+        if (json_variant_is_regular(v) &&
+            !v->source && v->line == 0 && v->column == 0)
+                return json_variant_normalize(v);
+
+        return v;
+}
+
+static int json_variant_new(JsonVariant **ret, JsonVariantType type, size_t space) {
+        JsonVariant *fresh;
+
+        /* Allocates a zero-initialized, stand-alone variant of the given type, with 'space' bytes of payload
+         * room following the header, and an initial reference count of 1. Returns 0 on success, -ENOMEM if the
+         * allocation failed. */
+
+        assert_return(ret, -EINVAL);
+
+        fresh = malloc0(offsetof(JsonVariant, value) + space);
+        if (!fresh)
+                return -ENOMEM;
+
+        fresh->type = type;
+        fresh->n_ref = 1;
+
+        *ret = fresh;
+        return 0;
+}
+
+int json_variant_new_integer(JsonVariant **ret, intmax_t i) {
+        JsonVariant *fresh;
+        int r;
+
+        /* Creates a signed integer variant. Zero is represented by a magic pointer, so that no allocation is
+         * needed for it. Returns 0 on success, negative errno-style error otherwise. */
+
+        assert_return(ret, -EINVAL);
+
+        if (i != 0) {
+                r = json_variant_new(&fresh, JSON_VARIANT_INTEGER, sizeof(i));
+                if (r < 0)
+                        return r;
+
+                fresh->value.integer = i;
+                *ret = fresh;
+        } else
+                *ret = JSON_VARIANT_MAGIC_ZERO_INTEGER;
+
+        return 0;
+}
+
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u) {
+        JsonVariant *fresh;
+        int r;
+
+        /* Creates an unsigned integer variant. Zero is represented by a magic pointer, so that no allocation
+         * is needed for it. Returns 0 on success, negative errno-style error otherwise. */
+
+        assert_return(ret, -EINVAL);
+
+        if (u != 0) {
+                r = json_variant_new(&fresh, JSON_VARIANT_UNSIGNED, sizeof(u));
+                if (r < 0)
+                        return r;
+
+                fresh->value.unsig = u;
+                *ret = fresh;
+        } else
+                *ret = JSON_VARIANT_MAGIC_ZERO_UNSIGNED;
+
+        return 0;
+}
+
+int json_variant_new_real(JsonVariant **ret, long double d) {
+        JsonVariant *fresh;
+        bool is_zero;
+        int r;
+
+        /* Creates a floating point variant. Zero is represented by a magic pointer, so that no allocation is
+         * needed for it. Returns 0 on success, negative errno-style error otherwise. */
+
+        assert_return(ret, -EINVAL);
+
+        /* The exact comparison is what we want here, hence silence the compiler about it */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+        is_zero = d == 0.0;
+#pragma GCC diagnostic pop
+
+        if (is_zero) {
+                *ret = JSON_VARIANT_MAGIC_ZERO_REAL;
+                return 0;
+        }
+
+        r = json_variant_new(&fresh, JSON_VARIANT_REAL, sizeof(d));
+        if (r < 0)
+                return r;
+
+        fresh->value.real = d;
+        *ret = fresh;
+
+        return 0;
+}
+
+int json_variant_new_boolean(JsonVariant **ret, bool b) {
+        /* Booleans never need an allocation, both values have a magic pointer representation. */
+
+        assert_return(ret, -EINVAL);
+
+        *ret = b ? JSON_VARIANT_MAGIC_TRUE : JSON_VARIANT_MAGIC_FALSE;
+        return 0;
+}
+
+/* Stores the JSON "null" variant in *ret. This is a magic pointer, hence no memory is allocated. Returns 0. */
+int json_variant_new_null(JsonVariant **ret) {
+ assert_return(ret, -EINVAL);
+
+ *ret = JSON_VARIANT_MAGIC_NULL;
+ return 0;
+}
+
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n) {
+        JsonVariant *fresh;
+        int r;
+
+        /* Creates a string variant from the first 'n' bytes of 's'. A NULL 's' (which requires n == 0) is
+         * turned into a JSON null variant, and an empty string into the matching magic pointer. Returns 0 on
+         * success, negative errno-style error otherwise. */
+
+        assert_return(ret, -EINVAL);
+
+        if (!s) {
+                assert_return(n == 0, -EINVAL);
+                return json_variant_new_null(ret);
+        }
+
+        if (n == 0) {
+                *ret = JSON_VARIANT_MAGIC_EMPTY_STRING;
+                return 0;
+        }
+
+        /* Reserve room for the characters plus the trailing NUL */
+        r = json_variant_new(&fresh, JSON_VARIANT_STRING, n + 1);
+        if (r < 0)
+                return r;
+
+        memcpy(fresh->string, s, n);
+        fresh->string[n] = 0;
+
+        *ret = fresh;
+        return 0;
+}
+
+/* Fills in the type and payload of 'a' from 'b'. Numbers, booleans and short strings are copied inline; long
+ * strings, arrays and objects are stored as a reference to 'b', on which a reference is taken. A NULL 'b' turns
+ * 'a' into a JSON null. Only the type, payload and is_reference fields of 'a' are written here. */
+static void json_variant_set(JsonVariant *a, JsonVariant *b) {
+ assert(a);
+
+ b = json_variant_dereference(b);
+ if (!b) {
+ a->type = JSON_VARIANT_NULL;
+ return;
+ }
+
+ a->type = json_variant_type(b);
+ switch (a->type) {
+
+ case JSON_VARIANT_INTEGER:
+ a->value.integer = json_variant_integer(b);
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ a->value.unsig = json_variant_unsigned(b);
+ break;
+
+ case JSON_VARIANT_REAL:
+ a->value.real = json_variant_real(b);
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+ a->value.boolean = json_variant_boolean(b);
+ break;
+
+ case JSON_VARIANT_STRING: {
+ const char *s;
+
+ assert_se(s = json_variant_string(b));
+
+ /* Short strings we can store inline */
+ /* (strnlen() is bounded, so we never scan more than INLINE_STRING_MAX+1 bytes of a long string) */
+ if (strnlen(s, INLINE_STRING_MAX+1) <= INLINE_STRING_MAX) {
+ strcpy(a->string, s);
+ break;
+ }
+
+ /* For longer strings, use a reference… */
+ _fallthrough_;
+ }
+
+ case JSON_VARIANT_ARRAY:
+ case JSON_VARIANT_OBJECT:
+ a->is_reference = true;
+ a->reference = json_variant_ref(json_variant_conservative_normalize(b));
+ break;
+
+ case JSON_VARIANT_NULL:
+ break;
+
+ default:
+ assert_not_reached("Unexpected variant type");
+ }
+}
+
+static void json_variant_copy_source(JsonVariant *v, JsonVariant *from) {
+        /* Propagates the source location (source object, line, column) from 'from' to 'v', taking a new
+         * reference on the source object. Magic and const-string variants carry no location, in which case
+         * 'v' is left untouched. */
+
+        assert(v);
+        assert(from);
+
+        if (json_variant_is_regular(from)) {
+                v->source = json_source_ref(from->source);
+                v->line = from->line;
+                v->column = from->column;
+        }
+}
+
+/* Creates a new array variant out of the 'n' variants listed in 'array'. The elements are stored in one
+ * allocation, directly behind the array header. An empty array is represented by a magic pointer. Returns 0 on
+ * success, -ENOMEM if the allocation failed, -ELNRNG if an element is already nested DEPTH_MAX levels deep. */
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ };
+
+ /* v->n_elements doubles as loop counter, so that it always reflects how many elements have been initialized
+ * so far — presumably so that the cleanup handler only releases those on early return; confirm against
+ * json_variant_unrefp() */
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1; /* the array is one level deeper than its deepest element */
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n) {
+        const uint8_t *bytes = p;
+        JsonVariant *array, *e;
+        size_t i;
+
+        /* Creates an array variant with one unsigned integer element per byte of the 'n' bytes at 'p'. An empty
+         * buffer results in the magic empty array. Returns 0 on success, -ENOMEM if the allocation failed. */
+
+        assert_return(ret, -EINVAL);
+        if (n == 0) {
+                *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+                return 0;
+        }
+        assert_return(p, -EINVAL);
+
+        /* One slot for the array header, followed by one per element */
+        array = new(JsonVariant, n + 1);
+        if (!array)
+                return -ENOMEM;
+
+        *array = (JsonVariant) {
+                .n_ref = 1,
+                .type = JSON_VARIANT_ARRAY,
+                .n_elements = n,
+                .depth = 1,
+        };
+
+        for (i = 0, e = array + 1; i < n; i++, e++)
+                *e = (JsonVariant) {
+                        .is_embedded = true,
+                        .parent = array,
+                        .type = JSON_VARIANT_UNSIGNED,
+                        .value.unsig = bytes[i],
+                };
+
+        *ret = array;
+        return 0;
+}
+
+/* Creates an array variant with one string element per entry of the string list 'l'. Strings of up to
+ * INLINE_STRING_MAX characters are stored inside the array element itself, longer ones in a separate string
+ * variant that the element references. An empty list results in the magic empty array. Returns 0 on success,
+ * -ENOMEM or the error of json_variant_new_stringn() on failure. */
+int json_variant_new_array_strv(JsonVariant **ret, char **l) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ n = strv_length(l);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ .depth = 1,
+ };
+
+ /* v->n_elements doubles as loop counter, i.e. it always tells how many elements are initialized so far */
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements;
+ size_t k;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ .type = JSON_VARIANT_STRING,
+ };
+
+ k = strlen(l[v->n_elements]);
+
+ if (k > INLINE_STRING_MAX) {
+ /* If string is too long, store it as reference. */
+
+ r = json_variant_new_stringn(&w->reference, l[v->n_elements], k);
+ if (r < 0)
+ return r;
+
+ w->is_reference = true;
+ } else
+ memcpy(w->string, l[v->n_elements], k+1); /* k+1: copy the trailing NUL, too */
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+/* Creates a new object variant out of the 'n' variants listed in 'array', which are expected to alternate
+ * between key (which must be a string) and value, hence 'n' must be even. An empty object is represented by a
+ * magic pointer. Returns 0 on success, -EINVAL on an odd 'n' or a non-string key, -ENOMEM if the allocation
+ * failed, -ELNRNG if an element is already nested DEPTH_MAX levels deep. */
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_OBJECT;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+ assert_return(n % 2 == 0, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_OBJECT,
+ };
+
+ /* v->n_elements doubles as loop counter, i.e. it always tells how many elements are initialized so far */
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ if ((v->n_elements & 1) == 0 &&
+ !json_variant_is_string(c))
+ return -EINVAL; /* Every second one needs to be a string, as it is the key name */
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1; /* the object is one level deeper than its deepest member */
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+ }
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+static void json_variant_free_inner(JsonVariant *v) {
+        JsonVariant *e, *end;
+
+        /* Releases everything a variant refers to (source object, referenced variant, embedded elements), but
+         * not the memory of the variant itself. */
+
+        assert(v);
+
+        if (!json_variant_is_regular(v))
+                return;
+
+        json_source_unref(v->source);
+
+        if (v->is_reference) {
+                json_variant_unref(v->reference);
+                return;
+        }
+
+        if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+                return;
+
+        /* The elements are placed right behind the array/object header */
+        end = v + 1 + v->n_elements;
+        for (e = v + 1; e < end; e++)
+                json_variant_free_inner(e);
+}
+
+JsonVariant *json_variant_ref(JsonVariant *v) {
+        /* Takes a reference on the variant and returns it. NULL, magic and const-string variants are not
+         * reference counted and passed through as is. */
+
+        if (!v)
+                return NULL;
+
+        if (json_variant_is_regular(v)) {
+                if (v->is_embedded)
+                        json_variant_ref(v->parent); /* ref the compounding variant instead */
+                else {
+                        assert(v->n_ref > 0);
+                        v->n_ref++;
+                }
+        }
+
+        return v;
+}
+
+JsonVariant *json_variant_unref(JsonVariant *v) {
+        /* Drops a reference from the variant, and releases it when that was the last one. Always returns
+         * NULL. NULL, magic and const-string variants are not reference counted and hence ignored. */
+
+        if (!v || !json_variant_is_regular(v))
+                return NULL;
+
+        /* Embedded variants are kept alive through the array/object they are part of */
+        if (v->is_embedded)
+                return json_variant_unref(v->parent);
+
+        assert(v->n_ref > 0);
+
+        if (--v->n_ref == 0) {
+                json_variant_free_inner(v);
+                free(v);
+        }
+
+        return NULL;
+}
+
+void json_variant_unref_many(JsonVariant **array, size_t n) {
+        JsonVariant **p;
+
+        /* Drops one reference from each of the 'n' variants listed in 'array'. The array itself is neither
+         * modified nor freed. */
+
+        assert(array || n == 0);
+
+        for (p = array; n > 0; p++, n--)
+                json_variant_unref(*p);
+}
+
+const char *json_variant_string(JsonVariant *v) {
+        /* Returns the string stored in the variant, or NULL if the variant is NULL or not a string (the latter
+         * is logged at debug level). The returned pointer is owned by the variant. */
+
+        if (!v)
+                return NULL;
+
+        if (v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+                return "";
+
+        if (!json_variant_is_magic(v)) {
+
+                if (json_variant_is_const_string(v)) {
+                        uintptr_t p = (uintptr_t) v;
+
+                        /* Const strings are encoded as the string's address with the lowest bit set, undo that */
+                        assert((p & 1) != 0);
+                        return (const char*) (p ^ 1U);
+                }
+
+                if (v->is_reference)
+                        return json_variant_string(v->reference);
+
+                if (v->type == JSON_VARIANT_STRING)
+                        return v->string;
+        }
+
+        log_debug("Non-string JSON variant requested as string, returning NULL.");
+        return NULL;
+}
+
+bool json_variant_boolean(JsonVariant *v) {
+        /* Returns the boolean stored in the variant. Anything that isn't a boolean (including NULL) is logged
+         * at debug level and reported as false. */
+
+        if (v == JSON_VARIANT_MAGIC_TRUE)
+                return true;
+        if (v == JSON_VARIANT_MAGIC_FALSE)
+                return false;
+
+        if (v && json_variant_is_regular(v) && v->type == JSON_VARIANT_BOOLEAN) {
+                if (v->is_reference)
+                        return json_variant_boolean(v->reference);
+
+                return v->value.boolean;
+        }
+
+        log_debug("Non-boolean JSON variant requested as boolean, returning false.");
+        return false;
+}
+
+/* Returns the value of the variant as signed integer. Unsigned and real variants are converted if that is
+ * possible without loss. Whenever the variant is not a number, or cannot be represented, a debug message is
+ * logged and 0 is returned. */
+intmax_t json_variant_integer(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ /* All magic zeroes are interchangeable */
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_integer(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_INTEGER:
+ return v->value.integer;
+
+ case JSON_VARIANT_UNSIGNED:
+ if (v->value.unsig <= INTMAX_MAX)
+ return (intmax_t) v->value.unsig;
+
+ log_debug("Unsigned integer %ju requested as signed integer and out of range, returning 0.", v->value.unsig);
+ return 0;
+
+ case JSON_VARIANT_REAL: {
+ intmax_t converted;
+
+ converted = (intmax_t) v->value.real;
+
+ /* Only accept the conversion if converting back yields the very same value */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ if ((long double) converted == v->value.real)
+#pragma GCC diagnostic pop
+ return converted;
+
+ log_debug("Real %Lg requested as integer, and cannot be converted losslessly, returning 0.", v->value.real);
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as integer, returning 0.");
+ return 0;
+}
+
+uintmax_t json_variant_unsigned(JsonVariant *v) {
+        /* Returns the value of the variant as unsigned integer. Signed and real variants are converted if that
+         * is possible without loss. Whenever the variant is not a number, or cannot be represented, a debug
+         * message is logged and 0 is returned. */
+
+        if (!v)
+                goto mismatch;
+        /* All magic zeroes are interchangeable */
+        if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+            v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+            v == JSON_VARIANT_MAGIC_ZERO_REAL)
+                return 0;
+        if (!json_variant_is_regular(v))
+                goto mismatch;
+        if (v->is_reference) /* Stay in the unsigned domain: going through json_variant_integer() would turn values > INTMAX_MAX into 0 */
+                return json_variant_unsigned(v->reference);
+
+        switch (v->type) {
+
+        case JSON_VARIANT_INTEGER:
+                if (v->value.integer >= 0)
+                        return (uintmax_t) v->value.integer;
+
+                /* The value is an intmax_t, hence format it with %ji */
+                log_debug("Signed integer %ji requested as unsigned integer and out of range, returning 0.", v->value.integer);
+                return 0;
+
+        case JSON_VARIANT_UNSIGNED:
+                return v->value.unsig;
+
+        case JSON_VARIANT_REAL: {
+                uintmax_t converted;
+
+                converted = (uintmax_t) v->value.real;
+
+                /* Only accept the conversion if converting back yields the very same value */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+                if ((long double) converted == v->value.real)
+#pragma GCC diagnostic pop
+                        return converted;
+
+                log_debug("Real %Lg requested as unsigned integer, and cannot be converted losslessly, returning 0.", v->value.real);
+                return 0;
+        }
+
+        default:
+                break;
+        }
+
+mismatch:
+        log_debug("Non-integer JSON variant requested as unsigned, returning 0.");
+        return 0;
+}
+
+long double json_variant_real(JsonVariant *v) {
+        /* Returns the value of the variant as floating point number. Signed and unsigned integer variants are
+         * converted if that is possible without loss. NULL is quietly reported as 0.0; for any other variant
+         * that is not a number, or cannot be represented, a debug message is logged and 0.0 is returned. */
+
+        if (!v)
+                return 0.0;
+        /* All magic zeroes are interchangeable */
+        if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+            v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+            v == JSON_VARIANT_MAGIC_ZERO_REAL)
+                return 0.0;
+        if (!json_variant_is_regular(v))
+                goto mismatch;
+        if (v->is_reference)
+                return json_variant_real(v->reference);
+
+        switch (v->type) {
+
+        case JSON_VARIANT_REAL:
+                return v->value.real;
+
+        case JSON_VARIANT_INTEGER: {
+                long double converted;
+
+                converted = (long double) v->value.integer;
+
+                /* Only accept the conversion if converting back yields the very same value */
+                if ((intmax_t) converted == v->value.integer)
+                        return converted;
+
+                log_debug("Signed integer %ji requested as real, and cannot be converted losslessly, returning 0.", v->value.integer);
+                return 0.0;
+        }
+
+        case JSON_VARIANT_UNSIGNED: {
+                long double converted;
+
+                converted = (long double) v->value.unsig;
+
+                /* Only accept the conversion if converting back yields the very same value */
+                if ((uintmax_t) converted == v->value.unsig)
+                        return converted;
+
+                log_debug("Unsigned integer %ju requested as real, and cannot be converted losslessly, returning 0.", v->value.unsig);
+                return 0.0;
+        }
+
+        default:
+                break;
+        }
+
+mismatch:
+        /* This used to claim the variant was "requested as integer", which pointed debugging at the wrong accessor */
+        log_debug("Non-numeric JSON variant requested as real, returning 0.");
+        return 0.0;
+}
+
+bool json_variant_is_negative(JsonVariant *v) {
+        /* Tells whether a numeric variant is below zero. This is useful as checking whether numbers are
+         * negative is pretty complex since we have three types of numbers. And some JSON code (OCI for example)
+         * uses negative numbers to mark "not defined" numeric values. Non-numeric variants (including NULL) are
+         * logged at debug level and reported as not negative. */
+
+        if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+            v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+            v == JSON_VARIANT_MAGIC_ZERO_REAL)
+                return false;
+
+        if (!v || !json_variant_is_regular(v))
+                goto mismatch;
+
+        if (v->is_reference)
+                return json_variant_is_negative(v->reference);
+
+        if (v->type == JSON_VARIANT_REAL)
+                return v->value.real < 0;
+        if (v->type == JSON_VARIANT_INTEGER)
+                return v->value.integer < 0;
+        if (v->type == JSON_VARIANT_UNSIGNED)
+                return false;
+
+mismatch:
+        log_debug("Non-integer JSON variant tested for negativity, returning false.");
+        return false;
+}
+
+JsonVariantType json_variant_type(JsonVariant *v) {
+
+        /* Determines the type of a variant, taking the special pointer encodings (const strings, magic values)
+         * into account. NULL yields _JSON_VARIANT_TYPE_INVALID. */
+
+        if (!v)
+                return _JSON_VARIANT_TYPE_INVALID;
+
+        if (json_variant_is_const_string(v) || v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+                return JSON_VARIANT_STRING;
+
+        if (v == JSON_VARIANT_MAGIC_TRUE || v == JSON_VARIANT_MAGIC_FALSE)
+                return JSON_VARIANT_BOOLEAN;
+
+        if (v == JSON_VARIANT_MAGIC_NULL)
+                return JSON_VARIANT_NULL;
+
+        if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER)
+                return JSON_VARIANT_INTEGER;
+
+        if (v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED)
+                return JSON_VARIANT_UNSIGNED;
+
+        if (v == JSON_VARIANT_MAGIC_ZERO_REAL)
+                return JSON_VARIANT_REAL;
+
+        if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY)
+                return JSON_VARIANT_ARRAY;
+
+        if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+                return JSON_VARIANT_OBJECT;
+
+        /* Everything else is a real structure, which knows its type */
+        return v->type;
+}
+
+/* Checks whether the (dereferenced) variant can be treated as a value of the specified type. This is more
+ * lenient than comparing json_variant_type(): numbers also match the other numeric types they can be converted
+ * to without loss, and JSON_VARIANT_NUMBER matches any of the three numeric types. NULL never matches. */
+bool json_variant_has_type(JsonVariant *v, JsonVariantType type) {
+ JsonVariantType rt;
+
+ v = json_variant_dereference(v);
+ if (!v)
+ return false;
+
+ rt = json_variant_type(v);
+ if (rt == type)
+ return true;
+
+ /* If it's a const string, then it only can be a string, and if it is not, it's not */
+ if (json_variant_is_const_string(v))
+ return false;
+
+ /* All three magic zeroes qualify as integer, unsigned and as real */
+ if ((v == JSON_VARIANT_MAGIC_ZERO_INTEGER || v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || v == JSON_VARIANT_MAGIC_ZERO_REAL) &&
+ IN_SET(type, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL, JSON_VARIANT_NUMBER))
+ return true;
+
+ /* All other magic variant types are only equal to themselves */
+ if (json_variant_is_magic(v))
+ return false;
+
+ /* Handle the "number" pseudo type */
+ if (type == JSON_VARIANT_NUMBER)
+ return IN_SET(rt, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL);
+
+ /* Integer conversions are OK in many cases */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_UNSIGNED)
+ return v->value.integer >= 0;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_INTEGER)
+ return v->value.unsig <= INTMAX_MAX;
+
+ /* Any integer that can be converted losslessly to a real and back may also be considered a real */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_REAL)
+ return (intmax_t) (long double) v->value.integer == v->value.integer;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_REAL)
+ return (uintmax_t) (long double) v->value.unsig == v->value.unsig;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+ /* Any real that can be converted losslessly to an integer and back may also be considered an integer */
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_INTEGER)
+ return (long double) (intmax_t) v->value.real == v->value.real;
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_UNSIGNED)
+ return (long double) (uintmax_t) v->value.real == v->value.real;
+#pragma GCC diagnostic pop
+
+ return false;
+}
+
+size_t json_variant_elements(JsonVariant *v) {
+        /* Returns the number of elements stored in an array or object variant (for objects keys and values
+         * are stored as separate elements). NULL yields 0; any other non-array/non-object variant is logged at
+         * debug level and yields 0 as well. */
+
+        if (!v ||
+            v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+            v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+                return 0;
+
+        if (json_variant_is_regular(v) &&
+            IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+                return v->is_reference ? json_variant_elements(v->reference) : v->n_elements;
+
+        log_debug("Number of elements in non-array/non-object JSON variant requested, returning 0.");
+        return 0;
+}
+
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t idx) {
+        /* Returns the element at position 'idx' of an array or object variant, in (conservatively) normalized
+         * form, or NULL if the index is out of range. No reference is taken on the returned variant. Variants
+         * that are neither array nor object are logged at debug level and yield NULL. */
+
+        if (!v ||
+            v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+            v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+                return NULL;
+
+        if (!json_variant_is_regular(v) ||
+            !IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) {
+                log_debug("Element in non-array/non-object JSON variant requested by index, returning NULL.");
+                return NULL;
+        }
+
+        if (v->is_reference)
+                return json_variant_by_index(v->reference, idx);
+
+        /* The elements are placed right behind the array/object header */
+        return idx < v->n_elements ? json_variant_conservative_normalize(v + 1 + idx) : NULL;
+}
+
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key) {
+        size_t i;
+
+        /* Looks up the value stored under 'key' in an object variant. Returns the value in (conservatively)
+         * normalized form, or NULL if there is no such key, or if 'v' is not an object (the latter is logged at
+         * debug level). If 'ret_key' is non-NULL it is always written: to the variant of the matching key on
+         * success, to NULL otherwise. No references are taken on the returned variants. */
+
+        if (!v)
+                goto not_found;
+        if (!key)
+                goto not_found;
+        if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+                goto not_found;
+        if (!json_variant_is_regular(v))
+                goto mismatch;
+        if (v->type != JSON_VARIANT_OBJECT)
+                goto mismatch;
+        if (v->is_reference) /* Pass ret_key along, so that it is initialized on this path, too */
+                return json_variant_by_key_full(v->reference, key, ret_key);
+
+        /* Keys are stored at the even positions, each followed by its value */
+        for (i = 0; i < v->n_elements; i += 2) {
+                JsonVariant *p;
+
+                p = json_variant_dereference(v + 1 + i);
+
+                if (!json_variant_has_type(p, JSON_VARIANT_STRING))
+                        continue;
+
+                if (streq(json_variant_string(p), key)) {
+
+                        if (ret_key)
+                                *ret_key = json_variant_conservative_normalize(v + 1 + i);
+
+                        return json_variant_conservative_normalize(v + 1 + i + 1);
+                }
+        }
+
+not_found:
+        if (ret_key)
+                *ret_key = NULL;
+
+        return NULL;
+
+mismatch:
+        log_debug("Element in non-object JSON variant requested by key, returning NULL.");
+        if (ret_key)
+                *ret_key = NULL;
+
+        return NULL;
+}
+
+/* Convenience wrapper around json_variant_by_key_full() for callers not interested in the variant of the key. */
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key) {
+ return json_variant_by_key_full(v, key, NULL);
+}
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b) {
+        JsonVariantType t;
+
+        /* Deep comparison of two variants. Both sides are normalized first, so that the different encodings of
+         * the same value (magic pointer, reference, inline) compare equal. Arrays are equal if their elements
+         * are equal pairwise and in order; objects are equal if they have the same key/value pairs, regardless
+         * of order. */
+
+        a = json_variant_normalize(a);
+        b = json_variant_normalize(b);
+
+        if (a == b)
+                return true;
+
+        t = json_variant_type(a);
+        if (!json_variant_has_type(b, t))
+                return false;
+
+        switch (t) {
+
+        case JSON_VARIANT_STRING:
+                return streq(json_variant_string(a), json_variant_string(b));
+
+        case JSON_VARIANT_INTEGER:
+                return json_variant_integer(a) == json_variant_integer(b);
+
+        case JSON_VARIANT_UNSIGNED:
+                return json_variant_unsigned(a) == json_variant_unsigned(b);
+
+        case JSON_VARIANT_REAL:
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+                return json_variant_real(a) == json_variant_real(b);
+#pragma GCC diagnostic pop
+
+        case JSON_VARIANT_BOOLEAN:
+                return json_variant_boolean(a) == json_variant_boolean(b);
+
+        case JSON_VARIANT_NULL:
+                return true;
+
+        case JSON_VARIANT_ARRAY: {
+                size_t i, n;
+
+                n = json_variant_elements(a);
+                if (n != json_variant_elements(b))
+                        return false;
+
+                for (i = 0; i < n; i++) {
+                        if (!json_variant_equal(json_variant_by_index(a, i), json_variant_by_index(b, i)))
+                                return false;
+                }
+
+                return true;
+        }
+
+        case JSON_VARIANT_OBJECT: {
+                size_t i, n;
+
+                n = json_variant_elements(a);
+                if (n != json_variant_elements(b))
+                        return false;
+
+                /* Iterate through all keys in 'a' */
+                for (i = 0; i < n; i += 2) {
+                        bool found = false;
+                        size_t j;
+
+                        /* Match them against all keys in 'b' */
+                        for (j = 0; j < n; j += 2) {
+                                JsonVariant *key_b;
+                                bool markable;
+
+                                key_b = json_variant_by_index(b, j);
+
+                                /* json_variant_by_index() hands out normalized variants, i.e. the key might be
+                                 * a magic pointer (empty string) or a const string. Those are not backed by a
+                                 * JsonVariant structure, hence there's no mark bit we could touch. Such keys
+                                 * are simply treated as never marked. */
+                                markable = json_variant_is_regular(key_b);
+
+                                /* During the first iteration unmark everything */
+                                if (i == 0) {
+                                        if (markable)
+                                                key_b->is_marked = false;
+                                } else if (markable && key_b->is_marked) /* In later iterations if we already marked something, don't bother with it again */
+                                        continue;
+
+                                if (found)
+                                        continue;
+
+                                if (json_variant_equal(json_variant_by_index(a, i), key_b) &&
+                                    json_variant_equal(json_variant_by_index(a, i+1), json_variant_by_index(b, j+1))) {
+                                        /* Key and values match! */
+                                        found = true;
+                                        if (markable)
+                                                key_b->is_marked = true;
+
+                                        /* In the first iteration we continue the inner loop since we want to mark
+                                         * everything, otherwise exit the loop quickly after we found what we were
+                                         * looking for. */
+                                        if (i != 0)
+                                                break;
+                                }
+                        }
+
+                        if (!found)
+                                return false;
+                }
+
+                return true;
+        }
+
+        default:
+                assert_not_reached("Unknown variant type.");
+        }
+}
+
+/* Reports where 'v' was parsed from. Each of the three return parameters is optional. Variants that are
+ * not "regular" carry no location, for them NULL/0/0 is reported. Returns 0, or -EINVAL if 'v' is NULL. */
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column) {
+        bool regular;
+
+        assert_return(v, -EINVAL);
+
+        regular = json_variant_is_regular(v);
+
+        if (ret_source) {
+                if (regular && v->source)
+                        *ret_source = v->source->name;
+                else
+                        *ret_source = NULL;
+        }
+
+        if (ret_line) {
+                if (regular)
+                        *ret_line = v->line;
+                else
+                        *ret_line = 0;
+        }
+
+        if (ret_column) {
+                if (regular)
+                        *ret_column = v->column;
+                else
+                        *ret_column = 0;
+        }
+
+        return 0;
+}
+
+/* Writes the "[source:line:column] " location prefix of 'v' to 'f', or, if 'whitespace' is true, exactly
+ * as many blanks as that prefix is wide (used to keep closing brackets aligned).
+ *
+ * Nothing is written unless both JSON_FORMAT_SOURCE and JSON_FORMAT_PRETTY are set, 'v' is a regular
+ * variant and it carries at least one of source, line or column. Always returns 0. */
+static int print_source(FILE *f, JsonVariant *v, JsonFormatFlags flags, bool whitespace) {
+        size_t w, k;
+
+        if (!FLAGS_SET(flags, JSON_FORMAT_SOURCE|JSON_FORMAT_PRETTY))
+                return 0;
+
+        if (!json_variant_is_regular(v))
+                return 0;
+
+        if (!v->source && v->line == 0 && v->column == 0)
+                return 0;
+
+        /* The max width we need to format the line numbers for this source file */
+        w = (v->source && v->source->max_line > 0) ?
+                DECIMAL_STR_WIDTH(v->source->max_line) :
+                DECIMAL_STR_MAX(unsigned)-1;
+        /* Same for the column numbers */
+        k = (v->source && v->source->max_column > 0) ?
+                DECIMAL_STR_WIDTH(v->source->max_column) :
+                DECIMAL_STR_MAX(unsigned) -1;
+
+        if (whitespace) {
+                size_t i, n;
+
+                /* Must mirror, term by term, what the else branch below prints */
+                n = 1 + (v->source ? strlen(v->source->name) : 0) +
+                        ((v->source && (v->line > 0 || v->column > 0)) ? 1 : 0) +
+                        (v->line > 0 ? w : 0) +
+                        (((v->source || v->line > 0) && v->column > 0) ? 1 : 0) +
+                        (v->column > 0 ? k : 0) +
+                        2;
+
+                for (i = 0; i < n; i++)
+                        fputc(' ', f);
+        } else {
+                fputc('[', f);
+
+                if (v->source)
+                        fputs(v->source->name, f);
+                if (v->source && (v->line > 0 || v->column > 0))
+                        fputc(':', f);
+                if (v->line > 0)
+                        fprintf(f, "%*u", (int) w, v->line);
+                /* The separator in front of the column is only needed if there is a column and something
+                 * precedes it. This has to be the same condition as in the width computation above,
+                 * otherwise prefix and padding differ in width. */
+                if ((v->source || v->line > 0) && v->column > 0)
+                        fputc(':', f);
+                if (v->column > 0)
+                        fprintf(f, "%*u", (int) k, v->column);
+
+                fputc(']', f);
+                fputc(' ', f);
+        }
+
+        return 0;
+}
+
+/* Serializes 'v' as JSON text to 'f', recursing into arrays and objects.
+ *
+ * 'flags' selects coloring (JSON_FORMAT_COLOR) and multi-line, tab-indented output (JSON_FORMAT_PRETTY);
+ * 'prefix' is the indentation of the enclosing level and may be NULL. Returns 0 on success, -ENOMEM if
+ * an indentation string cannot be allocated, or -errno if the "C" locale cannot be instantiated. */
+static int json_format(FILE *f, JsonVariant *v, JsonFormatFlags flags, const char *prefix) {
+        int r;
+
+        assert(f);
+        assert(v);
+
+        switch (json_variant_type(v)) {
+
+        case JSON_VARIANT_REAL: {
+                locale_t loc, old_loc;
+
+                loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+                if (loc == (locale_t) 0)
+                        return -errno;
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+                /* Creating the locale object is not enough, it has to be made the current one for the
+                 * duration of the fprintf(), so that "." is used as decimal separator regardless of the
+                 * caller's LC_NUMERIC. Switch back before the object is freed. */
+                old_loc = uselocale(loc);
+                fprintf(f, "%.*Le", DECIMAL_DIG, json_variant_real(v));
+                uselocale(old_loc);
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_NORMAL, f);
+
+                freelocale(loc);
+                break;
+        }
+
+        case JSON_VARIANT_INTEGER:
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+                fprintf(f, "%" PRIdMAX, json_variant_integer(v));
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_NORMAL, f);
+                break;
+
+        case JSON_VARIANT_UNSIGNED:
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+                fprintf(f, "%" PRIuMAX, json_variant_unsigned(v));
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_NORMAL, f);
+                break;
+
+        case JSON_VARIANT_BOOLEAN:
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_HIGHLIGHT, f);
+
+                if (json_variant_boolean(v))
+                        fputs("true", f);
+                else
+                        fputs("false", f);
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_NORMAL, f);
+
+                break;
+
+        case JSON_VARIANT_NULL:
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_HIGHLIGHT, f);
+
+                fputs("null", f);
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_NORMAL, f);
+                break;
+
+        case JSON_VARIANT_STRING: {
+                const char *q;
+
+                fputc('"', f);
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_GREEN, f);
+
+                for (q = json_variant_string(v); *q; q++) {
+
+                        switch (*q) {
+
+                        case '"':
+                                fputs("\\\"", f);
+                                break;
+
+                        case '\\':
+                                fputs("\\\\", f);
+                                break;
+
+                        case '\b':
+                                fputs("\\b", f);
+                                break;
+
+                        case '\f':
+                                fputs("\\f", f);
+                                break;
+
+                        case '\n':
+                                fputs("\\n", f);
+                                break;
+
+                        case '\r':
+                                fputs("\\r", f);
+                                break;
+
+                        case '\t':
+                                fputs("\\t", f);
+                                break;
+
+                        default:
+                                /* Remaining ASCII control characters get a \uXXXX escape, everything
+                                 * else (including bytes with the high bit set) is passed through */
+                                if ((signed char) *q >= 0 && *q < ' ')
+                                        fprintf(f, "\\u%04x", *q);
+                                else
+                                        fputc(*q, f);
+                                break;
+                        }
+                }
+
+                if (flags & JSON_FORMAT_COLOR)
+                        fputs(ANSI_NORMAL, f);
+
+                fputc('"', f);
+                break;
+        }
+
+        case JSON_VARIANT_ARRAY: {
+                size_t i, n;
+
+                n = json_variant_elements(v);
+
+                if (n == 0)
+                        fputs("[]", f);
+                else {
+                        _cleanup_free_ char *joined = NULL;
+                        const char *prefix2;
+
+                        if (flags & JSON_FORMAT_PRETTY) {
+                                /* Elements are indented one tab deeper than the array itself */
+                                joined = strjoin(strempty(prefix), "\t");
+                                if (!joined)
+                                        return -ENOMEM;
+
+                                prefix2 = joined;
+                                fputs("[\n", f);
+                        } else {
+                                prefix2 = strempty(prefix);
+                                fputc('[', f);
+                        }
+
+                        for (i = 0; i < n; i++) {
+                                JsonVariant *e;
+
+                                assert_se(e = json_variant_by_index(v, i));
+
+                                if (i > 0) {
+                                        if (flags & JSON_FORMAT_PRETTY)
+                                                fputs(",\n", f);
+                                        else
+                                                fputc(',', f);
+                                }
+
+                                if (flags & JSON_FORMAT_PRETTY) {
+                                        print_source(f, e, flags, false);
+                                        fputs(prefix2, f);
+                                }
+
+                                r = json_format(f, e, flags, prefix2);
+                                if (r < 0)
+                                        return r;
+                        }
+
+                        if (flags & JSON_FORMAT_PRETTY) {
+                                fputc('\n', f);
+                                /* Blank padding of the width of the source prefix, to align the bracket */
+                                print_source(f, v, flags, true);
+                                fputs(strempty(prefix), f);
+                        }
+
+                        fputc(']', f);
+                }
+                break;
+        }
+
+        case JSON_VARIANT_OBJECT: {
+                size_t i, n;
+
+                n = json_variant_elements(v);
+
+                if (n == 0)
+                        fputs("{}", f);
+                else {
+                        _cleanup_free_ char *joined = NULL;
+                        const char *prefix2;
+
+                        if (flags & JSON_FORMAT_PRETTY) {
+                                /* Members are indented one tab deeper than the object itself */
+                                joined = strjoin(strempty(prefix), "\t");
+                                if (!joined)
+                                        return -ENOMEM;
+
+                                prefix2 = joined;
+                                fputs("{\n", f);
+                        } else {
+                                prefix2 = strempty(prefix);
+                                fputc('{', f);
+                        }
+
+                        /* Elements alternate between key (even index) and value (odd index) */
+                        for (i = 0; i < n; i += 2) {
+                                JsonVariant *e;
+
+                                e = json_variant_by_index(v, i);
+
+                                if (i > 0) {
+                                        if (flags & JSON_FORMAT_PRETTY)
+                                                fputs(",\n", f);
+                                        else
+                                                fputc(',', f);
+                                }
+
+                                if (flags & JSON_FORMAT_PRETTY) {
+                                        print_source(f, e, flags, false);
+                                        fputs(prefix2, f);
+                                }
+
+                                r = json_format(f, e, flags, prefix2);
+                                if (r < 0)
+                                        return r;
+
+                                fputs(flags & JSON_FORMAT_PRETTY ? " : " : ":", f);
+
+                                r = json_format(f, json_variant_by_index(v, i+1), flags, prefix2);
+                                if (r < 0)
+                                        return r;
+                        }
+
+                        if (flags & JSON_FORMAT_PRETTY) {
+                                fputc('\n', f);
+                                /* Blank padding of the width of the source prefix, to align the brace */
+                                print_source(f, v, flags, true);
+                                fputs(strempty(prefix), f);
+                        }
+
+                        fputc('}', f);
+                }
+                break;
+        }
+
+        default:
+                assert_not_reached("Unexpected variant type.");
+        }
+
+        return 0;
+}
+
+/* Formats 'v' into a newly allocated string that is returned in 'ret'. Returns the size reported by the
+ * memory stream on success, -EINVAL if 'v' or 'ret' is NULL, -ENOMEM if the memory stream cannot be
+ * opened, or the error of fflush_and_check(). */
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret) {
+        _cleanup_free_ char *buffer = NULL;
+        size_t size = 0;
+        int r;
+
+        assert_return(v, -EINVAL);
+        assert_return(ret, -EINVAL);
+
+        {
+                /* Own scope: the stream has to be closed before the buffer is handed out */
+                _cleanup_fclose_ FILE *stream = NULL;
+
+                stream = open_memstream(&buffer, &size);
+                if (!stream)
+                        return -ENOMEM;
+
+                (void) __fsetlocking(stream, FSETLOCKING_BYCALLER);
+
+                json_variant_dump(v, flags, stream, NULL);
+
+                r = fflush_and_check(stream);
+                if (r < 0)
+                        return r;
+        }
+
+        assert(buffer);
+        *ret = TAKE_PTR(buffer);
+
+        return (int) size;
+}
+
+/* Writes 'v' to 'f' (stdout if NULL), framed according to 'flags': an optional source prefix, an optional
+ * "data: " (JSON_FORMAT_SSE) and record separator (JSON_FORMAT_SEQ) in front, and trailing newline(s)
+ * behind. Does nothing if 'v' is NULL. The result of json_format() is not checked. */
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix) {
+        FILE *out;
+
+        if (!v)
+                return;
+
+        out = f ? f : stdout;
+
+        print_source(out, v, flags, false);
+
+        /* Automatic coloring requested, but coloring not forced on already? Then ask the terminal logic */
+        if (((flags & (JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_COLOR)) == JSON_FORMAT_COLOR_AUTO) && colors_enabled())
+                flags |= JSON_FORMAT_COLOR;
+
+        if (flags & JSON_FORMAT_SSE)
+                fputs("data: ", out);
+        if (flags & JSON_FORMAT_SEQ)
+                fputc('\x1e', out); /* ASCII Record Separator */
+
+        json_format(out, v, flags, prefix);
+
+        if (flags & JSON_FORMAT_SSE)
+                fputs("\n\n", out); /* In case of SSE the record is terminated by two newlines */
+        else if (flags & (JSON_FORMAT_PRETTY|JSON_FORMAT_SEQ|JSON_FORMAT_NEWLINE))
+                fputc('\n', out);
+}
+
+/* Creates a new variant with a reference count of 1 that carries the same value as 'v' and returns it in
+ * 'nv'. Numbers, booleans, null and strings of up to INLINE_STRING_MAX bytes are copied by value; longer
+ * strings and all other types become a reference to the normalized original. Returns 0 or -ENOMEM. */
+static int json_variant_copy(JsonVariant **nv, JsonVariant *v) {
+        bool by_reference = false;
+        const void *source = NULL;
+        JsonVariantType type;
+        JsonVariant *copy;
+        JsonValue value;
+        size_t size = 0;
+
+        assert(nv);
+        assert(v);
+
+        type = json_variant_type(v);
+
+        /* First figure out where the payload comes from and how large it is */
+        switch (type) {
+
+        case JSON_VARIANT_NULL:
+                size = 0;
+                source = NULL;
+                break;
+
+        case JSON_VARIANT_BOOLEAN:
+                value.boolean = json_variant_boolean(v);
+                source = &value;
+                size = sizeof(bool);
+                break;
+
+        case JSON_VARIANT_INTEGER:
+                value.integer = json_variant_integer(v);
+                source = &value;
+                size = sizeof(intmax_t);
+                break;
+
+        case JSON_VARIANT_UNSIGNED:
+                value.unsig = json_variant_unsigned(v);
+                source = &value;
+                size = sizeof(uintmax_t);
+                break;
+
+        case JSON_VARIANT_REAL:
+                value.real = json_variant_real(v);
+                source = &value;
+                size = sizeof(long double);
+                break;
+
+        case JSON_VARIANT_STRING:
+                source = json_variant_string(v);
+                size = strnlen(source, INLINE_STRING_MAX + 1);
+                if (size > INLINE_STRING_MAX)
+                        by_reference = true; /* too long to be stored inline */
+                else
+                        size++; /* account for the trailing NUL */
+                break;
+
+        default:
+                by_reference = true;
+                break;
+        }
+
+        if (by_reference) {
+                copy = malloc0(offsetof(JsonVariant, reference) + sizeof(JsonVariant*));
+                if (!copy)
+                        return -ENOMEM;
+
+                copy->n_ref = 1;
+                copy->type = type;
+                copy->is_reference = true;
+                copy->reference = json_variant_ref(json_variant_normalize(v));
+
+                *nv = copy;
+                return 0;
+        }
+
+        copy = malloc0(offsetof(JsonVariant, value) + size);
+        if (!copy)
+                return -ENOMEM;
+
+        copy->n_ref = 1;
+        copy->type = type;
+
+        memcpy_safe(&copy->value, source, size);
+
+        *nv = copy;
+        return 0;
+}
+
+/* Tells whether the caller holds the only reference to 'v' and hence may modify it in place. For embedded
+ * variants the answer is taken from the outermost enclosing variant. */
+static bool json_single_ref(JsonVariant *v) {
+
+        for (;;) {
+                if (!json_variant_is_regular(v))
+                        return false;
+
+                if (!v->is_embedded)
+                        break;
+
+                /* Embedded variants have no reference counter of their own, ask the parent */
+                v = v->parent;
+        }
+
+        assert(v->n_ref > 0);
+        return v->n_ref == 1;
+}
+
+/* Attaches source and line/column information to '*v'. This is done in place when the caller is the only
+ * referencer of the variant; otherwise a copy (possibly a mere surrogate referencing the original) is
+ * created and '*v' is replaced by it.
+ *
+ * Returns 0 if nothing had to be done, 1 if '*v' was modified or replaced, negative errno on failure. */
+static int json_variant_set_source(JsonVariant **v, JsonSource *source, unsigned line, unsigned column) {
+        JsonVariant *replacement;
+        int r;
+
+        assert(v);
+
+        if (!*v)
+                return 0;
+
+        /* Keep track of the largest line and column seen for this source */
+        if (source) {
+                if (line > source->max_line)
+                        source->max_line = line;
+                if (column > source->max_column)
+                        source->max_column = column;
+        }
+
+        if (json_variant_is_regular(*v)) {
+
+                /* Already carries exactly this location? */
+                if (json_source_equal((*v)->source, source) &&
+                    (*v)->line == line &&
+                    (*v)->column == column)
+                        return 0;
+
+                /* Nobody else looks at this object, hence patch it in place */
+                if (json_single_ref(*v)) {
+                        json_source_unref((*v)->source);
+                        (*v)->source = json_source_ref(source);
+                        (*v)->line = line;
+                        (*v)->column = column;
+                        return 1;
+                }
+
+        } else if (!source && line == 0 && column == 0)
+                /* Nothing to record, so the non-regular variant can stay as it is */
+                return 0;
+
+        r = json_variant_copy(&replacement, *v);
+        if (r < 0)
+                return r;
+
+        assert(json_variant_is_regular(replacement));
+        assert(!replacement->is_embedded);
+        assert(replacement->n_ref == 1);
+        assert(!replacement->source);
+
+        replacement->source = json_source_ref(source);
+        replacement->line = line;
+        replacement->column = column;
+
+        json_variant_unref(*v);
+        *v = replacement;
+
+        return 1;
+}
+
+/* Advances the '*line'/'*column' position over the 'n' bytes at 's'. A newline moves to column 1 of the
+ * next line, everything else advances the column by one per character, where a multi-byte sequence
+ * accepted by utf8_encoded_valid_unichar() counts as a single character. */
+static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, size_t n) {
+        assert(line);
+        assert(column);
+        assert(s || n == 0);
+
+        while (n > 0) {
+                size_t step = 1;
+
+                if (*s == '\n') {
+                        (*line)++;
+                        *column = 1;
+                } else {
+                        /* Plain ASCII is handled without consulting the UTF-8 decoder */
+                        if (!((signed char) *s >= 0 && *s < 127)) {
+                                int w;
+
+                                w = utf8_encoded_valid_unichar(s);
+                                if (w < 0) /* count invalid unichars as normal characters */
+                                        w = 1;
+                                else if ((size_t) w > n) /* never read more than the specified number of characters */
+                                        w = (int) n;
+
+                                step = (size_t) w;
+                        }
+
+                        (*column)++;
+                }
+
+                s += step;
+                n -= step;
+        }
+}
+
+/* Decodes the four hexadecimal digits at 'c' into a 16 bit value. Returns 0 on success, -EINVAL if one of
+ * the characters is not a hex digit or if the resulting value is zero. Decoding stops at the first
+ * invalid character, later characters are not looked at. */
+static int unhex_ucs2(const char *c, uint16_t *ret) {
+        uint16_t x = 0;
+        size_t i;
+
+        assert(c);
+        assert(ret);
+
+        for (i = 0; i < 4; i++) {
+                int digit;
+
+                digit = unhexchar(c[i]);
+                if (digit < 0)
+                        return -EINVAL;
+
+                /* Most significant nibble comes first */
+                x = (uint16_t) ((x << 4) | (uint16_t) digit);
+        }
+
+        if (x == 0)
+                return -EINVAL;
+
+        *ret = x;
+
+        return 0;
+}
+
+/* Parses the double-quoted JSON string '*p' points to. On success the unescaped text is returned as a
+ * newly allocated string in 'ret', '*p' is moved behind the closing quote and JSON_TOKEN_STRING is
+ * returned. Returns -EINVAL on malformed input (missing quotes, control characters, bad escapes, bad
+ * surrogate pairs), -ENOMEM on allocation failure, or the error of utf8_encoded_valid_unichar(). */
+static int json_parse_string(const char **p, char **ret) {
+        _cleanup_free_ char *buf = NULL;
+        size_t len = 0, allocated = 0;
+        const char *c;
+
+        assert(p);
+        assert(*p);
+        assert(ret);
+
+        c = *p;
+
+        if (*c != '"')
+                return -EINVAL;
+
+        c++;
+
+        /* Consume everything up to the closing quote */
+        while (*c != '"') {
+                char ch;
+
+                /* Check for EOF */
+                if (*c == 0)
+                        return -EINVAL;
+
+                /* Check for control characters 0x00..0x1f */
+                if (*c > 0 && *c < ' ')
+                        return -EINVAL;
+
+                /* Check for control character 0x7f */
+                if (*c == 0x7f)
+                        return -EINVAL;
+
+                if (*c != '\\') {
+                        int w;
+
+                        /* An unescaped character: copy the whole UTF-8 sequence verbatim */
+                        w = utf8_encoded_valid_unichar(c);
+                        if (w < 0)
+                                return w;
+
+                        if (!GREEDY_REALLOC(buf, allocated, len + w + 1))
+                                return -ENOMEM;
+
+                        memcpy(buf + len, c, w);
+                        len += w;
+                        c += w;
+                        continue;
+                }
+
+                /* An escape sequence, look at the character behind the backslash */
+                c++;
+
+                switch (*c) {
+
+                case 0:
+                        return -EINVAL;
+
+                case '"':
+                case '\\':
+                case '/':
+                        ch = *c;
+                        break;
+
+                case 'b':
+                        ch = '\b';
+                        break;
+
+                case 'f':
+                        ch = '\f';
+                        break;
+
+                case 'n':
+                        ch = '\n';
+                        break;
+
+                case 'r':
+                        ch = '\r';
+                        break;
+
+                case 't':
+                        ch = '\t';
+                        break;
+
+                case 'u': {
+                        char16_t x;
+                        int r;
+
+                        r = unhex_ucs2(c + 1, &x);
+                        if (r < 0)
+                                return r;
+
+                        /* Skip over the 'u' and the four hex digits */
+                        c += 5;
+
+                        /* Room for up to four bytes of UTF-8 plus the trailing NUL */
+                        if (!GREEDY_REALLOC(buf, allocated, len + 5))
+                                return -ENOMEM;
+
+                        if (utf16_is_surrogate(x)) {
+                                char16_t y;
+
+                                /* A pair has to start with the leading surrogate */
+                                if (utf16_is_trailing_surrogate(x))
+                                        return -EINVAL;
+
+                                if (c[0] != '\\' || c[1] != 'u')
+                                        return -EINVAL;
+
+                                r = unhex_ucs2(c + 2, &y);
+                                if (r < 0)
+                                        return r;
+
+                                /* Skip over the backslash, the 'u' and the four hex digits */
+                                c += 6;
+
+                                if (!utf16_is_trailing_surrogate(y))
+                                        return -EINVAL;
+
+                                len += utf8_encode_unichar(buf + len, utf16_surrogate_pair_to_unichar(x, y));
+                        } else
+                                len += utf8_encode_unichar(buf + len, (char32_t) x);
+
+                        continue;
+                }
+
+                default:
+                        return -EINVAL;
+                }
+
+                /* Single character escape: store the character it stands for */
+                if (!GREEDY_REALLOC(buf, allocated, len + 2))
+                        return -ENOMEM;
+
+                buf[len++] = ch;
+                c++;
+        }
+
+        /* We are at the closing quote now */
+        if (buf)
+                buf[len] = 0;
+        else {
+                buf = strdup("");
+                if (!buf)
+                        return -ENOMEM;
+        }
+
+        *p = c + 1;
+
+        *ret = TAKE_PTR(buf);
+        return JSON_TOKEN_STRING;
+}
+
+/* Parses the JSON number '*p' points to and moves '*p' behind it. Depending on the input the result is
+ * stored in 'ret' as ->unsig (JSON_TOKEN_UNSIGNED), ->integer (JSON_TOKEN_INTEGER, for numbers with a
+ * leading minus) or ->real (JSON_TOKEN_REAL, for numbers with fraction or exponent and for integers that
+ * overflow intmax_t/uintmax_t). Returns -EINVAL if the input is not a valid number. */
+static int json_parse_number(const char **p, JsonValue *ret) {
+        bool negative = false, exponent_negative = false, is_real = false;
+        long double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
+        intmax_t i = 0;
+        uintmax_t u = 0;
+        const char *c;
+
+        assert(p);
+        assert(*p);
+        assert(ret);
+
+        c = *p;
+
+        if (*c == '-') {
+                negative = true;
+                c++;
+        }
+
+        /* Integer part: either a single zero, or a sequence of digits not starting with a zero */
+        if (*c == '0')
+                c++;
+        else {
+                if (!(*c >= '1' && *c <= '9'))
+                        return -EINVAL;
+
+                do {
+                        int digit = *c - '0';
+
+                        /* Accumulate as integer for as long as that does not overflow */
+                        if (!is_real) {
+                                if (negative) {
+
+                                        if (i < INTMAX_MIN / 10) /* overflow */
+                                                is_real = true;
+                                        else {
+                                                intmax_t t = 10 * i;
+
+                                                if (t < INTMAX_MIN + digit) /* overflow */
+                                                        is_real = true;
+                                                else
+                                                        i = t - digit;
+                                        }
+                                } else {
+                                        if (u > UINTMAX_MAX / 10) /* overflow */
+                                                is_real = true;
+                                        else {
+                                                uintmax_t t = 10 * u;
+
+                                                if (t > UINTMAX_MAX - digit) /* overflow */
+                                                        is_real = true;
+                                                else
+                                                        u = t + digit;
+                                        }
+                                }
+                        }
+
+                        /* The floating point accumulator is always kept up-to-date */
+                        x = 10.0 * x + digit;
+
+                        c++;
+                } while (*c >= '0' && *c <= '9');
+        }
+
+        /* Optional fraction */
+        if (*c == '.') {
+                is_real = true;
+                c++;
+
+                if (!(*c >= '0' && *c <= '9'))
+                        return -EINVAL;
+
+                do {
+                        y = 10.0 * y + (*c - '0');
+                        shift = 10.0 * shift;
+                        c++;
+                } while (*c >= '0' && *c <= '9');
+        }
+
+        /* Optional exponent */
+        if (IN_SET(*c, 'e', 'E')) {
+                is_real = true;
+                c++;
+
+                if (*c == '-') {
+                        exponent_negative = true;
+                        c++;
+                } else if (*c == '+')
+                        c++;
+
+                if (!(*c >= '0' && *c <= '9'))
+                        return -EINVAL;
+
+                do {
+                        exponent = 10.0 * exponent + (*c - '0');
+                        c++;
+                } while (*c >= '0' && *c <= '9');
+        }
+
+        *p = c;
+
+        if (is_real) {
+                ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10l((exponent_negative ? -1.0 : 1.0) * exponent);
+                return JSON_TOKEN_REAL;
+        }
+
+        if (negative) {
+                ret->integer = i;
+                return JSON_TOKEN_INTEGER;
+        }
+
+        ret->unsig = u;
+        return JSON_TOKEN_UNSIGNED;
+}
+
+/* Reads the next token from '*p' and moves '*p' behind it.
+ *
+ * Returns the JSON_TOKEN_xyz type of the token, or a negative errno on malformed input. For string tokens
+ * the text is returned in 'ret_string', for numbers and booleans the value in 'ret_value'; the respective
+ * other one is reset. 'ret_line'/'ret_column' receive the position the token started at, while
+ * 'line'/'column' track the position behind it. '*state' carries the tokenizer state between calls and
+ * has to be NULL for the first call. */
+int json_tokenize(
+                const char **p,
+                char **ret_string,
+                JsonValue *ret_value,
+                unsigned *ret_line,   /* 'ret_line' returns the line at the beginning of this token */
+                unsigned *ret_column,
+                void **state,
+                unsigned *line,       /* 'line' is used as a line state, it always reflect the line we are at after the token was read */
+                unsigned *column) {
+
+        unsigned start_line, start_column;
+        const char *start, *c;
+        size_t n;
+        int t, r;
+
+        enum {
+                STATE_NULL,
+                STATE_VALUE,
+                STATE_VALUE_POST,
+        };
+
+        assert(p);
+        assert(*p);
+        assert(ret_string);
+        assert(ret_value);
+        assert(ret_line);
+        assert(ret_column);
+        assert(line);
+        assert(column);
+        assert(state);
+
+        t = PTR_TO_INT(*state);
+        if (t == STATE_NULL) {
+                /* First invocation: start counting at 1:1 */
+                *line = 1;
+                *column = 1;
+                t = STATE_VALUE;
+        }
+
+        /* Skip over the whitespace */
+        n = strspn(*p, WHITESPACE);
+        inc_lines_columns(line, column, *p, n);
+        c = *p + n;
+
+        /* Remember where we started processing this token */
+        start = c;
+        start_line = *line;
+        start_column = *column;
+
+        if (*c == 0) {
+                *ret_string = NULL;
+                *ret_value = JSON_VALUE_NULL;
+                r = JSON_TOKEN_END;
+                goto finish;
+        }
+
+        switch (t) {
+
+        case STATE_VALUE:
+
+                switch (*c) {
+
+                case '{':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE);
+                        r = JSON_TOKEN_OBJECT_OPEN;
+                        goto null_return;
+
+                case '}':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        r = JSON_TOKEN_OBJECT_CLOSE;
+                        goto null_return;
+
+                case '[':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE);
+                        r = JSON_TOKEN_ARRAY_OPEN;
+                        goto null_return;
+
+                case ']':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        r = JSON_TOKEN_ARRAY_CLOSE;
+                        goto null_return;
+
+                case '"':
+                        r = json_parse_string(&c, ret_string);
+                        if (r < 0)
+                                return r;
+
+                        *ret_value = JSON_VALUE_NULL;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        goto finish;
+
+                case '-':
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                        r = json_parse_number(&c, ret_value);
+                        if (r < 0)
+                                return r;
+
+                        *ret_string = NULL;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        goto finish;
+
+                default:
+                        break;
+                }
+
+                /* Not punctuation, string or number, so it has to be one of the literals */
+                if (startswith(c, "true")) {
+                        *ret_string = NULL;
+                        ret_value->boolean = true;
+                        c += 4;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        r = JSON_TOKEN_BOOLEAN;
+                        goto finish;
+                }
+
+                if (startswith(c, "false")) {
+                        *ret_string = NULL;
+                        ret_value->boolean = false;
+                        c += 5;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        r = JSON_TOKEN_BOOLEAN;
+                        goto finish;
+                }
+
+                if (startswith(c, "null")) {
+                        *ret_string = NULL;
+                        *ret_value = JSON_VALUE_NULL;
+                        c += 4;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        r = JSON_TOKEN_NULL;
+                        goto finish;
+                }
+
+                return -EINVAL;
+
+        case STATE_VALUE_POST:
+
+                /* Behind a value only separators and closing brackets may follow */
+                switch (*c) {
+
+                case ':':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE);
+                        r = JSON_TOKEN_COLON;
+                        goto null_return;
+
+                case ',':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE);
+                        r = JSON_TOKEN_COMMA;
+                        goto null_return;
+
+                case '}':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        r = JSON_TOKEN_OBJECT_CLOSE;
+                        goto null_return;
+
+                case ']':
+                        c++;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        r = JSON_TOKEN_ARRAY_CLOSE;
+                        goto null_return;
+
+                default:
+                        break;
+                }
+
+                return -EINVAL;
+
+        default:
+                assert_not_reached("Unexpected tokenizer state");
+        }
+
+null_return:
+        *ret_string = NULL;
+        *ret_value = JSON_VALUE_NULL;
+
+finish:
+        /* Account for the characters of the token itself */
+        inc_lines_columns(line, column, start, c - start);
+        *p = c;
+
+        *ret_line = start_line;
+        *ret_column = start_column;
+
+        return r;
+}
+
+typedef enum JsonExpect { /* What is acceptable next at one nesting level, see JsonStack.expect */
+ /* The following values are used by json_parse() */
+ EXPECT_TOPLEVEL, /* initial state: a single value of any type */
+ EXPECT_END, /* the top-level value is complete, only the end of input may follow */
+ EXPECT_OBJECT_FIRST_KEY, /* right after '{': a key string or '}' */
+ EXPECT_OBJECT_NEXT_KEY, /* right after ',' inside an object: a key string */
+ EXPECT_OBJECT_COLON, /* after a key string: ':' */
+ EXPECT_OBJECT_VALUE, /* after ':': a value */
+ EXPECT_OBJECT_COMMA, /* after a member value: ',' or '}' */
+ EXPECT_ARRAY_FIRST_ELEMENT, /* right after '[': a value or ']' */
+ EXPECT_ARRAY_NEXT_ELEMENT, /* right after ',' inside an array: a value */
+ EXPECT_ARRAY_COMMA, /* after an array element: ',' or ']' */
+
+ /* And these are used by json_build() */
+ EXPECT_ARRAY_ELEMENT, /* json_buildv() accepts a value in this state and remains in it */
+ EXPECT_OBJECT_KEY, /* json_buildv() enters this state after an object member value */
+} JsonExpect;
+
+typedef struct JsonStack { /* One nesting level (top level, array or object) while parsing or building */
+ JsonExpect expect; /* what may come next on this level */
+ JsonVariant **elements; /* variants collected on this level so far */
+ size_t n_elements, n_elements_allocated; /* used entries / allocation bookkeeping for GREEDY_REALLOC() */
+ unsigned line_before; /* when parsing: line of the opening bracket of this level */
+ unsigned column_before; /* when parsing: column of the opening bracket of this level */
+ size_t n_suppress; /* When building: if > 0, suppress this many subsequent elements. If == (size_t) -1, suppress all subsequent elements */
+} JsonStack;
+
+/* Drops the references to all variants collected on stack level 's' and frees the element array. The
+ * JsonStack structure itself is not freed. */
+static void json_stack_release(JsonStack *s) {
+        assert(s);
+
+        json_variant_unref_many(s->elements, s->n_elements);
+
+        free(s->elements);
+        s->elements = NULL;
+}
+
+/* Parses the JSON text at '*input' into a variant tree.
+ *
+ * 'source' (may be NULL) is attached to every variant created, together with the line/column of the
+ * token it was created from. 'line'/'column' (may be NULL) are kept up-to-date with the tokenizer
+ * position, so that the caller can tell where parsing stopped. If 'continue_end' is true parsing stops
+ * right after the first complete top-level value, otherwise the value must be followed by the end of the
+ * input.
+ *
+ * On success 0 is returned, '*ret' receives a reference to the parsed variant and '*input' is moved
+ * behind the consumed text. Returns -EINVAL on syntax errors or NULL arguments, -ENOMEM on allocation
+ * failure, or whatever error the tokenizer or the variant constructors report. */
+static int json_parse_internal(
+                const char **input,
+                JsonSource *source,
+                JsonVariant **ret,
+                unsigned *line,
+                unsigned *column,
+                bool continue_end) {
+
+        size_t n_stack = 1, n_stack_allocated = 0, i;
+        unsigned line_buffer = 0, column_buffer = 0;
+        void *tokenizer_state = NULL;
+        JsonStack *stack = NULL;
+        const char *p;
+        int r;
+
+        assert_return(input, -EINVAL);
+        assert_return(ret, -EINVAL);
+
+        p = *input;
+
+        if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+                return -ENOMEM;
+
+        stack[0] = (JsonStack) {
+                .expect = EXPECT_TOPLEVEL,
+        };
+
+        if (!line)
+                line = &line_buffer;
+        if (!column)
+                column = &column_buffer;
+
+        for (;;) {
+                /* Automatically unreferenced on every way out of this iteration, unless ownership has
+                 * been passed to the stack. Without this the variant was leaked when growing the element
+                 * array failed. */
+                _cleanup_(json_variant_unrefp) JsonVariant *add = NULL;
+                _cleanup_free_ char *string = NULL;
+                unsigned line_token, column_token;
+                JsonStack *current;
+                JsonValue value;
+                int token;
+
+                assert(n_stack > 0);
+                current = stack + n_stack - 1;
+
+                if (continue_end && current->expect == EXPECT_END)
+                        goto done;
+
+                token = json_tokenize(&p, &string, &value, &line_token, &column_token, &tokenizer_state, line, column);
+                if (token < 0) {
+                        r = token;
+                        goto finish;
+                }
+
+                switch (token) {
+
+                case JSON_TOKEN_END:
+                        if (current->expect != EXPECT_END) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        assert(current->n_elements == 1);
+                        assert(n_stack == 1);
+                        goto done;
+
+                case JSON_TOKEN_COLON:
+
+                        if (current->expect != EXPECT_OBJECT_COLON) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        current->expect = EXPECT_OBJECT_VALUE;
+                        break;
+
+                case JSON_TOKEN_COMMA:
+
+                        if (current->expect == EXPECT_OBJECT_COMMA)
+                                current->expect = EXPECT_OBJECT_NEXT_KEY;
+                        else if (current->expect == EXPECT_ARRAY_COMMA)
+                                current->expect = EXPECT_ARRAY_NEXT_ELEMENT;
+                        else {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        break;
+
+                case JSON_TOKEN_OBJECT_OPEN:
+                case JSON_TOKEN_ARRAY_OPEN:
+
+                        if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+                        /* The stack might have been moved by the reallocation */
+                        current = stack + n_stack - 1;
+
+                        /* Prepare the expect for when we return from the child */
+                        if (current->expect == EXPECT_TOPLEVEL)
+                                current->expect = EXPECT_END;
+                        else if (current->expect == EXPECT_OBJECT_VALUE)
+                                current->expect = EXPECT_OBJECT_COMMA;
+                        else {
+                                assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+                                current->expect = EXPECT_ARRAY_COMMA;
+                        }
+
+                        /* Open a new level, and remember where it started for the source information */
+                        stack[n_stack++] = (JsonStack) {
+                                .expect = token == JSON_TOKEN_OBJECT_OPEN ? EXPECT_OBJECT_FIRST_KEY : EXPECT_ARRAY_FIRST_ELEMENT,
+                                .line_before = line_token,
+                                .column_before = column_token,
+                        };
+
+                        break;
+
+                case JSON_TOKEN_OBJECT_CLOSE:
+                        if (!IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_COMMA)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        assert(n_stack > 1);
+
+                        r = json_variant_new_object(&add, current->elements, current->n_elements);
+                        if (r < 0)
+                                goto finish;
+
+                        /* The object is located where its opening brace was */
+                        line_token = current->line_before;
+                        column_token = current->column_before;
+
+                        /* Return to the parent level, that is where the new object gets added below */
+                        json_stack_release(current);
+                        n_stack--, current--;
+
+                        break;
+
+                case JSON_TOKEN_ARRAY_CLOSE:
+                        if (!IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_COMMA)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        assert(n_stack > 1);
+
+                        r = json_variant_new_array(&add, current->elements, current->n_elements);
+                        if (r < 0)
+                                goto finish;
+
+                        /* The array is located where its opening bracket was */
+                        line_token = current->line_before;
+                        column_token = current->column_before;
+
+                        /* Return to the parent level, that is where the new array gets added below */
+                        json_stack_release(current);
+                        n_stack--, current--;
+                        break;
+
+                case JSON_TOKEN_STRING:
+                        /* Strings are special, as they may be object keys as well as values */
+                        if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        r = json_variant_new_string(&add, string);
+                        if (r < 0)
+                                goto finish;
+
+                        if (current->expect == EXPECT_TOPLEVEL)
+                                current->expect = EXPECT_END;
+                        else if (IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY))
+                                current->expect = EXPECT_OBJECT_COLON;
+                        else if (current->expect == EXPECT_OBJECT_VALUE)
+                                current->expect = EXPECT_OBJECT_COMMA;
+                        else {
+                                assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+                                current->expect = EXPECT_ARRAY_COMMA;
+                        }
+
+                        break;
+
+                case JSON_TOKEN_REAL:
+                case JSON_TOKEN_INTEGER:
+                case JSON_TOKEN_UNSIGNED:
+                case JSON_TOKEN_BOOLEAN:
+                case JSON_TOKEN_NULL:
+                        /* The scalar types only differ in the constructor to call */
+                        if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        if (token == JSON_TOKEN_REAL)
+                                r = json_variant_new_real(&add, value.real);
+                        else if (token == JSON_TOKEN_INTEGER)
+                                r = json_variant_new_integer(&add, value.integer);
+                        else if (token == JSON_TOKEN_UNSIGNED)
+                                r = json_variant_new_unsigned(&add, value.unsig);
+                        else if (token == JSON_TOKEN_BOOLEAN)
+                                r = json_variant_new_boolean(&add, value.boolean);
+                        else
+                                r = json_variant_new_null(&add);
+                        if (r < 0)
+                                goto finish;
+
+                        if (current->expect == EXPECT_TOPLEVEL)
+                                current->expect = EXPECT_END;
+                        else if (current->expect == EXPECT_OBJECT_VALUE)
+                                current->expect = EXPECT_OBJECT_COMMA;
+                        else {
+                                assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+                                current->expect = EXPECT_ARRAY_COMMA;
+                        }
+
+                        break;
+
+                default:
+                        assert_not_reached("Unexpected token");
+                }
+
+                if (add) {
+                        /* Failing to record the source information is not fatal */
+                        (void) json_variant_set_source(&add, source, line_token, column_token);
+
+                        if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        /* From now on the stack level owns the variant */
+                        current->elements[current->n_elements++] = TAKE_PTR(add);
+                }
+        }
+
+done:
+        assert(n_stack == 1);
+        assert(stack[0].n_elements == 1);
+
+        /* Take a reference of our own, the stack's one is dropped right below */
+        *ret = json_variant_ref(stack[0].elements[0]);
+        *input = p;
+        r = 0;
+
+finish:
+        for (i = 0; i < n_stack; i++)
+                json_stack_release(stack + i);
+
+        free(stack);
+
+        return r;
+}
+
+/* Parses 'input', which has to consist of exactly one JSON value, without source information. */
+int json_parse(const char *input, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+        const char *cursor = input;
+
+        return json_parse_internal(&cursor, NULL, ret, ret_line, ret_column, false);
+}
+
+/* Parses one JSON value from '*p' and moves '*p' behind it, so that further values can be read from the
+ * same buffer with subsequent calls. */
+int json_parse_continue(const char **p, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+        int r;
+
+        r = json_parse_internal(p, NULL, ret, ret_line, ret_column, true);
+        return r;
+}
+
+/* Reads a complete JSON document from the stream 'f' or, if that is NULL, from the file at 'path', and
+ * parses it. If 'path' is set it is recorded as the source name of the resulting variants, also when the
+ * data is read from 'f'. Returns -EINVAL if neither is given, -ENOMEM if the source object cannot be
+ * allocated, otherwise the result of reading and parsing. */
+int json_parse_file(FILE *f, const char *path, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+        _cleanup_(json_source_unrefp) JsonSource *source = NULL;
+        _cleanup_free_ char *text = NULL;
+        const char *cursor;
+        int r;
+
+        if (!f && !path)
+                return -EINVAL;
+
+        /* An open stream takes precedence over the path */
+        r = f ? read_full_stream(f, &text, NULL) : read_full_file(path, &text, NULL);
+        if (r < 0)
+                return r;
+
+        if (path) {
+                source = json_source_new(path);
+                if (!source)
+                        return -ENOMEM;
+        }
+
+        cursor = text;
+        return json_parse_internal(&cursor, source, ret, ret_line, ret_column, false);
+}
+
+/* Returns true if the specified stack level is in a state where a JSON value (as opposed to an object
+ * key, or nothing at all) may be supplied next. */
+static bool json_build_value_allowed(const JsonStack *s) {
+        return IN_SET(s->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT);
+}
+
+/* Advances the state of the specified stack level after a value has been supplied to it: the top level
+ * is complete after a single value, an object expects a key again after a value, and an array simply
+ * keeps expecting further elements. */
+static void json_build_value_consumed(JsonStack *s) {
+        if (s->expect == EXPECT_TOPLEVEL)
+                s->expect = EXPECT_END;
+        else if (s->expect == EXPECT_OBJECT_VALUE)
+                s->expect = EXPECT_OBJECT_KEY;
+        else
+                assert(s->expect == EXPECT_ARRAY_ELEMENT);
+}
+
+/* Builds a JSON variant from a sequence of _JSON_BUILD_xyz commands (each followed by its arguments)
+ * read from 'ap', typically generated with the JSON_BUILD_xyz() macros. Processing stops by itself as
+ * soon as the top-level value is complete.
+ *
+ * On success 0 is returned and a new reference to the built variant is stored in *ret. Returns -EINVAL
+ * if a command is not valid in the current state (or is unknown), -ENOMEM on allocation failure, or any
+ * error returned by the variant constructors or by json_parse() (for _JSON_BUILD_LITERAL).
+ *
+ * Note that this function does not invoke va_end() on 'ap': it did not invoke va_start() either, hence
+ * that is the job of the caller (see json_build()). */
+int json_buildv(JsonVariant **ret, va_list ap) {
+        JsonStack *stack = NULL;
+        size_t n_stack = 1, n_stack_allocated = 0, i;
+        int r;
+
+        assert_return(ret, -EINVAL);
+
+        if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+                return -ENOMEM;
+
+        stack[0] = (JsonStack) {
+                .expect = EXPECT_TOPLEVEL,
+        };
+
+        for (;;) {
+                _cleanup_(json_variant_unrefp) JsonVariant *add = NULL;
+                size_t n_subtract = 0; /* how much to subtract from current->n_suppress, i.e. how many elements would
+                                        * have been added to the current variant */
+                JsonStack *current;
+                int command;
+
+                assert(n_stack > 0);
+                current = stack + n_stack - 1;
+
+                if (current->expect == EXPECT_END)
+                        goto done;
+
+                command = va_arg(ap, int);
+
+                switch (command) {
+
+                case _JSON_BUILD_STRING: {
+                        const char *p;
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        p = va_arg(ap, const char *);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_string(&add, p);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+                }
+
+                case _JSON_BUILD_INTEGER: {
+                        intmax_t j;
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        j = va_arg(ap, intmax_t);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_integer(&add, j);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+                }
+
+                case _JSON_BUILD_UNSIGNED: {
+                        uintmax_t j;
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        j = va_arg(ap, uintmax_t);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_unsigned(&add, j);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+                }
+
+                case _JSON_BUILD_REAL: {
+                        long double d;
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        d = va_arg(ap, long double);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_real(&add, d);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+                }
+
+                case _JSON_BUILD_BOOLEAN: {
+                        bool b;
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        /* bool is promoted to int when passed through varargs */
+                        b = va_arg(ap, int);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_boolean(&add, b);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+                }
+
+                case _JSON_BUILD_NULL:
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_null(&add);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+
+                case _JSON_BUILD_VARIANT:
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        /* Note that we don't care for current->n_suppress here, after all the variant is already
+                         * allocated anyway... */
+                        add = va_arg(ap, JsonVariant*);
+                        if (!add)
+                                add = JSON_VARIANT_MAGIC_NULL;
+                        else
+                                json_variant_ref(add);
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+
+                case _JSON_BUILD_LITERAL: {
+                        const char *l;
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        l = va_arg(ap, const char *);
+
+                        if (l) {
+                                /* Note that we don't care for current->n_suppress here, we should generate parsing
+                                 * errors even in suppressed object properties */
+
+                                r = json_parse(l, &add, NULL, NULL);
+                                if (r < 0)
+                                        goto finish;
+                        } else
+                                add = JSON_VARIANT_MAGIC_NULL;
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+                }
+
+                case _JSON_BUILD_ARRAY_BEGIN:
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+                        current = stack + n_stack - 1; /* the stack might have been moved by the reallocation */
+
+                        json_build_value_consumed(current);
+
+                        stack[n_stack++] = (JsonStack) {
+                                .expect = EXPECT_ARRAY_ELEMENT,
+                                .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+                                                                                           * new array, then we should
+                                                                                           * also suppress all array
+                                                                                           * members */
+                        };
+
+                        break;
+
+                case _JSON_BUILD_ARRAY_END:
+                        if (current->expect != EXPECT_ARRAY_ELEMENT) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        assert(n_stack > 1);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_array(&add, current->elements, current->n_elements);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+
+                        /* Pop the array's level; the finished array is added to the parent level below */
+                        json_stack_release(current);
+                        n_stack--, current--;
+
+                        break;
+
+                case _JSON_BUILD_STRV: {
+                        char **l;
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        l = va_arg(ap, char **);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_array_strv(&add, l);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+                        json_build_value_consumed(current);
+                        break;
+                }
+
+                case _JSON_BUILD_OBJECT_BEGIN:
+
+                        if (!json_build_value_allowed(current)) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+                        current = stack + n_stack - 1; /* the stack might have been moved by the reallocation */
+
+                        json_build_value_consumed(current);
+
+                        stack[n_stack++] = (JsonStack) {
+                                .expect = EXPECT_OBJECT_KEY,
+                                .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+                                                                                           * new object, then we should
+                                                                                           * also suppress all object
+                                                                                           * members */
+                        };
+
+                        break;
+
+                case _JSON_BUILD_OBJECT_END:
+
+                        if (current->expect != EXPECT_OBJECT_KEY) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        assert(n_stack > 1);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_object(&add, current->elements, current->n_elements);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+
+                        /* Pop the object's level; the finished object is added to the parent level below */
+                        json_stack_release(current);
+                        n_stack--, current--;
+
+                        break;
+
+                case _JSON_BUILD_PAIR: {
+                        const char *n;
+
+                        if (current->expect != EXPECT_OBJECT_KEY) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        n = va_arg(ap, const char *);
+
+                        if (current->n_suppress == 0) {
+                                r = json_variant_new_string(&add, n);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1;
+
+                        current->expect = EXPECT_OBJECT_VALUE;
+                        break;
+                }
+
+                case _JSON_BUILD_PAIR_CONDITION: {
+                        const char *n;
+                        bool b;
+
+                        if (current->expect != EXPECT_OBJECT_KEY) {
+                                r = -EINVAL;
+                                goto finish;
+                        }
+
+                        b = va_arg(ap, int);
+                        n = va_arg(ap, const char *);
+
+                        if (b && current->n_suppress == 0) {
+                                r = json_variant_new_string(&add, n);
+                                if (r < 0)
+                                        goto finish;
+                        }
+
+                        n_subtract = 1; /* we generated one item */
+
+                        if (!b && current->n_suppress != (size_t) -1)
+                                current->n_suppress += 2; /* Suppress this one and the next item */
+
+                        current->expect = EXPECT_OBJECT_VALUE;
+                        break;
+                }
+
+                default:
+                        /* We don't know how many arguments an unknown command carries, hence we cannot
+                         * sensibly continue reading from the argument list. */
+                        r = -EINVAL;
+                        goto finish;
+                }
+
+                /* If a variant was generated, add it to our current variant, but only if we are not supposed to suppress additions */
+                if (add && current->n_suppress == 0) {
+                        if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        current->elements[current->n_elements++] = TAKE_PTR(add);
+                }
+
+                /* If we are supposed to suppress items, let's subtract how many items were generated from that
+                 * counter. Except if the counter is (size_t) -1, i.e. we shall suppress an infinite number of elements
+                 * on this stack level */
+                if (current->n_suppress != (size_t) -1) {
+                        if (current->n_suppress <= n_subtract) /* Saturated */
+                                current->n_suppress = 0;
+                        else
+                                current->n_suppress -= n_subtract;
+                }
+        }
+
+done:
+        assert(n_stack == 1);
+        assert(stack[0].n_elements == 1);
+
+        *ret = json_variant_ref(stack[0].elements[0]);
+        r = 0;
+
+finish:
+        for (i = 0; i < n_stack; i++)
+                json_stack_release(stack + i);
+
+        free(stack);
+
+        return r;
+}
+
+/* Variadic front-end for json_buildv(): collects the arguments following 'ret' into a va_list, hands
+ * them to json_buildv() and returns its result. */
+int json_build(JsonVariant **ret, ...) {
+        va_list args;
+        int result;
+
+        va_start(args, ret);
+        result = json_buildv(ret, args);
+        va_end(args);
+
+        return result;
+}
+
+/* Logs a message about a JSON variant. If the variant carries source location information (file name,
+ * line and column, as returned by json_variant_get_source()) the message is prefixed with that location
+ * and the location is attached as CONFIG_FILE=/CONFIG_LINE=/CONFIG_COLUMN= structured fields.
+ *
+ * 'file', 'line' and 'func' identify the C call site that generates the log message (see json_log()),
+ * they are unrelated to the location within the JSON source.
+ *
+ * Returns the result of log_struct_internal(), or a negative errno-style value if the source
+ * information cannot be acquired. errno is protected across the call. */
+int json_log_internal(
+                JsonVariant *variant,
+                int level,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const char *format, ...) {
+
+        PROTECT_ERRNO;
+
+        unsigned source_line, source_column;
+        char buffer[LINE_MAX];
+        const char *source;
+        va_list ap;
+        int r;
+
+        /* Set errno before formatting, so that "%m" in the format string refers to the passed error */
+        errno = ERRNO_VALUE(error);
+
+        va_start(ap, format);
+        (void) vsnprintf(buffer, sizeof buffer, format, ap);
+        va_end(ap);
+
+        if (variant) {
+                r = json_variant_get_source(variant, &source, &source_line, &source_column);
+                if (r < 0)
+                        return r;
+        } else {
+                source = NULL;
+                source_line = 0;
+                source_column = 0;
+        }
+
+        if (source && source_line > 0 && source_column > 0)
+                /* Note: the human readable prefix shows the position in the JSON source, i.e.
+                 * source_line/source_column, not 'line', which is the line of the C call site. */
+                return log_struct_internal(
+                                LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+                                error,
+                                file, line, func,
+                                "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+                                "CONFIG_FILE=%s", source,
+                                "CONFIG_LINE=%u", source_line,
+                                "CONFIG_COLUMN=%u", source_column,
+                                LOG_MESSAGE("%s:%u:%u: %s", source, source_line, source_column, buffer),
+                                NULL);
+        else
+                return log_struct_internal(
+                                LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+                                error,
+                                file, line, func,
+                                "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+                                LOG_MESSAGE("%s", buffer),
+                                NULL);
+}
+
+/* Maps the fields of JSON object 'v' to callbacks, according to 'table' (an array terminated by an entry
+ * with a NULL .name). For every key/value pair of the object the first table entry with a matching name
+ * is looked up; its callback (if any) is invoked with 'userdata' advanced by the entry's .offset. Keys
+ * without a matching entry are passed to 'bad', if that is set.
+ *
+ * The flags of the matching table entry are ORed into 'flags' for that field. If JSON_PERMISSIVE is set
+ * failures are logged but the affected field is skipped instead of aborting.
+ *
+ * Returns the number of successfully dispatched fields, or a negative errno-style value:
+ *   -EINVAL        'v' is not an object, or a field has the wrong type
+ *   -ENOTUNIQ      a field matching the same table entry was seen twice
+ *   -EADDRNOTAVAIL a field has no table entry and no 'bad' callback was specified
+ *   -ENXIO         a field marked JSON_MANDATORY is missing
+ * Errors returned by callbacks are propagated as-is. */
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata) {
+ const JsonDispatch *p;
+ size_t i, n, m;
+ int r, done = 0;
+ bool *found;
+
+ if (!json_variant_is_object(v)) {
+ json_log(v, flags, 0, "JSON variant is not an object.");
+
+ if (flags & JSON_PERMISSIVE)
+ return 0;
+
+ return -EINVAL;
+ }
+
+ /* Count the table entries, so that we can track for each one whether we have seen it */
+ for (p = table, m = 0; p->name; p++)
+ m++;
+
+ found = newa0(bool, m);
+
+ /* The elements of an object are iterated in pairs: key at index i, value at index i+1 */
+ n = json_variant_elements(v);
+ for (i = 0; i < n; i += 2) {
+ JsonVariant *key, *value;
+
+ assert_se(key = json_variant_by_index(v, i));
+ assert_se(value = json_variant_by_index(v, i+1));
+
+ /* A name of (const char*) -1 is a wildcard entry that matches any key */
+ for (p = table; p->name; p++)
+ if (p->name == (const char*) -1 ||
+ streq_ptr(json_variant_string(key), p->name))
+ break;
+
+ if (p->name) { /* Found a matching entry! :-) */
+ JsonDispatchFlags merged_flags;
+
+ merged_flags = flags | p->flags;
+
+ /* _JSON_VARIANT_TYPE_INVALID in the table means: accept any type */
+ if (p->type != _JSON_VARIANT_TYPE_INVALID &&
+ !json_variant_has_type(value, p->type)) {
+
+ json_log(value, merged_flags, 0,
+ "Object field '%s' has wrong type %s, expected %s.", json_variant_string(key),
+ json_variant_type_to_string(json_variant_type(value)), json_variant_type_to_string(p->type));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EINVAL;
+ }
+
+ if (found[p-table]) {
+ json_log(value, merged_flags, 0, "Duplicate object field '%s'.", json_variant_string(key));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -ENOTUNIQ;
+ }
+
+ found[p-table] = true;
+
+ if (p->callback) {
+ r = p->callback(json_variant_string(key), value, merged_flags, (uint8_t*) userdata + p->offset);
+ if (r < 0) {
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ }
+ }
+
+ /* Entries without a callback are counted as dispatched, too */
+ done ++;
+
+ } else { /* Didn't find a matching entry! :-( */
+
+ if (bad) {
+ /* Note that the fallback callback gets the unmodified flags and userdata */
+ r = bad(json_variant_string(key), value, flags, userdata);
+ if (r < 0) {
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ } else
+ done ++;
+
+ } else {
+ json_log(value, flags, 0, "Unexpected object field '%s'.", json_variant_string(key));
+
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ }
+ }
+
+ /* Finally, verify that all fields marked as mandatory have been seen */
+ for (p = table; p->name; p++) {
+ JsonDispatchFlags merged_flags = p->flags | flags;
+
+ if ((merged_flags & JSON_MANDATORY) && !found[p-table]) {
+ json_log(v, merged_flags, 0, "Missing object field '%s'.", p->name);
+
+ if ((merged_flags & JSON_PERMISSIVE))
+ continue;
+
+ return -ENXIO;
+ }
+ }
+
+ return done;
+}
+
+/* Dispatch callback: stores a JSON boolean in the 'bool' that 'userdata' points to. Logs and returns
+ * -EINVAL if the variant is not a boolean, 0 otherwise. */
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        bool *target = userdata;
+
+        assert(variant);
+        assert(target);
+
+        if (json_variant_is_boolean(variant)) {
+                *target = json_variant_boolean(variant);
+                return 0;
+        }
+
+        json_log(variant, flags, 0, "JSON field '%s' is not a boolean.", strna(name));
+        return -EINVAL;
+}
+
+/* Dispatch callback: like json_dispatch_boolean(), but the destination 'userdata' points to is an 'int'
+ * rather than a 'bool'. Logs and returns -EINVAL if the variant is not a boolean, 0 otherwise. */
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        int *target = userdata;
+
+        assert(variant);
+        assert(target);
+
+        if (json_variant_is_boolean(variant)) {
+                *target = json_variant_boolean(variant);
+                return 0;
+        }
+
+        json_log(variant, flags, 0, "JSON field '%s' is not a boolean.", strna(name));
+        return -EINVAL;
+}
+
+/* Dispatch callback: stores a JSON integer in the 'intmax_t' that 'userdata' points to. Logs and
+ * returns -EINVAL if the variant is not an integer, 0 otherwise. */
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        intmax_t *target = userdata;
+
+        assert(variant);
+        assert(target);
+
+        if (json_variant_is_integer(variant)) {
+                *target = json_variant_integer(variant);
+                return 0;
+        }
+
+        json_log(variant, flags, 0, "JSON field '%s' is not an integer.", strna(name));
+        return -EINVAL;
+}
+
+/* Dispatch callback: stores a JSON unsigned integer in the 'uintmax_t' that 'userdata' points to. Logs
+ * and returns -EINVAL if the variant is not an unsigned integer, 0 otherwise. */
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        uintmax_t *target = userdata;
+
+        assert(variant);
+        assert(target);
+
+        if (json_variant_is_unsigned(variant)) {
+                *target = json_variant_unsigned(variant);
+                return 0;
+        }
+
+        json_log(variant, flags, 0, "JSON field '%s' is not an unsigned integer.", strna(name));
+        return -EINVAL;
+}
+
+/* Dispatch callback: stores a JSON unsigned integer in the 'uint32_t' that 'userdata' points to. Logs
+ * and returns -EINVAL if the variant is not an unsigned integer, -ERANGE if the value exceeds
+ * UINT32_MAX, and 0 on success. */
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        uint32_t *target = userdata;
+        uintmax_t value;
+
+        assert(variant);
+        assert(target);
+
+        if (!json_variant_is_unsigned(variant)) {
+                json_log(variant, flags, 0, "JSON field '%s' is not an unsigned integer.", strna(name));
+                return -EINVAL;
+        }
+
+        value = json_variant_unsigned(variant);
+        if (value > UINT32_MAX) {
+                json_log(variant, flags, 0, "JSON field '%s' out of bounds.", strna(name));
+                return -ERANGE;
+        }
+
+        *target = (uint32_t) value;
+        return 0;
+}
+
+/* Dispatch callback: stores a JSON integer in the 'int32_t' that 'userdata' points to. Logs and returns
+ * -EINVAL if the variant is not an integer, -ERANGE if the value is outside of INT32_MIN…INT32_MAX,
+ * and 0 on success. */
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        int32_t *target = userdata;
+        intmax_t value;
+
+        assert(variant);
+        assert(target);
+
+        if (!json_variant_is_integer(variant)) {
+                json_log(variant, flags, 0, "JSON field '%s' is not an integer.", strna(name));
+                return -EINVAL;
+        }
+
+        value = json_variant_integer(variant);
+        if (value < INT32_MIN || value > INT32_MAX) {
+                json_log(variant, flags, 0, "JSON field '%s' out of bounds.", strna(name));
+                return -ERANGE;
+        }
+
+        *target = (int32_t) value;
+        return 0;
+}
+
+/* Dispatch callback: stores a copy of a JSON string in the 'char*' that 'userdata' points to, replacing
+ * the previous value via free_and_strdup(). A JSON null frees the previous value and resets the pointer
+ * with mfree(). Logs and returns -EINVAL if the variant is neither null nor a string; if copying the
+ * string fails the error is logged and the result of json_log() is returned. Returns 0 on success. */
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        char **target = userdata;
+        int r;
+
+        assert(variant);
+        assert(target);
+
+        if (json_variant_is_null(variant)) {
+                *target = mfree(*target);
+                return 0;
+        }
+
+        if (json_variant_is_string(variant)) {
+                r = free_and_strdup(target, json_variant_string(variant));
+                if (r < 0)
+                        return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+                return 0;
+        }
+
+        json_log(variant, flags, 0, "JSON field '%s' is not a string.", strna(name));
+        return -EINVAL;
+}
+
+/* Dispatch callback: converts a JSON array of strings into a string vector and stores it in the
+ * 'char**' that 'userdata' points to, freeing the previous value. A JSON null frees the previous value
+ * and resets the pointer with strv_free(). The destination is only modified once the whole array was
+ * converted successfully.
+ *
+ * Logs and returns -EINVAL if the variant is neither null nor an array, or if any array element is not
+ * a string; if appending an element fails the error is logged and the result of json_log() is returned.
+ * Returns 0 on success. */
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        _cleanup_strv_free_ char **l = NULL;
+        char ***s = userdata;
+        size_t i, n;
+        int r;
+
+        assert(variant);
+        assert(s);
+
+        if (json_variant_is_null(variant)) {
+                *s = strv_free(*s);
+                return 0;
+        }
+
+        if (!json_variant_is_array(variant)) {
+                /* json_log() takes the dispatch flags second and the error third */
+                json_log(variant, flags, 0, "JSON field '%s' is not an array.", strna(name));
+                return -EINVAL;
+        }
+
+        n = json_variant_elements(variant);
+        for (i = 0; i < n; i++) {
+                JsonVariant *e;
+
+                assert_se(e = json_variant_by_index(variant, i));
+
+                if (!json_variant_is_string(e)) {
+                        json_log(e, flags, 0, "JSON array element is not a string.");
+                        return -EINVAL;
+                }
+
+                r = strv_extend(&l, json_variant_string(e));
+                if (r < 0)
+                        return json_log(variant, flags, r, "Failed to append array element: %m");
+        }
+
+        strv_free_and_replace(*s, l);
+        return 0;
+}
+
+/* Dispatch callback: stores a new reference to the variant in the 'JsonVariant*' that 'userdata' points
+ * to, dropping the reference to the previously stored variant. Always returns 0. */
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+        JsonVariant **p = userdata, *old;
+
+        assert(variant);
+        assert(p);
+
+        /* Take the new reference before dropping the old one: if the destination already points to
+         * 'variant' and holds its last reference, unreffing first would destroy the object we are about
+         * to reference. */
+        old = *p;
+        *p = json_variant_ref(variant);
+        json_variant_unref(old);
+
+        return 0;
+}
+
+/* Human-readable names of the JsonVariantType values, indexed by enum value. These names are what
+ * json_dispatch() prints via json_variant_type_to_string() when reporting type mismatches. */
+static const char* const json_variant_type_table[_JSON_VARIANT_TYPE_MAX] = {
+ [JSON_VARIANT_STRING] = "string",
+ [JSON_VARIANT_INTEGER] = "integer",
+ [JSON_VARIANT_UNSIGNED] = "unsigned",
+ [JSON_VARIANT_REAL] = "real",
+ [JSON_VARIANT_NUMBER] = "number",
+ [JSON_VARIANT_BOOLEAN] = "boolean",
+ [JSON_VARIANT_ARRAY] = "array",
+ [JSON_VARIANT_OBJECT] = "object",
+ [JSON_VARIANT_NULL] = "null",
+};
+
+/* Presumably generates json_variant_type_to_string()/json_variant_type_from_string() (both declared in
+ * json.h) from the table above; the macro is defined elsewhere. */
+DEFINE_STRING_TABLE_LOOKUP(json_variant_type, JsonVariantType);
diff --git a/src/shared/json.h b/src/shared/json.h
new file mode 100644
index 0000000..f8e035c
--- /dev/null
+++ b/src/shared/json.h
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "util.h"
+
+/*
+ In case you wonder why we have our own JSON implementation, here are a couple of reasons why this implementation has
+ benefits over various other implementations:
+
+ - We need support for 64bit signed and unsigned integers, i.e. the full 64,5bit range of -9223372036854775808…18446744073709551615
+ - All our variants are immutable after creation
+ - Special values such as true, false, zero, null, empty strings, empty array, empty objects require zero dynamic memory
+ - Progressive parsing
+ - Our integer/real type implicitly converts, but only if that's safe and loss-lessly possible
+ - There's a "builder" for putting together objects easily in varargs function calls
+ - There's a "dispatcher" for mapping objects to C data structures
+ - Every variant optionally carries parsing location information, which simplifies debugging and parse log error generation
+ - Formatter has color, line, column support
+
+ Limitations:
+ - Doesn't allow embedded NUL in strings
+ - Can't store integers outside of the -9223372036854775808…18446744073709551615 range (it will use 'long double' for
+ values outside this range, which is lossy)
+ - Can't store negative zero (will be treated identical to positive zero, and not retained across serialization)
+ - Can't store non-integer numbers that can't be stored in "long double" losslessly
+ - Allows creation and parsing of objects with duplicate keys. The "dispatcher" will refuse them however. This means
+ we can parse and pass around such objects, but will carefully refuse them when we convert them into our own data.
+
+ (These limitations should be pretty much in line with those of other JSON implementations, in fact might be less
+ limiting in most cases even.)
+*/
+
+typedef struct JsonVariant JsonVariant;
+
+/* The type of a JSON variant. _JSON_VARIANT_TYPE_MAX is the number of real entries (used to size lookup
+ * tables), _JSON_VARIANT_TYPE_INVALID (-1) marks "no type". */
+typedef enum JsonVariantType {
+ JSON_VARIANT_STRING,
+ JSON_VARIANT_INTEGER,
+ JSON_VARIANT_UNSIGNED,
+ JSON_VARIANT_REAL,
+ JSON_VARIANT_NUMBER, /* This is a pseudo-type: we can never create variants of this type, but we use it as a wildcard check for the above three types */
+ JSON_VARIANT_BOOLEAN,
+ JSON_VARIANT_ARRAY,
+ JSON_VARIANT_OBJECT,
+ JSON_VARIANT_NULL,
+ _JSON_VARIANT_TYPE_MAX,
+ _JSON_VARIANT_TYPE_INVALID = -1
+} JsonVariantType;
+
+/* Constructors: each one creates a new variant from the specified value and returns it in *ret. The
+ * return value is an int; callers such as json_buildv() treat negative values as errors. */
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n);
+int json_variant_new_integer(JsonVariant **ret, intmax_t i);
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u);
+int json_variant_new_real(JsonVariant **ret, long double d);
+int json_variant_new_boolean(JsonVariant **ret, bool b);
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n);
+int json_variant_new_array_strv(JsonVariant **ret, char **l);
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_null(JsonVariant **ret);
+
+/* Convenience wrapper around json_variant_new_stringn() that determines the length of 's' itself, via
+ * strlen_ptr(). */
+static inline int json_variant_new_string(JsonVariant **ret, const char *s) {
+        size_t length = strlen_ptr(s);
+
+        return json_variant_new_stringn(ret, s, length);
+}
+
+/* Reference counting. Both functions return a JsonVariant pointer; json_variant_unref_many() operates on
+ * an array of 'n' variants. */
+JsonVariant *json_variant_ref(JsonVariant *v);
+JsonVariant *json_variant_unref(JsonVariant *v);
+void json_variant_unref_many(JsonVariant **array, size_t n);
+
+/* Defines json_variant_unrefp(), for use with _cleanup_() as done in json.c */
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonVariant *, json_variant_unref);
+
+/* Accessors returning the value of a variant as the respective C type */
+const char *json_variant_string(JsonVariant *v);
+intmax_t json_variant_integer(JsonVariant *v);
+uintmax_t json_variant_unsigned(JsonVariant *v);
+long double json_variant_real(JsonVariant *v);
+bool json_variant_boolean(JsonVariant *v);
+
+/* Type inspection; json_variant_has_type() is the primitive behind the json_variant_is_xyz() helpers */
+JsonVariantType json_variant_type(JsonVariant *v);
+bool json_variant_has_type(JsonVariant *v, JsonVariantType type);
+
+/* Type predicates: thin wrappers around json_variant_has_type(), one for each JsonVariantType value
+ * (including the JSON_VARIANT_NUMBER pseudo-type). */
+static inline bool json_variant_is_string(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_STRING);
+}
+
+static inline bool json_variant_is_integer(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_INTEGER);
+}
+
+static inline bool json_variant_is_unsigned(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_UNSIGNED);
+}
+
+static inline bool json_variant_is_real(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_REAL);
+}
+
+static inline bool json_variant_is_number(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NUMBER);
+}
+
+static inline bool json_variant_is_boolean(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_BOOLEAN);
+}
+
+static inline bool json_variant_is_array(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_ARRAY);
+}
+
+static inline bool json_variant_is_object(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_OBJECT);
+}
+
+static inline bool json_variant_is_null(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NULL);
+}
+
+bool json_variant_is_negative(JsonVariant *v);
+
+/* Element access. Note that for objects the elements are the keys and values in alternation: the
+ * iteration macros below and json_dispatch() expect the key at index i and its value at index i+1. */
+size_t json_variant_elements(JsonVariant *v);
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t index);
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key);
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key);
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b);
+
+/* Iteration state of the two FOREACH macros below: the variant iterated over (evaluated only once, when
+ * the loop is entered) and the index of the current element. */
+struct json_variant_foreach_state {
+ JsonVariant *variant;
+ size_t idx;
+};
+
+/* Iterates through all elements of 'v', assigning each one to 'i' in turn. The state lives in a
+ * loop-scoped variable called '_state'. */
+#define JSON_VARIANT_ARRAY_FOREACH(i, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ i = json_variant_by_index(_state.variant, _state.idx); \
+ true; }); \
+ _state.idx++)
+
+/* Iterates through the elements of 'v' in steps of two, assigning the element at the current index to
+ * 'k' (the key) and the one following it to 'e' (the value). */
+#define JSON_VARIANT_OBJECT_FOREACH(k, e, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ k = json_variant_by_index(_state.variant, _state.idx); \
+ e = json_variant_by_index(_state.variant, _state.idx + 1); \
+ true; }); \
+ _state.idx += 2)
+
+/* Returns where a variant was parsed from: source name, line and column. json_log_internal() only makes
+ * use of this information if the source is non-NULL and both line and column are > 0. */
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column);
+
+/* Flags controlling how json_variant_format() and json_variant_dump() generate their output */
+typedef enum JsonFormatFlags {
+ JSON_FORMAT_NEWLINE = 1 << 0, /* suffix with newline */
+ JSON_FORMAT_PRETTY = 1 << 1, /* add internal whitespace to appeal to human readers */
+ JSON_FORMAT_COLOR = 1 << 2, /* insert ANSI color sequences */
+ JSON_FORMAT_COLOR_AUTO = 1 << 3, /* insert ANSI color sequences if colors_enabled() says so */
+ JSON_FORMAT_SOURCE = 1 << 4, /* prefix with source filename/line/column */
+ JSON_FORMAT_SSE = 1 << 5, /* prefix/suffix with W3C server-sent events */
+ JSON_FORMAT_SEQ = 1 << 6, /* prefix/suffix with RFC 7464 application/json-seq */
+} JsonFormatFlags;
+
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret);
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix);
+
+/* Parsers. json_parse_file() reads from 'f' if that is set and from the file at 'path' otherwise; at
+ * least one of the two must be specified. */
+int json_parse(const char *string, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_continue(const char **p, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_file(FILE *f, const char *path, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+
+/* Command codes understood by json_buildv(). They are passed through varargs as 'int', each followed by
+ * the arguments specific to the command. Do not use these directly, use the JSON_BUILD_xyz() macros
+ * below, which emit the command together with correctly typed arguments. */
+enum {
+ _JSON_BUILD_STRING,
+ _JSON_BUILD_INTEGER,
+ _JSON_BUILD_UNSIGNED,
+ _JSON_BUILD_REAL,
+ _JSON_BUILD_BOOLEAN,
+ _JSON_BUILD_ARRAY_BEGIN,
+ _JSON_BUILD_ARRAY_END,
+ _JSON_BUILD_OBJECT_BEGIN,
+ _JSON_BUILD_OBJECT_END,
+ _JSON_BUILD_PAIR,
+ _JSON_BUILD_PAIR_CONDITION,
+ _JSON_BUILD_NULL,
+ _JSON_BUILD_VARIANT,
+ _JSON_BUILD_LITERAL,
+ _JSON_BUILD_STRV,
+ _JSON_BUILD_MAX,
+};
+
+/* Each macro expands to a command code followed by its arguments. The arguments are routed through a
+ * typed temporary ('_x') so that they are converted to exactly the type json_buildv() fetches with
+ * va_arg() (bool arguments are read back as 'int' there, due to default argument promotion).
+ *
+ * JSON_BUILD_PAIR_CONDITION() takes a condition, a field name and a value: if the condition is false
+ * json_buildv() suppresses both the name and the value. */
+#define JSON_BUILD_STRING(s) _JSON_BUILD_STRING, ({ const char *_x = s; _x; })
+#define JSON_BUILD_INTEGER(i) _JSON_BUILD_INTEGER, ({ intmax_t _x = i; _x; })
+#define JSON_BUILD_UNSIGNED(u) _JSON_BUILD_UNSIGNED, ({ uintmax_t _x = u; _x; })
+#define JSON_BUILD_REAL(d) _JSON_BUILD_REAL, ({ long double _x = d; _x; })
+#define JSON_BUILD_BOOLEAN(b) _JSON_BUILD_BOOLEAN, ({ bool _x = b; _x; })
+#define JSON_BUILD_ARRAY(...) _JSON_BUILD_ARRAY_BEGIN, __VA_ARGS__, _JSON_BUILD_ARRAY_END
+#define JSON_BUILD_OBJECT(...) _JSON_BUILD_OBJECT_BEGIN, __VA_ARGS__, _JSON_BUILD_OBJECT_END
+#define JSON_BUILD_PAIR(n, ...) _JSON_BUILD_PAIR, ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_PAIR_CONDITION(c, n, ...) _JSON_BUILD_PAIR_CONDITION, ({ bool _x = c; _x; }), ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_NULL _JSON_BUILD_NULL
+#define JSON_BUILD_VARIANT(v) _JSON_BUILD_VARIANT, ({ JsonVariant *_x = v; _x; })
+#define JSON_BUILD_LITERAL(l) _JSON_BUILD_LITERAL, ({ const char *_x = l; _x; })
+#define JSON_BUILD_STRV(l) _JSON_BUILD_STRV, ({ char **_x = l; _x; })
+
+/* Build a variant from a sequence of JSON_BUILD_xyz() invocations. Processing ends by itself once the
+ * top-level value is complete, no terminating argument is needed. */
+int json_build(JsonVariant **ret, ...);
+int json_buildv(JsonVariant **ret, va_list ap);
+
+/* A bitmask of flags used by the dispatch logic. Note that this is a combined bit mask, that is generated from the bit
+ * mask originally passed into json_dispatch(), the individual bitmask associated with the static JsonDispatch callout
+ * entry, as well as the bitmask specified for json_log() calls */
+typedef enum JsonDispatchFlags {
+ /* The following three may be set in JsonDispatch's .flags field or the json_dispatch() flags parameter */
+ JSON_PERMISSIVE = 1 << 0, /* Shall parsing errors be considered non-fatal for this property, i.e. be skipped instead of failing the dispatch? */
+ JSON_MANDATORY = 1 << 1, /* Should existence of this property be mandatory? */
+ JSON_LOG = 1 << 2, /* Should the parser log about errors? */
+
+ /* The following two may be passed into json_log() in addition to the three above */
+ JSON_DEBUG = 1 << 3, /* Indicates that this log message is a debug message */
+ JSON_WARNING = 1 << 4, /* Indicates that this log message is a warning message */
+} JsonDispatchFlags;
+
+/* Callback invoked by json_dispatch() for a matching field: receives the field name, its value, the
+ * merged flags and the destination pointer. Negative return values are treated as errors. */
+typedef int (*JsonDispatchCallback)(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+/* One entry of a dispatch table. Tables are terminated by an entry with .name == NULL. */
+typedef struct JsonDispatch {
+ const char *name; /* field name to match; (const char*) -1 matches any field */
+ JsonVariantType type; /* required type of the value, or _JSON_VARIANT_TYPE_INVALID to accept any */
+ JsonDispatchCallback callback; /* invoked for the field, may be NULL */
+ size_t offset; /* added (in bytes) to json_dispatch()'s userdata before it is passed to the callback */
+ JsonDispatchFlags flags; /* ORed into the flags passed to json_dispatch() for this field */
+} JsonDispatch;
+
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata);
+
+/* Ready-made callbacks. 'userdata' must point to a destination of the matching type: char*, char**,
+ * bool, int, JsonVariant*, intmax_t, uintmax_t, uint32_t and int32_t respectively. */
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+/* The 64bit dispatchers are plain aliases of the (u)intmax_t ones, which is only correct if the types
+ * have the same size, hence verify that at compile time. */
+assert_cc(sizeof(uintmax_t) == sizeof(uint64_t))
+#define json_dispatch_uint64 json_dispatch_unsigned
+
+assert_cc(sizeof(intmax_t) == sizeof(int64_t))
+#define json_dispatch_int64 json_dispatch_integer
+
+/* Maps dispatch flags to the log level json_log() shall log at:
+ *
+ *   LOG_DEBUG    if logging was not requested (JSON_LOG unset), or the message is marked JSON_DEBUG
+ *   LOG_WARNING  otherwise, if we are in permissive mode or the message is marked JSON_WARNING
+ *   LOG_ERR      in all other cases
+ */
+static inline int json_dispatch_level(JsonDispatchFlags flags) {
+
+        if ((flags & JSON_DEBUG) || !(flags & JSON_LOG))
+                return LOG_DEBUG;
+
+        return (flags & (JSON_PERMISSIVE|JSON_WARNING)) ? LOG_WARNING : LOG_ERR;
+}
+
+/* Backend of json_log(). 'file', 'line' and 'func' describe the C call site; the location within the
+ * JSON source is derived from 'variant'. */
+int json_log_internal(JsonVariant *variant, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8);
+
+/* Logs a message about 'variant'. The log level is derived from 'flags' via json_dispatch_level(). If
+ * that level is currently not being logged, json_log_internal() is not called at all and the expression
+ * evaluates to -abs(error). Parameter order: variant, dispatch flags, error, then the format string and
+ * its arguments. */
+#define json_log(variant, flags, error, ...) \
+ ({ \
+ int _level = json_dispatch_level(flags), _e = (error); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? json_log_internal(variant, _level, _e, __FILE__, __LINE__, __func__, __VA_ARGS__) \
+ : -abs(_e); \
+ })
+
+/* Turns a string literal into a JsonVariant pointer without allocating memory: the literal is placed in
+ * a static, 2-byte aligned array (so the lowest bit of its address is zero, which is asserted), and the
+ * returned pointer is that address with the lowest bit set.
+ * NOTE(review): presumably the variant implementation recognizes such odd pointers as constant strings;
+ * that logic is not part of this header. */
+#define JSON_VARIANT_STRING_CONST(x) _JSON_VARIANT_STRING_CONST(UNIQ, (x))
+
+#define _JSON_VARIANT_STRING_CONST(xq, x) \
+ ({ \
+ _align_(2) static const char UNIQ_T(json_string_const, xq)[] = (x); \
+ assert((((uintptr_t) UNIQ_T(json_string_const, xq)) & 1) == 0); \
+ (JsonVariant*) ((uintptr_t) UNIQ_T(json_string_const, xq) + 1); \
+ })
+
+/* Conversion between JsonVariantType values and their names ("string", "integer", …) */
+const char *json_variant_type_to_string(JsonVariantType t);
+JsonVariantType json_variant_type_from_string(const char *s);
diff --git a/src/shared/libshared.sym b/src/shared/libshared.sym
new file mode 100644
index 0000000..6a7495a
--- /dev/null
+++ b/src/shared/libshared.sym
@@ -0,0 +1,3 @@
+SD_SHARED {
+ global: *;
+};
diff --git a/src/shared/linux-3.13/dm-ioctl.h b/src/shared/linux-3.13/dm-ioctl.h
new file mode 100644
index 0000000..c8a4302
--- /dev/null
+++ b/src/shared/linux-3.13/dm-ioctl.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004 - 2009 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
+
+#include <linux/types.h>
+
+#define DM_DIR "mapper" /* Slashes not supported */
+#define DM_CONTROL_NODE "control"
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables. Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' nor 'inactive' table
+ * slots will be filled. The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device or set its uuid if none was previously supplied.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed. Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device. If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_. Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device. This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device. The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ *
+ * DM_TARGET_MSG:
+ * Pass a message string to the target at a specific offset of a device.
+ *
+ * DM_DEV_SET_GEOMETRY:
+ * Set the geometry of a device by passing in a string in this format:
+ *
+ * "cylinders heads sectors_per_track start_sector"
+ *
+ * Beware that CHS geometry is nearly obsolete and only provided
+ * for compatibility with dm devices that can be booted by a PC
+ * BIOS. See struct hd_geometry for range limits. Also note that
+ * the geometry is erased if the device size changes.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start. If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+ /*
+ * The version number is made up of three parts:
+ * major - no backward or forward compatibility,
+ * minor - only backwards compatible,
+ * patch - both backwards and forwards compatible.
+ *
+ * All clients of the ioctl interface should fill in the
+ * version number of the interface that they were
+ * compiled with.
+ *
+ * All recognised ioctl commands (ie. those that don't
+ * return -ENOTTY) fill out this field, even if the
+ * command failed.
+ */
+ __u32 version[3]; /* in/out */
+ __u32 data_size; /* total size of data passed in
+ * including this struct */
+
+ __u32 data_start; /* offset to start of data
+ * relative to start of this struct */
+
+ __u32 target_count; /* in/out */
+ __s32 open_count; /* out */
+ __u32 flags; /* in/out */
+
+ /*
+ * event_nr holds either the event number (input and output) or the
+ * udev cookie value (input only).
+ * The DM_DEV_WAIT ioctl takes an event number as input.
+ * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+ * use the field as a cookie to return in the DM_COOKIE
+ * variable with the uevents they issue.
+ * For output, the ioctls return the event number, not the cookie.
+ */
+ __u32 event_nr; /* in/out */
+ __u32 padding;
+
+ __u64 dev; /* in/out */
+
+ char name[DM_NAME_LEN]; /* device name */
+ char uuid[DM_UUID_LEN]; /* unique identifier for
+ * the block device */
+ char data[7]; /* padding or data */
+};
+
+/*
+ * Used to specify tables. These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+ __u64 sector_start;
+ __u64 length;
+ __s32 status; /* used when reading from kernel only */
+
+ /*
+ * Location of the next dm_target_spec.
+ * - When specifying targets on a DM_TABLE_LOAD command, this value is
+ * the number of bytes from the start of the "current" dm_target_spec
+ * to the start of the "next" dm_target_spec.
+ * - When retrieving targets on a DM_TABLE_STATUS command, this value
+ * is the number of bytes from the start of the first dm_target_spec
+ * (that follows the dm_ioctl struct) to the start of the "next"
+ * dm_target_spec.
+ */
+ __u32 next;
+
+ char target_type[DM_MAX_TYPE_NAME];
+
+ /*
+ * Parameter string starts immediately after this object.
+ * Be careful to add padding after string to ensure correct
+ * alignment of subsequent dm_target_spec.
+ */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+ __u32 count; /* Array size */
+ __u32 padding; /* unused */
+ __u64 dev[0]; /* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+ __u64 dev;
+ __u32 next; /* offset to the next record from
+ the _start_ of this */
+ char name[0];
+};
+
+/*
+ * Used to retrieve the target versions
+ */
+struct dm_target_versions {
+ __u32 next;
+ __u32 version[3];
+
+ char name[0];
+};
+
+/*
+ * Used to pass message to a target
+ */
+struct dm_target_msg {
+ __u64 sector; /* Device sector */
+
+ char message[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Top level cmds */
+ DM_VERSION_CMD = 0,
+ DM_REMOVE_ALL_CMD,
+ DM_LIST_DEVICES_CMD,
+
+ /* device level cmds */
+ DM_DEV_CREATE_CMD,
+ DM_DEV_REMOVE_CMD,
+ DM_DEV_RENAME_CMD,
+ DM_DEV_SUSPEND_CMD,
+ DM_DEV_STATUS_CMD,
+ DM_DEV_WAIT_CMD,
+
+ /* Table level cmds */
+ DM_TABLE_LOAD_CMD,
+ DM_TABLE_CLEAR_CMD,
+ DM_TABLE_DEPS_CMD,
+ DM_TABLE_STATUS_CMD,
+
+ /* Added later */
+ DM_LIST_VERSIONS_CMD,
+ DM_TARGET_MSG_CMD,
+ DM_DEV_SET_GEOMETRY_CMD
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+
+#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
+#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR 4
+#define DM_VERSION_MINOR 27
+#define DM_VERSION_PATCHLEVEL 0
+#define DM_VERSION_EXTRA "-ioctl (2013-10-30)"
+
+/* Status bits */
+#define DM_READONLY_FLAG (1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
+
+/*
+ * This flag is now ignored.
+ */
+#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */
+
+/*
+ * Set this to avoid attempting to freeze any filesystem when suspending.
+ */
+#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */
+
+/*
+ * Set this to suspend without flushing queued ios.
+ * Also disables flushing uncommitted changes in the thin target before
+ * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT.
+ */
+#define DM_NOFLUSH_FLAG (1 << 11) /* In */
+
+/*
+ * If set, any table information returned will relate to the inactive
+ * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG
+ * is set before using the data returned.
+ */
+#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */
+
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */
+
+/*
+ * If set, rename changes the uuid not the name. Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG (1 << 14) /* In */
+
+/*
+ * If set, all buffers are wiped after use. Use when sending
+ * or requesting sensitive data such as an encryption key.
+ */
+#define DM_SECURE_DATA_FLAG (1 << 15) /* In */
+
+/*
+ * If set, a message generated output data.
+ */
+#define DM_DATA_OUT_FLAG (1 << 16) /* Out */
+
+/*
+ * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if
+ * the device cannot be removed immediately because it is still in use
+ * it should instead be scheduled for removal when it gets closed.
+ *
+ * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this
+ * flag indicates that the device is scheduled to be removed when it
+ * gets closed.
+ */
+#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
+
+#endif /* _LINUX_DM_IOCTL_V4_H */
diff --git a/src/shared/linux/auto_dev-ioctl.h b/src/shared/linux/auto_dev-ioctl.h
new file mode 100644
index 0000000..d9838eb
--- /dev/null
+++ b/src/shared/linux/auto_dev-ioctl.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright © 2008 Red Hat, Inc. All rights reserved.
+ * Copyright © 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _LINUX_AUTO_DEV_IOCTL_H
+#define _LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/auto_fs.h>
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif /* __KERNEL__ */
+
+#define AUTOFS_DEVICE_NAME "autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+
+#define AUTOFS_DEVID_LEN 16
+
+#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+struct args_protover {
+ __u32 version;
+};
+
+struct args_protosubver {
+ __u32 sub_version;
+};
+
+struct args_openmount {
+ __u32 devid;
+};
+
+struct args_ready {
+ __u32 token;
+};
+
+struct args_fail {
+ __u32 token;
+ __s32 status;
+};
+
+struct args_setpipefd {
+ __s32 pipefd;
+};
+
+struct args_timeout {
+ __u64 timeout;
+};
+
+struct args_requester {
+ __u32 uid;
+ __u32 gid;
+};
+
+struct args_expire {
+ __u32 how;
+};
+
+struct args_askumount {
+ __u32 may_umount;
+};
+
+struct args_ismountpoint {
+ union {
+ struct args_in {
+ __u32 type;
+ } in;
+ struct args_out {
+ __u32 devid;
+ __u32 magic;
+ } out;
+ };
+};
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path, size must account for the total length
+ * of the chunk of memory; otherwise it is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ /* Command parameters */
+
+ union {
+ struct args_protover protover;
+ struct args_protosubver protosubver;
+ struct args_openmount openmount;
+ struct args_ready ready;
+ struct args_fail fail;
+ struct args_setpipefd setpipefd;
+ struct args_timeout timeout;
+ struct args_requester requester;
+ struct args_expire expire;
+ struct args_askumount askumount;
+ struct args_ismountpoint ismountpoint;
+ };
+
+ char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) {
+        /* Zero the fixed-size part of *in, then set ioctlfd to -1, size to the struct size, and the version. */
+        memset(in, 0, sizeof(*in));
+        in->ioctlfd = -1;
+        in->size = sizeof(*in);
+        in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+        in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+}
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to autofs-dev-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Get various version info */
+ AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+ /* Open mount ioctl fd */
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+ /* Close mount ioctl fd */
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+ /* Mount/expire status returns */
+ AUTOFS_DEV_IOCTL_READY_CMD,
+ AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+ /* Activate/deactivate autofs mount */
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+ /* Expiry timeout */
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+ /* Get mount last requesting uid and gid */
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+ /* Check for eligible expire candidates */
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+ /* Request busy status */
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+ /* Check if path is a mountpoint */
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#define AUTOFS_IOCTL 0x93
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif /* _LINUX_AUTO_DEV_IOCTL_H */
diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h
new file mode 100644
index 0000000..1df9e7e
--- /dev/null
+++ b/src/shared/linux/bpf.h
@@ -0,0 +1,1109 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+/* jmp encodings */
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+ BPF_PROG_GET_NEXT_ID,
+ BPF_MAP_GET_NEXT_ID,
+ BPF_PROG_GET_FD_BY_ID,
+ BPF_MAP_GET_FD_BY_ID,
+ BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
+};
+
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags have to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When a child program makes a decision (like picking TCP CA or sock bind),
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+#define BPF_PSEUDO_MAP_FD 1
+
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+/* flags for BPF_MAP_CREATE command */
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
+union bpf_attr { /* attributes for the BPF commands; each member struct below names the command(s) it is used by */
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
+ char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* checked when prog_type=kprobe */
+ __u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 prog_ifindex; /* ifindex of netdev to prep for */
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ __u32 file_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* struct (member 'test') used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in;
+ __u32 data_size_out;
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ } test;
+
+ struct { /* anonymous struct used by BPF_*_GET_*_ID */
+ union {
+ __u32 start_id;
+ __u32 prog_id;
+ __u32 map_id;
+ };
+ __u32 next_id;
+ __u32 open_flags;
+ };
+
+ struct { /* struct (member 'info') used by BPF_OBJ_GET_INFO_BY_FD */
+ __u32 bpf_fd;
+ __u32 info_len;
+ __aligned_u64 info;
+ } info;
+
+ struct { /* struct (member 'query') used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
+} __attribute__((aligned(8)));
+
+/* BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(&map, &key)
+ * Return: Map value or NULL
+ *
+ * int bpf_map_update_elem(&map, &key, &value, flags)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_map_delete_elem(&map, &key)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_probe_read(void *dst, int size, void *src)
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Return: current ktime
+ *
+ * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
+ * Return: length of buffer written or negative error
+ *
+ * u32 bpf_prandom_u32(void)
+ * Return: random value
+ *
+ * u32 bpf_raw_smp_processor_id(void)
+ * Return: SMP processor ID
+ *
+ * int bpf_skb_store_bytes(skb, offset, from, len, flags)
+ * store bytes into packet
+ * @skb: pointer to skb
+ * @offset: offset within packet from skb->mac_header
+ * @from: pointer where to copy bytes from
+ * @len: number of bytes to store into packet
+ * @flags: bit 0 - if true, recompute skb->csum
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_l3_csum_replace(skb, offset, from, to, flags)
+ * recompute IP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where IP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_l4_csum_replace(skb, offset, from, to, flags)
+ * recompute TCP/UDP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where TCP/UDP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * bit 4 - is pseudo header
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_tail_call(ctx, prog_array_map, index)
+ * jump into another BPF program
+ * @ctx: context pointer passed to next program
+ * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+ * @index: 32-bit index inside array that selects specific program to run
+ * Return: 0 on success or negative error
+ *
+ * int bpf_clone_redirect(skb, ifindex, flags)
+ * redirect to another netdev
+ * @skb: pointer to skb
+ * @ifindex: ifindex of the net device
+ * @flags: bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return: current->tgid << 32 | current->pid
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return: current_gid << 32 | current_uid
+ *
+ * int bpf_get_current_comm(char *buf, int size_of_buf)
+ * stores current->comm into buf
+ * Return: 0 on success or negative error
+ *
+ * u32 bpf_get_cgroup_classid(skb)
+ * retrieve a proc's classid
+ * @skb: pointer to skb
+ * Return: classid if != 0
+ *
+ * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_vlan_pop(skb)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_get_tunnel_key(skb, key, size, flags)
+ * int bpf_skb_set_tunnel_key(skb, key, size, flags)
+ * retrieve or populate tunnel metadata
+ * @skb: pointer to skb
+ * @key: pointer to 'struct bpf_tunnel_key'
+ * @size: size of 'struct bpf_tunnel_key'
+ * @flags: room for future extensions
+ * Return: 0 on success or negative error
+ *
+ * u64 bpf_perf_event_read(map, flags)
+ * read perf event counter value
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * Return: value of perf event counter read or error code
+ *
+ * int bpf_redirect(ifindex, flags)
+ * redirect to another netdev
+ * @ifindex: ifindex of the net device
+ * @flags:
+ * cls_bpf:
+ * bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * xdp_bpf:
+ * all bits - reserved
+ * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ * xdp_bpf: XDP_REDIRECT on success or XDP_ABORT on error
+ * int bpf_redirect_map(map, key, flags)
+ * redirect to endpoint in map
+ * @map: pointer to dev map
+ * @key: index in map to lookup
+ * @flags: --
+ * Return: XDP_REDIRECT on success or XDP_ABORT on error
+ *
+ * u32 bpf_get_route_realm(skb)
+ * retrieve a dst's tclassid
+ * @skb: pointer to skb
+ * Return: realm if != 0
+ *
+ * int bpf_perf_event_output(ctx, map, flags, data, size)
+ * output perf raw sample
+ * @ctx: struct pt_regs*
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * @data: data on stack to be output as raw data
+ * @size: size of data
+ * Return: 0 on success or negative error
+ *
+ * int bpf_get_stackid(ctx, map, flags)
+ * walk user or kernel stack and return id
+ * @ctx: struct pt_regs*
+ * @map: pointer to stack_trace map
+ * @flags: bits 0-7 - number of stack frames to skip
+ * bit 8 - collect user stack instead of kernel
+ * bit 9 - compare stacks by hash only
+ * bit 10 - if two different stacks hash into the same stackid
+ * discard old
+ * other bits - reserved
+ * Return: >= 0 stackid on success or negative error
+ *
+ * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
+ * calculate csum diff
+ * @from: raw from buffer
+ * @from_size: length of from buffer
+ * @to: raw to buffer
+ * @to_size: length of to buffer
+ * @seed: optional seed
+ * Return: csum result or negative error code
+ *
+ * int bpf_skb_get_tunnel_opt(skb, opt, size)
+ * retrieve tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: option size
+ *
+ * int bpf_skb_set_tunnel_opt(skb, opt, size)
+ * populate tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_proto(skb, proto, flags)
+ * Change protocol of the skb. Currently supported is v4 -> v6,
+ * v6 -> v4 transitions. The helper will also resize the skb. eBPF
+ * program is expected to fill the new headers via skb_store_bytes
+ * and lX_csum_replace.
+ * @skb: pointer to skb
+ * @proto: new skb->protocol type
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_type(skb, type)
+ * Change packet type of skb.
+ * @skb: pointer to skb
+ * @type: new skb->pkt_type type
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_under_cgroup(skb, map, index)
+ * Check cgroup2 membership of skb
+ * @skb: pointer to skb
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 skb failed the cgroup2 descendant test
+ * == 1 skb succeeded the cgroup2 descendant test
+ * < 0 error
+ *
+ * u32 bpf_get_hash_recalc(skb)
+ * Retrieve and possibly recalculate skb->hash.
+ * @skb: pointer to skb
+ * Return: hash
+ *
+ * u64 bpf_get_current_task(void)
+ * Returns current task_struct
+ * Return: current
+ *
+ * int bpf_probe_write_user(void *dst, void *src, int len)
+ * safely attempt to write to a location
+ * @dst: destination address in userspace
+ * @src: source address on stack
+ * @len: number of bytes to copy
+ * Return: 0 on success or negative error
+ *
+ * int bpf_current_task_under_cgroup(map, index)
+ * Check cgroup2 membership of current task
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 current failed the cgroup2 descendant test
+ * == 1 current succeeded the cgroup2 descendant test
+ * < 0 error
+ *
+ * int bpf_skb_change_tail(skb, len, flags)
+ * The helper will resize the skb to the given new size, to be used f.e.
+ * with control messages.
+ * @skb: pointer to skb
+ * @len: new skb length
+ * @flags: reserved
+ * Return: 0 on success or negative error
+ *
+ * int bpf_skb_pull_data(skb, len)
+ * The helper will pull in non-linear data in case the skb is non-linear
+ * and not all of len are part of the linear section. Only needed for
+ * read/write with direct packet access.
+ * @skb: pointer to skb
+ * @len: len to make read/writeable
+ * Return: 0 on success or negative error
+ *
+ * s64 bpf_csum_update(skb, csum)
+ * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+ * @skb: pointer to skb
+ * @csum: csum to add
+ * Return: csum on success or negative error
+ *
+ * void bpf_set_hash_invalid(skb)
+ * Invalidate current skb->hash.
+ * @skb: pointer to skb
+ *
+ * int bpf_get_numa_node_id()
+ * Return: Id of current NUMA node.
+ *
+ * int bpf_skb_change_head(skb, len, flags)
+ * Grows headroom of skb and adjusts MAC header offset accordingly.
+ * Will extend/reallocate as required automatically.
+ * May change skb data pointer and will thus invalidate any check
+ * performed for direct packet access.
+ * @skb: pointer to skb
+ * @len: length of header to be pushed in front
+ * @flags: Flags (unused for now)
+ * Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ * Adjust the xdp_md.data by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: A positive/negative integer to be added to xdp_md.data
+ * Return: 0 on success or negative on error
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Copy a NUL terminated string from unsafe address. In case the string
+ * length is smaller than size, the target is not padded with further NUL
+ * bytes. In case the string length is larger than size, just size-1
+ * bytes are copied and the last byte is set to NUL.
+ * @dst: destination address
+ * @size: maximum number of bytes to copy, including the trailing NUL
+ * @unsafe_ptr: unsafe address
+ * Return:
+ * > 0 length of the string including the trailing NUL on success
+ * < 0 error
+ *
+ * u64 bpf_get_socket_cookie(skb)
+ * Get the cookie for the socket stored inside sk_buff.
+ * @skb: pointer to skb
+ * Return: 8 Bytes non-decreasing number on success or 0 if the socket
+ * field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ * Get the owner uid of the socket stored inside sk_buff.
+ * @skb: pointer to skb
+ * Return: uid of the socket owner on success or overflowuid if failed.
+ *
+ * u32 bpf_set_hash(skb, hash)
+ * Set full skb->hash.
+ * @skb: pointer to skb
+ * @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ * Calls setsockopt. Not all opts are available, only those with
+ * integer optvals plus TCP_CONGESTION.
+ * Supported levels: SOL_SOCKET and IPPROTO_TCP
+ * @bpf_socket: pointer to bpf_socket
+ * @level: SOL_SOCKET or IPPROTO_TCP
+ * @optname: option name
+ * @optval: pointer to option value
+ * @optlen: length of optval in bytes
+ * Return: 0 or negative error
+ *
+ * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
+ * Calls getsockopt. Not all opts are available.
+ * Supported levels: IPPROTO_TCP
+ * @bpf_socket: pointer to bpf_socket
+ * @level: IPPROTO_TCP
+ * @optname: option name
+ * @optval: pointer to option value
+ * @optlen: length of optval in bytes
+ * Return: 0 or negative error
+ *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ * Grow or shrink room in sk_buff.
+ * @skb: pointer to skb
+ * @len_diff: (signed) amount of room to grow/shrink
+ * @mode: operation mode (enum bpf_adj_room_mode)
+ * @flags: reserved for future use
+ * Return: 0 on success or negative error code
+ *
+ * int bpf_sk_redirect_map(map, key, flags)
+ * Redirect skb to a sock in map using key as a lookup key for the
+ * sock in map.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_PASS
+ *
+ * int bpf_sock_map_update(skops, map, key, flags)
+ * @skops: pointer to bpf_sock_ops
+ * @map: pointer to sockmap to update
+ * @key: key to insert/update sock in map
+ * @flags: same flags as map update elem
+ *
+ * int bpf_xdp_adjust_meta(xdp_md, delta)
+ * Adjust the xdp_md.data_meta by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: A positive/negative integer to be added to xdp_md.data_meta
+ * Return: 0 on success or negative on error
+ *
+ * int bpf_perf_event_read_value(map, flags, buf, buf_size)
+ * read perf event counter value and perf event enabled/running time
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return: 0 on success or negative error code
+ *
+ * int bpf_perf_prog_read_value(ctx, buf, buf_size)
+ * read perf prog attached perf event counter and enabled/running time
+ * @ctx: pointer to ctx
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return: 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ * @pt_regs: pointer to struct pt_regs
+ * @rc: the return value to set
+ */
+#define __BPF_FUNC_MAPPER(FN) \
+ FN(unspec), \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid), \
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt), \
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+/* X-macro expansion: __BPF_FUNC_MAPPER(FN) applies FN to every helper name, and
+ * __BPF_ENUM_FN turns each name x into the enumerator BPF_FUNC_x. No explicit
+ * values are assigned, so ids count up from 0 (BPF_FUNC_unspec) in the order
+ * the names are listed in __BPF_FUNC_MAPPER.
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ /* the mapper's last entry already ends in a comma, hence none is added here */
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ /* one past the last helper id generated above */
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* BPF_FUNC_get_stackid flags. */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+};
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext;
+ __u32 tunnel_label;
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled separately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+ __u32 mark;
+ __u32 priority;
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+ XDP_REDIRECT,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+ __u32 data_meta;
+ /* Below access go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
+};
+
+enum sk_action {
+ SK_DROP = 0,
+ SK_PASS,
+};
+
+#define BPF_TAG_SIZE 8
+
+struct bpf_prog_info {
+ __u32 type;
+ __u32 id;
+ __u8 tag[BPF_TAG_SIZE];
+ __u32 jited_prog_len;
+ __u32 xlated_prog_len;
+ __aligned_u64 jited_prog_insns;
+ __aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+ __u32 type;
+ __u32 id;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
+} __attribute__((aligned(8)));
+
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of these fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+};
+
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
+ __u32 major;
+ __u32 minor;
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h
new file mode 100644
index 0000000..afe7433
--- /dev/null
+++ b/src/shared/linux/bpf_common.h
@@ -0,0 +1,55 @@
+#ifndef __LINUX_BPF_COMMON_H__
+#define __LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* __LINUX_BPF_COMMON_H__ */
diff --git a/src/shared/linux/libbpf.h b/src/shared/linux/libbpf.h
new file mode 100644
index 0000000..391eee5
--- /dev/null
+++ b/src/shared/linux/libbpf.h
@@ -0,0 +1,207 @@
+/* eBPF mini library */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+#include <linux/bpf.h>
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn.
+ * It is BPF_LD_IMM64_RAW with src_reg fixed to 0.
+ */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+/* Expands to TWO comma-separated struct bpf_insn compound literals, so one use
+ * of this macro fills two consecutive elements of an instruction initializer
+ * list:
+ * - the first carries the opcode, DST, SRC and the low 32 bits of IMM
+ * (IMM cast to __u32);
+ * - the second has code 0 and zeroed registers/offset, and carries the upper
+ * 32 bits of IMM (IMM cast to __u64 and shifted right by 32).
+ */
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Unconditional jumps */
+
+#define BPF_JMP_A(OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_JA, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#endif
diff --git a/src/shared/lockfile-util.c b/src/shared/lockfile-util.c
new file mode 100644
index 0000000..260c208
--- /dev/null
+++ b/src/shared/lockfile-util.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "missing_fcntl.h"
+#include "path-util.h"
+
+/* Creates (if needed) and locks the lock file at path p.
+ *
+ * operation is a flock()-style request: LOCK_EX selects a write lock, anything
+ * else a read lock; LOCK_NB may be ORed in to make the attempt non-blocking.
+ *
+ * On success *ret receives a private copy of the path, the locked file
+ * descriptor and the operation, and 0 is returned. On failure a negative
+ * errno-style value is returned: -ENOMEM if the path cannot be copied, -EBUSY
+ * if the lock attempt fails with EAGAIN, otherwise -errno of the failing call.
+ * *ret is only written on success.
+ */
+int make_lock_file(const char *p, int operation, LockFile *ret) {
+        _cleanup_close_ int fd = -1;
+        _cleanup_free_ char *t = NULL;
+        int r;
+
+        /* Same argument contract as make_lock_file_for(); without this a NULL
+         * path would be handed straight to strdup(). */
+        assert(p);
+        assert(ret);
+
+        /*
+         * We use open file description ("UNPOSIX") locks if they are
+         * available. They have nice semantics, and are mostly compatible
+         * with NFS. However, they are only available on new kernels. When
+         * we detect we are running on an older kernel, then we fall back to
+         * good old BSD locks. They also have nice semantics, but are
+         * slightly problematic on NFS, where they are upgraded to POSIX
+         * locks, even though locally they are orthogonal to POSIX locks.
+         */
+
+        t = strdup(p);
+        if (!t)
+                return -ENOMEM;
+
+        for (;;) {
+                struct flock fl = {
+                        .l_type = (operation & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK,
+                        .l_whence = SEEK_SET,
+                };
+                struct stat st;
+
+                fd = open(p, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+                if (fd < 0)
+                        return -errno;
+
+                r = fcntl(fd, (operation & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl);
+                if (r < 0) {
+
+                        /* If the kernel is too old, use good old BSD locks */
+                        if (errno == EINVAL)
+                                r = flock(fd, operation);
+
+                        if (r < 0)
+                                return errno == EAGAIN ? -EBUSY : -errno;
+                }
+
+                /* If we acquired the lock, let's check if the file
+                 * still exists in the file system. If not, then the
+                 * previous exclusive owner removed it and then closed
+                 * it. In such a case our acquired lock is worthless,
+                 * hence try again. */
+
+                if (fstat(fd, &st) < 0)
+                        return -errno;
+                if (st.st_nlink > 0)
+                        break;
+
+                fd = safe_close(fd);
+        }
+
+        ret->path = t;
+        ret->fd = fd;
+        ret->operation = operation;
+
+        /* Ownership moved into *ret; reset the locals so the _cleanup_
+         * variables no longer refer to the path and fd (presumably their
+         * handlers would otherwise release them on return). */
+        fd = -1;
+        t = NULL;
+
+        return 0;
+}
+
+/* Builds the lock file path that belongs to p and locks it via make_lock_file().
+ *
+ * The lock path is the part of p preceding its last component, followed by
+ * ".#", the last component itself and ".lck". Returns -EINVAL if that last
+ * component is not a valid file name, otherwise the result of
+ * make_lock_file().
+ */
+int make_lock_file_for(const char *p, int operation, LockFile *ret) {
+        const char *name;
+        char *lock_path, *end;
+
+        assert(p);
+        assert(ret);
+
+        name = basename(p);
+        if (!filename_is_valid(name))
+                return -EINVAL;
+
+        /* strlen(p) covers prefix and name; add ".#" (2), ".lck" (4) and NUL (1) */
+        lock_path = newa(char, strlen(p) + 2 + 4 + 1);
+
+        /* name - p is the length of everything in p before the last component */
+        end = mempcpy(lock_path, p, name - p);
+        end = stpcpy(end, ".#");
+        end = stpcpy(end, name);
+        stpcpy(end, ".lck");
+
+        return make_lock_file(lock_path, operation, ret);
+}
+
+/* Releases a lock described by f; a NULL f is a no-op.
+ *
+ * If a path is recorded, the lock file is unlinked when the lock is exclusive
+ * (after first trying to upgrade a shared lock), and the path is freed. In all
+ * cases the fd is closed and the operation reset to 0.
+ */
+void release_lock_file(LockFile *f) {
+        if (!f)
+                return;
+
+        if (!f->path)
+                goto reset;
+
+        /* If we are the exclusive owner we can safely delete the lock file
+         * itself. If we only hold a shared lock, try (without blocking) to
+         * become the exclusive owner first. */
+        if ((f->operation & ~LOCK_NB) == LOCK_SH && f->fd >= 0) {
+                static const struct flock exclusive = {
+                        .l_type = F_WRLCK,
+                        .l_whence = SEEK_SET,
+                };
+                int upgraded;
+
+                upgraded = fcntl(f->fd, F_OFD_SETLK, &exclusive);
+                if (upgraded < 0 && errno == EINVAL)
+                        /* same fallback to BSD locks as when taking the lock */
+                        upgraded = flock(f->fd, LOCK_EX|LOCK_NB);
+
+                if (upgraded >= 0)
+                        f->operation = LOCK_EX|LOCK_NB;
+        }
+
+        if ((f->operation & ~LOCK_NB) == LOCK_EX)
+                unlink_noerrno(f->path);
+
+        f->path = mfree(f->path);
+
+reset:
+        f->fd = safe_close(f->fd);
+        f->operation = 0;
+}
diff --git a/src/shared/lockfile-util.h b/src/shared/lockfile-util.h
new file mode 100644
index 0000000..e0eef34
--- /dev/null
+++ b/src/shared/lockfile-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Bookkeeping for a lock file taken with make_lock_file() or
+ * make_lock_file_for() and dropped again with release_lock_file(). */
+typedef struct LockFile {
+ /* copy of the lock file's path; freed by release_lock_file() */
+ char *path;
+ /* descriptor of the opened lock file; closed by release_lock_file() */
+ int fd;
+ /* flock()-style operation the lock was requested with (LOCK_EX or
+ * LOCK_SH, possibly ORed with LOCK_NB) */
+ int operation;
+} LockFile;
+
+/* Locks the file at path p itself; returns 0 or a negative errno-style value. */
+int make_lock_file(const char *p, int operation, LockFile *ret);
+/* Locks a ".#<name>.lck" file placed next to p rather than p itself. */
+int make_lock_file_for(const char *p, int operation, LockFile *ret);
+/* Accepts NULL; unlinks the lock file only when the lock is (or can be made) exclusive. */
+void release_lock_file(LockFile *f);
+
+/* Initializer for a LockFile that holds nothing: no path and fd -1 (operation
+ * is implicitly zero). */
+#define LOCK_FILE_INIT { .fd = -1, .path = NULL }
diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c
new file mode 100644
index 0000000..15ef0f1
--- /dev/null
+++ b/src/shared/logs-show.c
@@ -0,0 +1,1462 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <syslog.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "journal-internal.h"
+#include "json.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "output-mode.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+/* up to three lines (each up to 100 characters) or 300 characters, whichever is less */
+#define PRINT_LINE_THRESHOLD 3
+#define PRINT_CHAR_THRESHOLD 300
+
+/* Size limit used by the JSON output code: unless OUTPUT_SHOW_ALL is set, it is used as the journal
+ * data threshold, and data that reaches this size is emitted as JSON null instead of its value. */
+#define JSON_THRESHOLD 4096U
+
+/* Write the catalog text of the current entry of j to f, with every line prefixed by "-- ".
+ * Returns 0 on success, the error of sd_journal_get_catalog() or the result of log_oom() otherwise. */
+static int print_catalog(FILE *f, sd_journal *j) {
+        _cleanup_free_ char *text = NULL, *prefixed = NULL;
+        int r;
+
+        r = sd_journal_get_catalog(j, &text);
+        if (r < 0)
+                return r;
+
+        /* Strip surrounding whitespace, then give each continuation line its own "-- " marker */
+        prefixed = strreplace(strstrip(text), "\n", "\n-- ");
+        if (!prefixed)
+                return log_oom();
+
+        fprintf(f, "-- %s\n", prefixed);
+
+        return 0;
+}
+
+/* If the journal datum (data, length) starts with the prefix (field, field_len), store a newly
+ * allocated, NUL-terminated copy of the remainder in *target (freeing the previous value) and, if
+ * target_len is non-NULL, its length in *target_len.
+ *
+ * Returns 1 if the field matched, 0 if it did not, and the result of log_oom() on allocation failure. */
+static int parse_field(const void *data, size_t length, const char *field, size_t field_len, char **target, size_t *target_len) {
+        const char *payload;
+        size_t payload_len;
+        char *copy;
+
+        assert(data);
+        assert(field);
+        assert(target);
+
+        /* Not our field: either too short to hold the prefix, or the prefix differs */
+        if (length < field_len || memcmp(data, field, field_len) != 0)
+                return 0;
+
+        payload = (const char*) data + field_len;
+        payload_len = length - field_len;
+
+        copy = newdup_suffix0(char, payload, payload_len);
+        if (!copy)
+                return log_oom();
+
+        free(*target);
+        *target = copy;
+
+        if (target_len)
+                *target_len = payload_len;
+
+        return 1;
+}
+
+/* One entry of a field lookup table for parse_fieldv(): the members are passed on unchanged as the
+ * field, field_len, target and target_len arguments of parse_field(). */
+typedef struct ParseFieldVec {
+ const char *field;
+ size_t field_len;
+ char **target;
+ size_t *target_len;
+} ParseFieldVec;
+
+/* Builds a ParseFieldVec initializer; field_len is derived from _field with strlen(). */
+#define PARSE_FIELD_VEC_ENTRY(_field, _target, _target_len) \
+ { .field = _field, .field_len = strlen(_field), .target = _target, .target_len = _target_len }
+
+/* Try the journal datum (data, length) against each of the n_fields table entries in order and stop
+ * at the first one that matches. Returns 0 whether or not anything matched, or the negative error of
+ * parse_field(). */
+static int parse_fieldv(const void *data, size_t length, const ParseFieldVec *fields, unsigned n_fields) {
+        unsigned k = 0;
+
+        while (k < n_fields) {
+                const ParseFieldVec *entry = fields + k;
+                int r;
+
+                r = parse_field(data, length, entry->field, entry->field_len, entry->target, entry->target_len);
+                if (r < 0)
+                        return r;
+                if (r > 0)
+                        /* A datum carries exactly one field, no need to look further */
+                        return 0;
+
+                k++;
+        }
+
+        return 0;
+}
+
+/* Check whether the field name given by the first n bytes of name is contained in the set fields.
+ * A NULL set means "no filter" and accepts every field. Returns 1 if accepted, 0 if not. */
+static int field_set_test(Set *fields, const char *name, size_t n) {
+        char *key;
+
+        if (!fields)
+                return 1;
+
+        /* The set stores NUL-terminated strings, hence make a terminated copy of the name */
+        key = strndupa(name, n);
+        if (!key)
+                return log_oom();
+
+        return !!set_get(fields, key);
+}
+
+/* Decide whether the l bytes at p may be printed verbatim: always with OUTPUT_SHOW_ALL, otherwise
+ * only if shorter than PRINT_CHAR_THRESHOLD and accepted by utf8_is_printable(). */
+static bool shall_print(const char *p, size_t l, OutputFlags flags) {
+        assert(p);
+
+        return (flags & OUTPUT_SHOW_ALL) ||
+                (l < PRINT_CHAR_THRESHOLD && utf8_is_printable(p, l));
+}
+
+/* Print message (message_len bytes, may contain newlines) to f, one output line per message line.
+ *
+ * prefix: number of spaces emitted before every line but the first
+ * n_columns: width limit used to decide whether a line has to be ellipsized
+ * flags: OUTPUT_COLOR selects priority-dependent colors; OUTPUT_FULL_WIDTH and
+ * OUTPUT_SHOW_ALL disable ellipsization
+ * priority: syslog priority used to pick the colors
+ * highlight: optional pair of offsets; the range between them is printed in the highlight color
+ *
+ * Returns true if at least one line was ellipsized. */
+static bool print_multiline(
+ FILE *f,
+ unsigned prefix,
+ unsigned n_columns,
+ OutputFlags flags,
+ int priority,
+ const char* message,
+ size_t message_len,
+ size_t highlight[2]) {
+
+ const char *color_on = "", *color_off = "", *highlight_on = "";
+ const char *pos, *end;
+ bool ellipsized = false;
+ int line = 0;
+
+ /* Priorities between LOG_NOTICE and LOG_DEBUG (exclusive) get no coloring at all */
+ if (flags & OUTPUT_COLOR) {
+ if (priority <= LOG_ERR) {
+ color_on = ANSI_HIGHLIGHT_RED;
+ color_off = ANSI_NORMAL;
+ highlight_on = ANSI_HIGHLIGHT;
+ } else if (priority <= LOG_NOTICE) {
+ color_on = ANSI_HIGHLIGHT;
+ color_off = ANSI_NORMAL;
+ highlight_on = ANSI_HIGHLIGHT_RED;
+ } else if (priority >= LOG_DEBUG) {
+ color_on = ANSI_GREY;
+ color_off = ANSI_NORMAL;
+ highlight_on = ANSI_HIGHLIGHT_RED;
+ }
+ }
+
+ /* A special case: make sure that we print a newline when
+ the message is empty. */
+ if (message_len == 0)
+ fputs("\n", f);
+
+ /* Each iteration handles one line of the message: [pos, end) */
+ for (pos = message;
+ pos < message + message_len;
+ pos = end + 1, line++) {
+ bool continuation = line > 0;
+ bool tail_line;
+ int len;
+ /* Find the end of the current line: the next newline or the end of the message */
+ for (end = pos; end < message + message_len && *end != '\n'; end++)
+ ;
+ len = end - pos;
+ assert(len >= 0);
+
+ /* We need to figure out when we are showing not-last line, *and*
+ * will skip subsequent lines. In that case, we will put the dots
+ * at the end of the line, instead of putting dots in the middle
+ * or not at all.
+ */
+ tail_line =
+ line + 1 == PRINT_LINE_THRESHOLD ||
+ end + 1 >= message + PRINT_CHAR_THRESHOLD;
+
+ /* The line is printed in full if truncation is disabled, or if it fits and is not the tail line */
+ if (flags & (OUTPUT_FULL_WIDTH | OUTPUT_SHOW_ALL) ||
+ (prefix + len + 1 < n_columns && !tail_line)) {
+ if (highlight &&
+ (size_t) (pos - message) <= highlight[0] &&
+ highlight[0] < (size_t) len) {
+
+ /* Three pieces: text before the highlight, the highlighted range, the rest */
+ fprintf(f, "%*s%s%.*s",
+ continuation * prefix, "",
+ color_on, (int) highlight[0], pos);
+ fprintf(f, "%s%.*s",
+ highlight_on,
+ (int) (MIN((size_t) len, highlight[1]) - highlight[0]),
+ pos + highlight[0]);
+ if ((size_t) len > highlight[1])
+ fprintf(f, "%s%.*s",
+ color_on,
+ (int) (len - highlight[1]),
+ pos + highlight[1]);
+ fprintf(f, "%s\n", color_off);
+
+ } else
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ continue;
+ }
+
+ /* Beyond this point, ellipsization will happen. */
+ ellipsized = true;
+
+ if (prefix < n_columns && n_columns - prefix >= 3) {
+ if (n_columns - prefix > (unsigned) len + 3)
+ /* The line itself fits, only the lines after it are dropped: append dots */
+ fprintf(f, "%*s%s%.*s...%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else {
+ _cleanup_free_ char *e;
+
+ e = ellipsize_mem(pos, len, n_columns - prefix,
+ tail_line ? 100 : 90);
+ /* If ellipsize_mem() returned NULL, print the line unshortened */
+ if (!e)
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else
+ fprintf(f, "%*s%s%s%s\n",
+ continuation * prefix, "",
+ color_on, e, color_off);
+ }
+ } else
+ /* Fewer than three columns left after the prefix: print just the dots */
+ fputs("...\n", f);
+
+ /* Nothing after the tail line is shown */
+ if (tail_line)
+ break;
+ }
+
+ return ellipsized;
+}
+
+/* Write the monotonic timestamp of the current entry to f as "[seconds.microseconds]". The string
+ * monotonic is used if it is set and parses as an unsigned integer; otherwise the timestamp is queried
+ * from the journal. Returns the number of characters accounted for the timestamp, or a negative
+ * errno-style error. */
+static int output_timestamp_monotonic(FILE *f, sd_journal *j, const char *monotonic) {
+        sd_id128_t boot_id;
+        uint64_t usec;
+        int r;
+
+        assert(f);
+        assert(j);
+
+        r = monotonic ? safe_atou64(monotonic, &usec) : -ENXIO;
+        if (r < 0) {
+                /* No usable source timestamp, ask the journal instead */
+                r = sd_journal_get_monotonic_usec(j, &usec, &boot_id);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+        }
+
+        fprintf(f, "[%5"PRI_USEC".%06"PRI_USEC"]", usec / USEC_PER_SEC, usec % USEC_PER_SEC);
+
+        /* '[' + 5 digits + '.' + 6 digits + ']' */
+        return 1 + 5 + 1 + 6 + 1;
+}
+
+/* Format the realtime (wallclock) timestamp of the current entry according to mode and write it to f.
+ *
+ * realtime: optional timestamp string; used if it parses and passes VALID_REALTIME(), otherwise the
+ * timestamp is queried with sd_journal_get_realtime_usec()
+ * flags: OUTPUT_UTC selects UTC instead of local time
+ *
+ * Returns the length of the string written, or a negative errno-style error. */
+static int output_timestamp_realtime(FILE *f, sd_journal *j, OutputMode mode, OutputFlags flags, const char *realtime) {
+ char buf[MAX(FORMAT_TIMESTAMP_MAX, 64)];
+ struct tm *(*gettime_r)(const time_t *, struct tm *);
+ struct tm tm;
+ uint64_t x;
+ time_t t;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ /* r and x are only read below once they have been assigned: the "!realtime" test short-circuits */
+ if (realtime)
+ r = safe_atou64(realtime, &x);
+ if (!realtime || r < 0 || !VALID_REALTIME(x))
+ r = sd_journal_get_realtime_usec(j, &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ if (IN_SET(mode, OUTPUT_SHORT_FULL, OUTPUT_WITH_UNIT)) {
+ const char *k;
+
+ if (flags & OUTPUT_UTC)
+ k = format_timestamp_utc(buf, sizeof(buf), x);
+ else
+ k = format_timestamp(buf, sizeof(buf), x);
+ if (!k)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format timestamp: %" PRIu64, x);
+
+ } else {
+ char usec[7];
+
+ gettime_r = (flags & OUTPUT_UTC) ? gmtime_r : localtime_r;
+ t = (time_t) (x / USEC_PER_SEC);
+
+ switch (mode) {
+
+ case OUTPUT_SHORT_UNIX:
+ xsprintf(buf, "%10"PRI_TIME".%06"PRIu64, t, x % USEC_PER_SEC);
+ break;
+
+ case OUTPUT_SHORT_ISO:
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S%z", gettime_r(&t, &tm)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO time");
+ break;
+
+ case OUTPUT_SHORT_ISO_PRECISE:
+ /* No usec in strftime, so we leave space and copy over */
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S.xxxxxx%z", gettime_r(&t, &tm)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO-precise time");
+ xsprintf(usec, "%06"PRI_USEC, x % USEC_PER_SEC);
+ /* Offset 20 is where the "xxxxxx" placeholder starts, assuming %Y expands to four digits */
+ memcpy(buf + 20, usec, 6);
+ break;
+
+ case OUTPUT_SHORT:
+ case OUTPUT_SHORT_PRECISE:
+
+ if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S", gettime_r(&t, &tm)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format syslog time");
+
+ if (mode == OUTPUT_SHORT_PRECISE) {
+ size_t k;
+
+ /* Append ".<microseconds>" in the space remaining in buf */
+ assert(sizeof(buf) > strlen(buf));
+ k = sizeof(buf) - strlen(buf);
+
+ r = snprintf(buf + strlen(buf), k, ".%06"PRIu64, x % USEC_PER_SEC);
+ if (r <= 0 || (size_t) r >= k) /* too long? */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format precise time");
+ }
+ break;
+
+ default:
+ assert_not_reached("Unknown time format");
+ }
+ }
+
+ fputs(buf, f);
+ return (int) strlen(buf);
+}
+
+/* Print the current journal entry in one of the syslog-like "short" formats (also used for
+ * OUTPUT_WITH_UNIT): timestamp, optional hostname, unit or identifier or command name, optional PID,
+ * and finally the message, which may be ellipsized.
+ *
+ * f: stream to write to
+ * j: journal positioned at the entry to print
+ * mode: selects the timestamp format, and for OUTPUT_WITH_UNIT the unit display
+ * n_columns: terminal width used for ellipsization
+ * flags: OUTPUT_* flags (OUTPUT_SHOW_ALL, OUTPUT_FULL_WIDTH, OUTPUT_NO_HOSTNAME, OUTPUT_CATALOG, ...)
+ * output_fields: not used by this output mode
+ * highlight: optional pair of offsets into the message to highlight
+ *
+ * Returns a positive value if the message was ellipsized, 0 if not (or if the entry was skipped),
+ * and a negative errno-style error on failure. */
+static int output_short(
+                FILE *f,
+                sd_journal *j,
+                OutputMode mode,
+                unsigned n_columns,
+                OutputFlags flags,
+                Set *output_fields,
+                const size_t highlight[2]) {
+
+        int r;
+        const void *data;
+        size_t length;
+        size_t n = 0; /* number of characters already printed on the line, i.e. the message's column offset */
+        _cleanup_free_ char *hostname = NULL, *identifier = NULL, *comm = NULL, *pid = NULL, *fake_pid = NULL, *message = NULL, *realtime = NULL, *monotonic = NULL, *priority = NULL, *unit = NULL, *user_unit = NULL;
+        size_t hostname_len = 0, identifier_len = 0, comm_len = 0, pid_len = 0, fake_pid_len = 0, message_len = 0, realtime_len = 0, monotonic_len = 0, priority_len = 0, unit_len = 0, user_unit_len = 0;
+        int p = LOG_INFO;
+        bool ellipsized = false;
+        const ParseFieldVec fields[] = {
+                PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len),
+                PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len),
+                PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len),
+                PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len),
+                PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len),
+                PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len),
+                PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len),
+                PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, &realtime_len),
+                PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, &monotonic_len),
+                PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len),
+                PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len),
+        };
+        /* Private copy of the highlight range, since strip_tab_ansi() below may adjust it */
+        size_t highlight_shifted[] = {highlight ? highlight[0] : 0, highlight ? highlight[1] : 0};
+
+        assert(f);
+        assert(j);
+
+        /* Set the threshold to one bigger than the actual print
+         * threshold, so that if the line is actually longer than what
+         * we're willing to print, ellipsization will occur. This way
+         * we won't output a misleading line without any indication of
+         * truncation.
+         */
+        sd_journal_set_data_threshold(j, flags & (OUTPUT_SHOW_ALL|OUTPUT_FULL_WIDTH) ? 0 : PRINT_CHAR_THRESHOLD + 1);
+
+        /* Collect all fields we are interested in from the entry */
+        JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+                r = parse_fieldv(data, length, fields, ELEMENTSOF(fields));
+                if (r < 0)
+                        return r;
+        }
+        if (r == -EBADMSG) {
+                log_debug_errno(r, "Skipping message we can't read: %m");
+                return 0;
+        }
+        if (r < 0)
+                return log_error_errno(r, "Failed to get journal fields: %m");
+
+        if (!message) {
+                log_debug("Skipping message without MESSAGE= field.");
+                return 0;
+        }
+
+        if (!(flags & OUTPUT_SHOW_ALL))
+                strip_tab_ansi(&message, &message_len, highlight_shifted);
+
+        /* Only a single digit 0..7 is accepted as priority, otherwise LOG_INFO is assumed */
+        if (priority_len == 1 && *priority >= '0' && *priority <= '7')
+                p = *priority - '0';
+
+        if (mode == OUTPUT_SHORT_MONOTONIC)
+                r = output_timestamp_monotonic(f, j, monotonic);
+        else
+                r = output_timestamp_realtime(f, j, mode, flags, realtime);
+        if (r < 0)
+                return r;
+        n += r;
+
+        if (flags & OUTPUT_NO_HOSTNAME) {
+                /* Suppress display of the hostname if this is requested. */
+                hostname = mfree(hostname);
+                hostname_len = 0;
+        }
+
+        if (hostname && shall_print(hostname, hostname_len, flags)) {
+                fprintf(f, " %.*s", (int) hostname_len, hostname);
+                n += hostname_len + 1;
+        }
+
+        if (mode == OUTPUT_WITH_UNIT && ((unit && shall_print(unit, unit_len, flags)) || (user_unit && shall_print(user_unit, user_unit_len, flags)))) {
+                if (unit) {
+                        fprintf(f, " %.*s", (int) unit_len, unit);
+                        n += unit_len + 1;
+                }
+                if (user_unit) {
+                        if (unit)
+                                fprintf(f, "/%.*s", (int) user_unit_len, user_unit);
+                        else
+                                fprintf(f, " %.*s", (int) user_unit_len, user_unit);
+                        /* Account for what was actually printed: separator plus the *user* unit name */
+                        n += user_unit_len + 1;
+                }
+        } else if (identifier && shall_print(identifier, identifier_len, flags)) {
+                fprintf(f, " %.*s", (int) identifier_len, identifier);
+                n += identifier_len + 1;
+        } else if (comm && shall_print(comm, comm_len, flags)) {
+                fprintf(f, " %.*s", (int) comm_len, comm);
+                n += comm_len + 1;
+        } else {
+                /* The placeholder occupies columns too, keep the column counter in sync */
+                fputs(" unknown", f);
+                n += STRLEN(" unknown");
+        }
+
+        if (pid && shall_print(pid, pid_len, flags)) {
+                fprintf(f, "[%.*s]", (int) pid_len, pid);
+                n += pid_len + 2;
+        } else if (fake_pid && shall_print(fake_pid, fake_pid_len, flags)) {
+                fprintf(f, "[%.*s]", (int) fake_pid_len, fake_pid);
+                n += fake_pid_len + 2;
+        }
+
+        if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(message, message_len)) {
+                char bytes[FORMAT_BYTES_MAX];
+                fprintf(f, ": [%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len));
+        } else {
+                fputs(": ", f);
+                /* n + 2: the message starts after everything printed so far plus the ": " separator */
+                ellipsized |=
+                        print_multiline(f, n + 2, n_columns, flags, p,
+                                        message, message_len,
+                                        highlight_shifted);
+        }
+
+        if (flags & OUTPUT_CATALOG)
+                print_catalog(f, j);
+
+        return ellipsized;
+}
+
+/* Print the current journal entry in verbose form: a header line with timestamp and cursor, followed
+ * by one "FIELD=value" block per field. Fields not contained in output_fields (if non-NULL) are
+ * skipped; data that is too long or not printable is summarized as "[... blob data]".
+ *
+ * mode, n_columns and highlight are not used by this output mode.
+ *
+ * Returns 0 on success and a negative errno-style error on failure. */
+static int output_verbose(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ const void *data;
+ size_t length;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime = 0;
+ char ts[FORMAT_TIMESTAMP_MAX + 7];
+ const char *timestamp;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ /* Prefer the source timestamp field; r stays negative on every path that did not yield one */
+ r = sd_journal_get_data(j, "_SOURCE_REALTIME_TIMESTAMP", &data, &length);
+ if (r == -ENOENT)
+ log_debug("Source realtime timestamp not found");
+ else if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get source realtime timestamp: %m");
+ else {
+ _cleanup_free_ char *value = NULL;
+
+ r = parse_field(data, length, "_SOURCE_REALTIME_TIMESTAMP=",
+ STRLEN("_SOURCE_REALTIME_TIMESTAMP="), &value,
+ NULL);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ r = safe_atou64(value, &realtime);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse realtime timestamp: %m");
+ }
+
+ /* Field missing or unparsable: fall back to the timestamp from sd_journal_get_realtime_usec() */
+ if (r < 0) {
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get realtime timestamp: %m");
+ }
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ timestamp = flags & OUTPUT_UTC ? format_timestamp_us_utc(ts, sizeof ts, realtime)
+ : format_timestamp_us(ts, sizeof ts, realtime);
+ fprintf(f, "%s [%s]\n",
+ timestamp ?: "(no timestamp)",
+ cursor);
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ const char *c;
+ int fieldlen;
+ const char *on = "", *off = "";
+
+ /* Split "NAME=value" at the first '=' */
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid field.");
+ fieldlen = c - (const char*) data;
+
+ r = field_set_test(output_fields, data, fieldlen);
+ if (r < 0)
+ return r;
+ if (!r)
+ continue;
+
+ /* Only the MESSAGE= field is highlighted */
+ if (flags & OUTPUT_COLOR && startswith(data, "MESSAGE=")) {
+ on = ANSI_HIGHLIGHT;
+ off = ANSI_NORMAL;
+ }
+
+ if ((flags & OUTPUT_SHOW_ALL) ||
+ (((length < PRINT_CHAR_THRESHOLD) || flags & OUTPUT_FULL_WIDTH)
+ && utf8_is_printable(data, length))) {
+ fprintf(f, " %s%.*s=", on, fieldlen, (const char*)data);
+ /* OUTPUT_FULL_WIDTH is passed here, so the value itself is never ellipsized */
+ print_multiline(f, 4 + fieldlen + 1, 0, OUTPUT_FULL_WIDTH, 0, c + 1, length - fieldlen - 1, NULL);
+ fputs(off, f);
+ } else {
+ char bytes[FORMAT_BYTES_MAX];
+
+ fprintf(f, " %s%.*s=[%s blob data]%s\n",
+ on,
+ (int) (c - (const char*) data),
+ (const char*) data,
+ format_bytes(bytes, sizeof(bytes), length - (c - (const char *) data) - 1),
+ off);
+ }
+ }
+
+ /* r holds the result of the last enumeration step here */
+ if (r < 0)
+ return r;
+
+ if (flags & OUTPUT_CATALOG)
+ print_catalog(f, j);
+
+ return 0;
+}
+
+/* Write the current journal entry to f in the export format: the __CURSOR, __REALTIME_TIMESTAMP,
+ * __MONOTONIC_TIMESTAMP and _BOOT_ID header lines, then every field accepted by output_fields, then
+ * an empty line. Printable fields are written as "NAME=value"; everything else is written as the
+ * name, a newline, the value size as 64-bit little-endian integer, and the raw value.
+ *
+ * mode, n_columns, flags and highlight are not used by this output mode.
+ *
+ * Returns 0 on success (or if the entry is skipped with -EBADMSG), negative errno-style error
+ * otherwise. */
+static int output_export(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ sd_id128_t boot_id;
+ char sid[33];
+ int r;
+ usec_t realtime, monotonic;
+ _cleanup_free_ char *cursor = NULL;
+ const void *data;
+ size_t length;
+
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ fprintf(f,
+ "__CURSOR=%s\n"
+ "__REALTIME_TIMESTAMP="USEC_FMT"\n"
+ "__MONOTONIC_TIMESTAMP="USEC_FMT"\n"
+ "_BOOT_ID=%s\n",
+ cursor,
+ realtime,
+ monotonic,
+ sd_id128_to_string(boot_id, sid));
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ const char *c;
+
+ /* We already printed the boot id from the data in the header, hence let's suppress it here */
+ if (memory_startswith(data, length, "_BOOT_ID="))
+ continue;
+
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid field.");
+
+ r = field_set_test(output_fields, data, c - (const char *) data);
+ if (r < 0)
+ return r;
+ if (!r)
+ continue;
+
+ if (utf8_is_printable_newline(data, length, false))
+ fwrite(data, length, 1, f);
+ else {
+ uint64_t le64;
+
+ /* Binary serialization: name, newline, little-endian 64-bit size, raw value */
+ fwrite(data, c - (const char*) data, 1, f);
+ fputc('\n', f);
+ le64 = htole64(length - (c - (const char*) data) - 1);
+ fwrite(&le64, sizeof(le64), 1, f);
+ fwrite(c + 1, length - (c - (const char*) data) - 1, 1, f);
+ }
+
+ fputc('\n', f);
+ }
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+
+ if (r < 0)
+ return r;
+
+ /* An empty line terminates the entry */
+ fputc('\n', f);
+
+ return 0;
+}
+
+/* Write the l bytes at p to f as a JSON value. Unless OUTPUT_SHOW_ALL is set, data of JSON_THRESHOLD
+ * bytes or more becomes "null" and data rejected by utf8_is_printable() becomes an array of byte
+ * values. Everything else becomes a quoted string in which '"', '\\', newline and other control
+ * characters are escaped. */
+void json_escape(
+                FILE *f,
+                const char* p,
+                size_t l,
+                OutputFlags flags) {
+
+        const char *end;
+        bool limited;
+
+        assert(f);
+        assert(p);
+
+        limited = !(flags & OUTPUT_SHOW_ALL);
+        end = p + l;
+
+        if (limited && l >= JSON_THRESHOLD) {
+                fputs("null", f);
+                return;
+        }
+
+        if (limited && !utf8_is_printable(p, l)) {
+                const char *separator = "";
+
+                /* Emit the raw bytes as a comma separated list of numbers */
+                fputs("[ ", f);
+
+                for (; p < end; p++) {
+                        fprintf(f, "%s%u", separator, (uint8_t) *p);
+                        separator = ", ";
+                }
+
+                fputs(" ]", f);
+                return;
+        }
+
+        fputc('"', f);
+
+        for (; p < end; p++)
+                switch (*p) {
+
+                case '"':
+                case '\\':
+                        fputc('\\', f);
+                        fputc(*p, f);
+                        break;
+
+                case '\n':
+                        fputs("\\n", f);
+                        break;
+
+                default:
+                        if ((uint8_t) *p < ' ')
+                                fprintf(f, "\\u%04x", (uint8_t) *p);
+                        else
+                                fputc(*p, f);
+                }
+
+        fputc('"', f);
+}
+
+/* All values collected for one journal field name while building the JSON object in output_json().
+ * Instances are stored in a hashmap keyed by the string of "name". */
+struct json_data {
+ JsonVariant* name; /* the field name, as JSON string variant */
+ size_t n_values; /* number of valid entries in values[] */
+ JsonVariant* values[]; /* one variant per occurrence of the field; grown with realloc() */
+};
+
+/* Add one value for the field "name" to the hashmap h of struct json_data objects, creating the
+ * object for the name if there is none yet.
+ *
+ * The value becomes a JSON null if OUTPUT_SHOW_ALL is unset and name plus value reach JSON_THRESHOLD,
+ * a JSON string if utf8_is_printable() accepts it, and an array of bytes otherwise.
+ *
+ * Returns 0 on success, negative errno-style error on failure. */
+static int update_json_data(
+ Hashmap *h,
+ OutputFlags flags,
+ const char *name,
+ const void *value,
+ size_t size) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ struct json_data *d;
+ int r;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && strlen(name) + 1 + size >= JSON_THRESHOLD)
+ r = json_variant_new_null(&v);
+ else if (utf8_is_printable(value, size))
+ r = json_variant_new_stringn(&v, value, size);
+ else
+ r = json_variant_new_array_bytes(&v, value, size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON data: %m");
+
+ d = hashmap_get(h, name);
+ if (d) {
+ struct json_data *w;
+
+ /* Field seen before: make room for one more value */
+ w = realloc(d, offsetof(struct json_data, values) + sizeof(JsonVariant*) * (d->n_values + 1));
+ if (!w)
+ return log_oom();
+
+ /* realloc() may have moved the object, hence point the hashmap entry at the new location */
+ d = w;
+ assert_se(hashmap_update(h, json_variant_string(d->name), d) >= 0);
+ } else {
+ _cleanup_(json_variant_unrefp) JsonVariant *n = NULL;
+
+ r = json_variant_new_string(&n, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON name variant: %m");
+
+ d = malloc0(offsetof(struct json_data, values) + sizeof(JsonVariant*));
+ if (!d)
+ return log_oom();
+
+ /* The hashmap key is the string of the name variant, not the caller's "name" */
+ r = hashmap_put(h, json_variant_string(n), d);
+ if (r < 0) {
+ free(d);
+ return log_error_errno(r, "Failed to insert JSON name into hashmap: %m");
+ }
+
+ d->name = TAKE_PTR(n);
+ }
+
+ /* Hand the value reference over to the json_data object */
+ d->values[d->n_values++] = TAKE_PTR(v);
+ return 0;
+}
+
+/* Split the journal datum "NAME=value" at its first '=' and record the value under NAME in h.
+ * The datum is silently ignored (return value 0) if it is the _BOOT_ID field, if no '=' is found
+ * within the first JSON_THRESHOLD bytes, if the name is empty, or if output_fields is set and does
+ * not contain the name. Otherwise returns the result of update_json_data(). */
+static int update_json_data_split(
+                Hashmap *h,
+                OutputFlags flags,
+                Set *output_fields,
+                const void *data,
+                size_t size) {
+
+        const char *start = data, *eq;
+        size_t name_len;
+        char *name;
+
+        assert(h);
+        assert(data || size == 0);
+
+        if (memory_startswith(data, size, "_BOOT_ID="))
+                return 0;
+
+        eq = memchr(start, '=', MIN(size, JSON_THRESHOLD));
+        if (!eq || eq == start)
+                return 0;
+
+        name_len = eq - start;
+
+        name = strndupa(start, name_len);
+        if (output_fields && !set_get(output_fields, name))
+                return 0;
+
+        /* Everything after the '=' is the value */
+        return update_json_data(h, flags, name, eq + 1, size - name_len - 1);
+}
+
+/* Write the current journal entry to f as one JSON object. The object carries __CURSOR,
+ * __REALTIME_TIMESTAMP, __MONOTONIC_TIMESTAMP and _BOOT_ID plus all fields of the entry accepted by
+ * output_fields. A field that occurs once maps to a single value, a field that occurs several times
+ * maps to an array of its values.
+ *
+ * mode selects the JSON formatting flags; n_columns and highlight are not used by this output mode.
+ *
+ * Returns 0 on success (or if the entry is skipped with -EBADMSG), negative errno-style error
+ * otherwise. */
+static int output_json(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) {
+
+ char sid[SD_ID128_STRING_MAX], usecbuf[DECIMAL_STR_MAX(usec_t)];
+ _cleanup_(json_variant_unrefp) JsonVariant *object = NULL;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime, monotonic;
+ JsonVariant **array = NULL;
+ struct json_data *d;
+ sd_id128_t boot_id;
+ Hashmap *h = NULL;
+ size_t n = 0;
+ Iterator i;
+ int r;
+
+ assert(j);
+
+ (void) sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ /* Maps field name → struct json_data; from here on all exits go through "finish" */
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ r = update_json_data(h, flags, "__CURSOR", cursor, strlen(cursor));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, realtime);
+ r = update_json_data(h, flags, "__REALTIME_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, monotonic);
+ r = update_json_data(h, flags, "__MONOTONIC_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ sd_id128_to_string(boot_id, sid);
+ r = update_json_data(h, flags, "_BOOT_ID", sid, strlen(sid));
+ if (r < 0)
+ goto finish;
+
+ /* Collect all fields of the entry; the _BOOT_ID field is filtered out by update_json_data_split() */
+ for (;;) {
+ const void *data;
+ size_t size;
+
+ r = sd_journal_enumerate_data(j, &data, &size);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ r = 0;
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to read journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ break;
+
+ r = update_json_data_split(h, flags, output_fields, data, size);
+ if (r < 0)
+ goto finish;
+ }
+
+ /* Two slots per field: the name and its value */
+ array = new(JsonVariant*, hashmap_size(h)*2);
+ if (!array) {
+ r = log_oom();
+ goto finish;
+ }
+
+ HASHMAP_FOREACH(d, h, i) {
+ assert(d->n_values > 0);
+
+ /* The array takes its own references; n counts how many slots are filled so far */
+ array[n++] = json_variant_ref(d->name);
+
+ if (d->n_values == 1)
+ array[n++] = json_variant_ref(d->values[0]);
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *q = NULL;
+
+ r = json_variant_new_array(&q, d->values, d->n_values);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create JSON array: %m");
+ goto finish;
+ }
+
+ array[n++] = TAKE_PTR(q);
+ }
+ }
+
+ r = json_variant_new_object(&object, array, n);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate JSON object: %m");
+ goto finish;
+ }
+
+ json_variant_dump(object,
+ output_mode_to_json_format_flags(mode) |
+ (FLAGS_SET(flags, OUTPUT_COLOR) ? JSON_FORMAT_COLOR : 0),
+ f, NULL);
+
+ r = 0;
+
+finish:
+ /* Drop the references held by the json_data objects, then the objects themselves */
+ while ((d = hashmap_steal_first(h))) {
+ size_t k;
+
+ json_variant_unref(d->name);
+ for (k = 0; k < d->n_values; k++)
+ json_variant_unref(d->values[k]);
+
+ free(d);
+ }
+
+ hashmap_free(h);
+
+ /* Release the n references stored in array so far */
+ json_variant_unref_many(array, n);
+ free(array);
+
+ return r;
+}
+
+/* Write only the text of the MESSAGE= field of the current entry to f, followed by a newline. If a
+ * highlight range is given and OUTPUT_COLOR is set, that range of the text is colored. Entries
+ * without MESSAGE= and entries that fail with -EBADMSG are skipped (return value 0). */
+static int output_cat(
+                FILE *f,
+                sd_journal *j,
+                OutputMode mode,
+                unsigned n_columns,
+                OutputFlags flags,
+                Set *output_fields,
+                const size_t highlight[2]) {
+
+        const char *text;
+        const void *data;
+        size_t l, text_len;
+        int r;
+
+        assert(j);
+        assert(f);
+
+        sd_journal_set_data_threshold(j, 0);
+
+        r = sd_journal_get_data(j, "MESSAGE", &data, &l);
+        if (r == -EBADMSG) {
+                log_debug_errno(r, "Skipping message we can't read: %m");
+                return 0;
+        }
+        if (r == -ENOENT)
+                /* An entry without MESSAGE=? */
+                return 0;
+        if (r < 0)
+                return log_error_errno(r, "Failed to get data: %m");
+
+        /* The data starts with the 8 characters of "MESSAGE=", the text follows */
+        assert(l >= 8);
+        text = (const char*) data + 8;
+        text_len = l - 8;
+
+        if (highlight && (flags & OUTPUT_COLOR)) {
+                assert(highlight[0] <= highlight[1]);
+                assert(highlight[1] <= text_len);
+
+                fwrite(text, 1, highlight[0], f);
+                fputs(ANSI_HIGHLIGHT_RED, f);
+                fwrite(text + highlight[0], 1, highlight[1] - highlight[0], f);
+                fputs(ANSI_NORMAL, f);
+                fwrite(text + highlight[1], 1, text_len - highlight[1], f);
+        } else
+                fwrite(text, 1, text_len, f);
+
+        fputc('\n', f);
+
+        return 0;
+}
+
+/* Dispatch table indexed by OutputMode: show_journal_entry() calls output_funcs[mode] to print one
+ * entry. All timestamp variants of the short format as well as OUTPUT_WITH_UNIT share output_short(),
+ * all JSON variants share output_json(). */
+static int (*output_funcs[_OUTPUT_MODE_MAX])(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ Set *output_fields,
+ const size_t highlight[2]) = {
+
+ [OUTPUT_SHORT] = output_short,
+ [OUTPUT_SHORT_ISO] = output_short,
+ [OUTPUT_SHORT_ISO_PRECISE] = output_short,
+ [OUTPUT_SHORT_PRECISE] = output_short,
+ [OUTPUT_SHORT_MONOTONIC] = output_short,
+ [OUTPUT_SHORT_UNIX] = output_short,
+ [OUTPUT_SHORT_FULL] = output_short,
+ [OUTPUT_VERBOSE] = output_verbose,
+ [OUTPUT_EXPORT] = output_export,
+ [OUTPUT_JSON] = output_json,
+ [OUTPUT_JSON_PRETTY] = output_json,
+ [OUTPUT_JSON_SSE] = output_json,
+ [OUTPUT_JSON_SEQ] = output_json,
+ [OUTPUT_CAT] = output_cat,
+ [OUTPUT_WITH_UNIT] = output_short,
+};
+
+/* Print the current entry of j to f using the output function registered for mode.
+ *
+ * n_columns: output width; 0 means "use columns()"
+ * output_fields: optional string list of field names to restrict the output to
+ * highlight: optional highlight range, passed through to the output function
+ * ellipsized: if non-NULL, set to true when the output function returns a positive value
+ *
+ * Returns the result of the output function, or a negative error if the field set cannot be built. */
+int show_journal_entry(
+                FILE *f,
+                sd_journal *j,
+                OutputMode mode,
+                unsigned n_columns,
+                OutputFlags flags,
+                char **output_fields,
+                const size_t highlight[2],
+                bool *ellipsized) {
+
+        _cleanup_set_free_free_ Set *fields = NULL;
+        int r;
+
+        assert(mode >= 0);
+        assert(mode < _OUTPUT_MODE_MAX);
+
+        if (n_columns == 0)
+                n_columns = columns();
+
+        if (output_fields) {
+                /* Turn the string list into a set for the per-field lookups */
+                fields = set_new(&string_hash_ops);
+                if (!fields)
+                        return log_oom();
+
+                r = set_put_strdupv(fields, output_fields);
+                if (r < 0)
+                        return r;
+        }
+
+        r = output_funcs[mode](f, j, mode, n_columns, flags, fields, highlight);
+        if (r > 0 && ellipsized)
+                *ellipsized = true;
+
+        return r;
+}
+
+/* Print a leading newline if one was requested via OUTPUT_BEGIN_NEWLINE, and clear the flag so that
+ * this happens only once, before the first line we print. Always returns 0. */
+static int maybe_print_begin_newline(FILE *f, OutputFlags *flags) {
+        assert(f);
+        assert(flags);
+
+        if (*flags & OUTPUT_BEGIN_NEWLINE) {
+                fputc('\n', f);
+                *flags &= ~OUTPUT_BEGIN_NEWLINE;
+        }
+
+        return 0;
+}
+
+/* Print entries of j to f in the given output mode.
+ *
+ * not_before: if non-zero, entries with a monotonic timestamp below this value, or with a timestamp
+ * from another boot (-ESTALE), are skipped
+ * how_many: number of entries to go back from the tail before printing; (unsigned) -1 means
+ * "do not seek, continue from the current position"
+ * flags: OUTPUT_FOLLOW keeps waiting for and printing new entries; OUTPUT_WARN_CUTOFF enables
+ * the "journal has been rotated" warning
+ * ellipsized: passed through to show_journal_entry()
+ *
+ * Returns 0 on success, negative errno-style error on failure. */
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized) {
+
+ int r;
+ unsigned line = 0;
+ bool need_seek = false;
+ int warn_cutoff = flags & OUTPUT_WARN_CUTOFF;
+
+ assert(j);
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+
+ if (how_many == (unsigned) -1)
+ need_seek = true;
+ else {
+ /* Seek to end */
+ r = sd_journal_seek_tail(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to tail: %m");
+
+ /* The result in r is evaluated by the first iteration of the loop below (need_seek is false) */
+ r = sd_journal_previous_skip(j, how_many);
+ if (r < 0)
+ return log_error_errno(r, "Failed to skip previous: %m");
+ }
+
+ /* Outer loop: one pass per wakeup in follow mode. Inner loop: print all entries available now. */
+ for (;;) {
+ for (;;) {
+ usec_t usec;
+
+ if (need_seek) {
+ r = sd_journal_next(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to iterate through journal: %m");
+ }
+
+ /* r == 0: the last positioning call found no (further) entry */
+ if (r == 0)
+ break;
+
+ need_seek = true;
+
+ if (not_before > 0) {
+ r = sd_journal_get_monotonic_usec(j, &usec, NULL);
+
+ /* -ESTALE is returned if the
+ timestamp is not from this boot */
+ if (r == -ESTALE)
+ continue;
+ else if (r < 0)
+ return log_error_errno(r, "Failed to get journal time: %m");
+
+ if (usec < not_before)
+ continue;
+ }
+
+ line++;
+ maybe_print_begin_newline(f, &flags);
+
+ r = show_journal_entry(f, j, mode, n_columns, flags, NULL, NULL, ellipsized);
+ if (r < 0)
+ return r;
+ }
+
+ /* Fewer lines than requested were shown: check once whether the journal no longer reaches
+ * back to not_before */
+ if (warn_cutoff && line < how_many && not_before > 0) {
+ sd_id128_t boot_id;
+ usec_t cutoff = 0;
+
+ /* Check whether the cutoff line is too early */
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id: %m");
+
+ r = sd_journal_get_cutoff_monotonic_usec(j, boot_id, &cutoff, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get journal cutoff time: %m");
+
+ if (r > 0 && not_before < cutoff) {
+ maybe_print_begin_newline(f, &flags);
+ fprintf(f, "Warning: Journal has been rotated since unit was started. Log output is incomplete or unavailable.\n");
+ }
+
+ warn_cutoff = false;
+ }
+
+ if (!(flags & OUTPUT_FOLLOW))
+ break;
+
+ r = sd_journal_wait(j, USEC_INFINITY);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for journal: %m");
+
+ }
+
+ return 0;
+}
+
+/* Add the journal matches that select all messages about the system unit "unit": messages from the
+ * unit itself, coredumps of it, messages from PID 1 about it and messages from root-owned processes
+ * that name it as object. For ".slice" units, messages of everything in the slice are added as well.
+ *
+ * Returns the result of the first call that returned non-zero, or 0 if all calls returned 0. */
+int add_matches_for_unit(sd_journal *j, const char *unit) {
+ const char *m1, *m2, *m3, *m4;
+ int r;
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_UNIT=", unit);
+ m2 = strjoina("COREDUMP_UNIT=", unit);
+ m3 = strjoina("UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_UNIT=", unit);
+
+ /* The "||" chain short-circuits: it stops at the first call whose result is non-zero and
+ * leaves that result in r */
+ (void)(
+ /* Look for messages from the service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1", 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+
+ /* Look for messages from PID 1 about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_PID=1", 0)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m4, 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0))
+ );
+ }
+
+ return r;
+}
+
+/* Add the journal matches that select all messages about the user unit "unit" of the user "uid":
+ * messages from the unit itself, messages from systemd about it, coredumps of it and messages from
+ * authorized daemons that name it as object. For ".slice" units, messages of everything in the slice
+ * are added as well.
+ *
+ * Returns the result of the first call that returned non-zero, or 0 if all calls returned 0. */
+int add_matches_for_user_unit(sd_journal *j, const char *unit, uid_t uid) {
+ int r;
+ char *m1, *m2, *m3, *m4;
+ char muid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)];
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_USER_UNIT=", unit);
+ m2 = strjoina("USER_UNIT=", unit);
+ m3 = strjoina("COREDUMP_USER_UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_USER_UNIT=", unit);
+ sprintf(muid, "_UID="UID_FMT, uid);
+
+ /* The "||" chain short-circuits: it stops at the first call whose result is non-zero and
+ * leaves that result in r */
+ (void) (
+ /* Look for messages from the user service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for messages from systemd about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m4, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0))
+ );
+ }
+
+ return r;
+}
+
+/* Determine the boot ID of the container "machine" and store it in *boot_id. A child process is
+ * forked into the namespaces of the container's leader process, reads
+ * /proc/sys/kernel/random/boot_id there and sends the 36 characters back over a socket pair.
+ *
+ * Returns 0 on success, -EINVAL for an invalid machine name, -EIO if the child fails or the transfer
+ * is short, or another negative errno-style error. */
+static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1;
+ pid_t pid, child;
+ char buf[37];
+ ssize_t k;
+ int r;
+
+ assert(machine);
+ assert(boot_id);
+
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ r = container_get_leader(machine, &pid);
+ if (r < 0)
+ return r;
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, NULL, &rootfd);
+ if (r < 0)
+ return r;
+
+ /* pair[1] is the child's sending end, pair[0] the parent's receiving end */
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-bootidns)", "(sd-bootid)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, -1, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child: every path in this block ends in _exit() */
+ int fd;
+
+ pair[0] = safe_close(pair[0]);
+
+ fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ _exit(EXIT_FAILURE);
+
+ r = loop_read_exact(fd, buf, 36, false);
+ safe_close(fd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ k = send(pair[1], buf, 36, MSG_NOSIGNAL);
+ if (k != 36)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ /* The datagram is only read after the child has exited successfully */
+ r = wait_for_terminate_and_check("(sd-bootidns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ k = recv(pair[0], buf, 36, 0);
+ if (k != 36)
+ return -EIO;
+
+ /* NUL-terminate the 36 received characters before parsing them */
+ buf[36] = 0;
+ r = sd_id128_from_string(buf, boot_id);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+/* Restrict the matches of j to entries carrying the current boot ID, followed by a conjunction so that
+ * matches added afterwards are combined with it. If machine is non-NULL the boot ID of that container
+ * is used, otherwise the one of the local system. Returns 0 on success, or a negative errno-style
+ * error (which has been logged). */
+int add_match_this_boot(sd_journal *j, const char *machine) {
+        char match[9+32+1] = "_BOOT_ID=";
+        char *id_part = match + 9;
+        sd_id128_t boot_id;
+        int r;
+
+        assert(j);
+
+        r = machine ? get_boot_id_for_machine(machine, &boot_id)
+                    : sd_id128_get_boot(&boot_id);
+        if (r < 0) {
+                if (machine)
+                        return log_error_errno(r, "Failed to get boot id of container %s: %m", machine);
+
+                return log_error_errno(r, "Failed to get boot id: %m");
+        }
+
+        /* Append the formatted ID right after the "_BOOT_ID=" prefix */
+        sd_id128_to_string(boot_id, id_part);
+
+        r = sd_journal_add_match(j, match, strlen(match));
+        if (r < 0)
+                return log_error_errno(r, "Failed to add match: %m");
+
+        r = sd_journal_add_conjunction(j);
+        if (r < 0)
+                return log_error_errno(r, "Failed to add conjunction: %m");
+
+        return 0;
+}
+
+/* Open the journal with journal_open_flags, restrict it to the current boot and to messages about
+ * "unit", and print the result with show_journal().
+ *
+ * system_unit selects between the matches for system units and those for user units of "uid".
+ * A how_many of 0 prints nothing and returns 0 right away; all other parameters are handed on to
+ * show_journal(). Returns 0 on success or a negative errno-style error. */
+int show_journal_by_unit(
+                FILE *f,
+                const char *unit,
+                OutputMode mode,
+                unsigned n_columns,
+                usec_t not_before,
+                unsigned how_many,
+                uid_t uid,
+                OutputFlags flags,
+                int journal_open_flags,
+                bool system_unit,
+                bool *ellipsized) {
+
+        _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+        int r;
+
+        assert(mode >= 0);
+        assert(mode < _OUTPUT_MODE_MAX);
+        assert(unit);
+
+        if (how_many == 0)
+                return 0;
+
+        r = sd_journal_open(&j, journal_open_flags);
+        if (r < 0)
+                return log_error_errno(r, "Failed to open journal: %m");
+
+        r = add_match_this_boot(j, NULL);
+        if (r < 0)
+                return r;
+
+        r = system_unit ? add_matches_for_unit(j, unit)
+                        : add_matches_for_user_unit(j, unit, uid);
+        if (r < 0)
+                return log_error_errno(r, "Failed to add unit matches: %m");
+
+        if (DEBUG_LOGGING) {
+                /* Only build the match string if it is actually going to be logged */
+                _cleanup_free_ char *filter = journal_make_match_string(j);
+
+                if (!filter)
+                        return log_oom();
+
+                log_debug("Journal filter: %s", filter);
+        }
+
+        return show_journal(f, j, mode, n_columns, not_before, how_many, flags, ellipsized);
+}
diff --git a/src/shared/logs-show.h b/src/shared/logs-show.h
new file mode 100644
index 0000000..1e0c4ea
--- /dev/null
+++ b/src/shared/logs-show.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+#include "macro.h"
+#include "output-mode.h"
+#include "time-util.h"
+#include "util.h"
+
+int show_journal_entry(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ char **output_fields,
+ const size_t highlight[2],
+ bool *ellipsized);
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized);
+
+int add_match_this_boot(sd_journal *j, const char *machine);
+
+int add_matches_for_unit(
+ sd_journal *j,
+ const char *unit);
+
+int add_matches_for_user_unit(
+ sd_journal *j,
+ const char *unit,
+ uid_t uid);
+
+int show_journal_by_unit(
+ FILE *f,
+ const char *unit,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ uid_t uid,
+ OutputFlags flags,
+ int journal_open_flags,
+ bool system_unit,
+ bool *ellipsized);
+
+void json_escape(
+ FILE *f,
+ const char* p,
+ size_t l,
+ OutputFlags flags);
diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c
new file mode 100644
index 0000000..bf426eb
--- /dev/null
+++ b/src/shared/loop-util.c
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/loop.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "loop-util.h"
+#include "stat-util.h"
+
+/* Creates a LoopDevice object for the file or block device referenced by 'fd'.
+ *
+ * If 'fd' refers to a block device already, no loop device is allocated: the object merely stores a duplicate
+ * of the fd and is marked as relinquished, so that loop_device_unref() won't try to tear anything down.
+ *
+ * If 'fd' refers to a regular file, a free loop device is requested from /dev/loop-control, opened with
+ * 'open_flags' (O_RDWR or O_RDONLY), bound to 'fd' and configured with auto-clear and partition scanning (plus
+ * read-only mode for O_RDONLY).
+ *
+ * On success stores the new object in *ret and returns its file descriptor (owned by the object). On failure
+ * returns a negative errno-style value and leaves *ret untouched. */
+int loop_device_make(int fd, int open_flags, LoopDevice **ret) {
+        const struct loop_info64 info = {
+                .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN|(open_flags == O_RDONLY ? LO_FLAGS_READ_ONLY : 0),
+        };
+
+        _cleanup_close_ int control = -1, loop = -1;
+        _cleanup_free_ char *loopdev = NULL;
+        struct stat st;
+        LoopDevice *d;
+        int nr, r;
+
+        assert(fd >= 0);
+        assert(ret);
+        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+        if (fstat(fd, &st) < 0)
+                return -errno;
+
+        if (S_ISBLK(st.st_mode)) {
+                /* Owned by the cleanup handler until it is handed over to the object, so that it is not
+                 * leaked if the allocation below fails. */
+                _cleanup_close_ int copy = -1;
+
+                /* If this is already a block device, store a copy of the fd as it is */
+
+                copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+                if (copy < 0)
+                        return -errno;
+
+                d = new0(LoopDevice, 1);
+                if (!d)
+                        return -ENOMEM;
+
+                *d = (LoopDevice) {
+                        .fd = TAKE_FD(copy),
+                        .nr = -1,
+                        .relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
+                };
+
+                *ret = d;
+                return d->fd;
+        }
+
+        r = stat_verify_regular(&st);
+        if (r < 0)
+                return r;
+
+        control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+        if (control < 0)
+                return -errno;
+
+        nr = ioctl(control, LOOP_CTL_GET_FREE);
+        if (nr < 0)
+                return -errno;
+
+        if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
+                return -ENOMEM;
+
+        loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+        if (loop < 0)
+                return -errno;
+
+        if (ioctl(loop, LOOP_SET_FD, fd) < 0)
+                return -errno;
+
+        if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
+                /* Save the error before issuing another ioctl, which may overwrite errno */
+                r = -errno;
+
+                /* The auto-clear flag was not applied, hence closing the fd would leave the device bound to
+                 * the backing file. Detach it explicitly. */
+                (void) ioctl(loop, LOOP_CLR_FD);
+                return r;
+        }
+
+        /* From here on auto-clear is in effect: if we fail, closing 'loop' detaches the device again. */
+
+        d = new(LoopDevice, 1);
+        if (!d)
+                return -ENOMEM;
+
+        *d = (LoopDevice) {
+                .fd = TAKE_FD(loop),
+                .node = TAKE_PTR(loopdev),
+                .nr = nr,
+        };
+
+        *ret = d;
+        return d->fd;
+}
+
+/* Convenience wrapper around loop_device_make(): opens 'path' with 'open_flags' (O_RDWR or O_RDONLY) and
+ * creates the LoopDevice object from the resulting fd. The temporary fd is closed again on return. Returns
+ * what loop_device_make() returns, or -errno if opening the path fails. */
+int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret) {
+        _cleanup_close_ int source_fd = -1;
+
+        assert(path);
+        assert(ret);
+        assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+        source_fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+        if (source_fd < 0)
+                return -errno;
+
+        return loop_device_make(source_fd, open_flags, ret);
+}
+
+/* Releases a LoopDevice object; NULL is accepted and ignored. If the object refers to a loop device we
+ * allocated (nr >= 0) that has not been relinquished, the device is cleared and then removed through
+ * /dev/loop-control. Failures of the clean-up steps are only logged at debug level. Always returns NULL. */
+LoopDevice* loop_device_unref(LoopDevice *d) {
+        bool owned;
+
+        if (!d)
+                return NULL;
+
+        /* Only tear down devices that we allocated ourselves and still are responsible for */
+        owned = d->nr >= 0 && !d->relinquished;
+
+        if (d->fd >= 0) {
+                if (owned && ioctl(d->fd, LOOP_CLR_FD) < 0)
+                        log_debug_errno(errno, "Failed to clear loop device: %m");
+
+                safe_close(d->fd);
+        }
+
+        if (owned) {
+                _cleanup_close_ int control = -1;
+
+                control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+                if (control < 0)
+                        log_debug_errno(errno, "Failed to open loop control device: %m");
+                else if (ioctl(control, LOOP_CTL_REMOVE, d->nr) < 0)
+                        log_debug_errno(errno, "Failed to remove loop device: %m");
+        }
+
+        free(d->node);
+        return mfree(d);
+}
+
+/* Marks the loop device as no longer ours to clean up. */
+void loop_device_relinquish(LoopDevice *d) {
+        assert(d);
+
+        /* From now on loop_device_unref() skips the explicit clear/remove steps. Tearing down the device is
+         * left to the kernel, via the "auto-clear" logic that was turned on when the device was created. */
+        d->relinquished = true;
+}
diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h
new file mode 100644
index 0000000..d78466c
--- /dev/null
+++ b/src/shared/loop-util.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "macro.h"
+
+typedef struct LoopDevice LoopDevice;
+
+/* Some helpers for setting up loopback block devices */
+
+struct LoopDevice {
+ int fd; /* open fd of the block device; closed by loop_device_unref() */
+ int nr; /* loop device number, or -1 if the fd refers to a block device we did not allocate */
+ char *node; /* device node path ("/dev/loopN"), allocated; left NULL for pre-existing block devices */
+ bool relinquished; /* if true, loop_device_unref() does not clear/remove the device */
+};
+
+int loop_device_make(int fd, int open_flags, LoopDevice **ret);
+int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret);
+
+LoopDevice* loop_device_unref(LoopDevice *d);
+DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref);
+
+void loop_device_relinquish(LoopDevice *d);
diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c
new file mode 100644
index 0000000..af06ab2
--- /dev/null
+++ b/src/shared/machine-image.c
@@ -0,0 +1,1249 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/fs.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "lockfile-util.h"
+#include "log.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "xattr-util.h"
+
+/* Directories searched for images, one list per image class. Each list is a sequence of NUL-terminated strings
+ * that is walked in the listed order with NULSTR_FOREACH(): image_find() returns the first match, and
+ * image_discover() skips names that were already found in an earlier directory. */
+static const char* const image_search_path[_IMAGE_CLASS_MAX] = {
+ [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
+ "/run/machines\0" /* and here too */
+ "/var/lib/machines\0" /* the main place for images */
+ "/var/lib/container\0" /* legacy */
+ "/usr/local/lib/machines\0"
+ "/usr/lib/machines\0",
+
+ [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
+ "/run/portables\0" /* and here too */
+ "/var/lib/portables\0" /* the main place for images */
+ "/usr/local/lib/portables\0"
+ "/usr/lib/portables\0",
+};
+
+/* Destructor used by the reference counting helpers: releases all strings and string lists owned by the image,
+ * then the object itself. Always returns NULL. */
+static Image *image_free(Image *i) {
+        assert(i);
+
+        /* Metadata filled in by image_read_metadata() */
+        strv_free(i->os_release);
+        strv_free(i->machine_info);
+        free(i->hostname);
+
+        /* Identity set up by image_new() */
+        free(i->path);
+        free(i->name);
+
+        return mfree(i);
+}
+
+/* Instantiate the image_ref()/image_unref() helpers declared in the header on top of image_free(), and hash
+ * ops keyed by string (string_hash_func/string_compare_func) with image_unref() as the value destructor.
+ * NOTE(review): exact semantics are defined by the macros, which live outside this file. */
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
+ Image, image_unref);
+
+/* Returns a newly allocated, NULL-terminated list with the three candidate locations of the image's
+ * "<name>.nspawn" settings file: /etc/systemd/nspawn/, /run/systemd/nspawn/, and the directory the image
+ * itself lives in (in this order). Returns NULL on allocation failure. */
+static char **image_settings_path(Image *image) {
+        _cleanup_strv_free_ char **paths = NULL;
+        const char *fn;
+
+        assert(image);
+
+        /* Three entries plus the terminating NULL. Zero-initialized, so that the list is properly terminated
+         * at any point, in case we need to free it half-way. */
+        paths = new0(char*, 4);
+        if (!paths)
+                return NULL;
+
+        fn = strjoina(image->name, ".nspawn");
+
+        paths[0] = strappend("/etc/systemd/nspawn/", fn);
+        if (!paths[0])
+                return NULL;
+
+        paths[1] = strappend("/run/systemd/nspawn/", fn);
+        if (!paths[1])
+                return NULL;
+
+        paths[2] = file_in_same_dir(image->path, fn);
+        if (!paths[2])
+                return NULL;
+
+        return TAKE_PTR(paths);
+}
+
+/* Returns the newly allocated path of the "<name>.roothash" file stored next to the image, or NULL if
+ * file_in_same_dir() fails. */
+static char *image_roothash_path(Image *image) {
+        const char *roothash_name;
+
+        assert(image);
+
+        roothash_name = strjoina(image->name, ".roothash");
+        return file_in_same_dir(image->path, roothash_name);
+}
+
+/* Allocates a new Image object with a reference count of 1.
+ *
+ * 'pretty' becomes the image name. The image path is "<path>/<filename>" if 'path' is non-NULL and 'filename'
+ * alone otherwise, and is simplified afterwards. Usage and limit fields start out as (uint64_t) -1.
+ *
+ * Returns 0 and stores the object in *ret on success, -ENOMEM on allocation failure. */
+static int image_new(
+                ImageType t,
+                const char *pretty,
+                const char *path,
+                const char *filename,
+                bool read_only,
+                usec_t crtime,
+                usec_t mtime,
+                Image **ret) {
+
+        _cleanup_(image_unrefp) Image *img = NULL;
+
+        assert(t >= 0);
+        assert(t < _IMAGE_TYPE_MAX);
+        assert(pretty);
+        assert(filename);
+        assert(ret);
+
+        img = new0(Image, 1);
+        if (!img)
+                return -ENOMEM;
+
+        img->n_ref = 1;
+        img->type = t;
+        img->read_only = read_only;
+        img->crtime = crtime;
+        img->mtime = mtime;
+
+        /* Mark all accounting data as unknown */
+        img->usage = (uint64_t) -1;
+        img->usage_exclusive = (uint64_t) -1;
+        img->limit = (uint64_t) -1;
+        img->limit_exclusive = (uint64_t) -1;
+
+        img->name = strdup(pretty);
+        if (!img->name)
+                return -ENOMEM;
+
+        img->path = path ? strjoin(path, "/", filename) : strdup(filename);
+        if (!img->path)
+                return -ENOMEM;
+
+        path_simplify(img->path, false);
+
+        *ret = TAKE_PTR(img);
+        return 0;
+}
+
+/* Derives the image name from the last component of 'path'. If 'suffix' is non-NULL the component must end in
+ * it, and the suffix is cut off. Returns 0 and a newly allocated string in *ret on success, -EINVAL if the
+ * suffix is missing or the result is not a valid image name, -ENOMEM on allocation failure. */
+static int extract_pretty(const char *path, const char *suffix, char **ret) {
+        _cleanup_free_ char *name = NULL;
+        const char *last;
+
+        assert(path);
+        assert(ret);
+
+        /* Copy the last component, without any slashes trailing it */
+        last = last_path_component(path);
+        name = strndup(last, strcspn(last, "/"));
+        if (!name)
+                return -ENOMEM;
+
+        if (suffix) {
+                char *tail = endswith(name, suffix);
+
+                if (!tail)
+                        return -EINVAL;
+
+                /* Chop off the suffix */
+                *tail = 0;
+        }
+
+        if (!image_name_is_valid(name))
+                return -EINVAL;
+
+        *ret = TAKE_PTR(name);
+        return 0;
+}
+
+/* Creates an Image object for the file system object 'filename', which is looked up relative to 'dfd'.
+ *
+ *   pretty:   image name to use; if NULL it is derived from 'filename'
+ *   dfd:      directory fd 'filename' is relative to, or AT_FDCWD
+ *   path:     path of the directory 'dfd' refers to, or NULL if unknown; prefixed to 'filename' to form the
+ *             image path, and used to detect vendor (/usr) images
+ *   st:       stat data of the object if the caller has it already, or NULL to have it acquired here
+ *   ret:      where to store the new object; may be NULL, in which case only the type check is done
+ *
+ * Directories become IMAGE_SUBVOLUME (btrfs subvolume) or IMAGE_DIRECTORY images, regular files named "*.raw"
+ * become IMAGE_RAW images, block devices become IMAGE_BLOCK images.
+ *
+ * Returns 0 on success, -EMEDIUMTYPE if the object is of no recognized type, other negative errno-style values
+ * on failure. */
+static int image_make(
+                const char *pretty,
+                int dfd,
+                const char *path,
+                const char *filename,
+                const struct stat *st,
+                Image **ret) {
+
+        _cleanup_free_ char *pretty_buffer = NULL;
+        struct stat stbuf;
+        bool read_only;
+        int r;
+
+        assert(dfd >= 0 || dfd == AT_FDCWD);
+        assert(filename);
+
+        /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
+         * devices into /var/lib/machines/, and treat them normally.
+         *
+         * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
+         * recognize. */
+
+        if (!st) {
+                if (fstatat(dfd, filename, &stbuf, 0) < 0)
+                        return -errno;
+
+                st = &stbuf;
+        }
+
+        /* Images below /usr and images on read-only file systems are always considered read-only */
+        read_only =
+                (path && path_startswith(path, "/usr")) ||
+                (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
+
+        if (S_ISDIR(st->st_mode)) {
+                _cleanup_close_ int fd = -1;
+                unsigned file_attr = 0;
+
+                if (!ret)
+                        return 0;
+
+                if (!pretty) {
+                        r = extract_pretty(filename, NULL, &pretty_buffer);
+                        if (r < 0)
+                                return r;
+
+                        pretty = pretty_buffer;
+                }
+
+                fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+                if (fd < 0)
+                        return -errno;
+
+                /* btrfs subvolumes have inode 256 */
+                if (st->st_ino == 256) {
+
+                        r = btrfs_is_filesystem(fd);
+                        if (r < 0)
+                                return r;
+                        if (r) {
+                                BtrfsSubvolInfo info;
+
+                                /* It's a btrfs subvolume */
+
+                                r = btrfs_subvol_get_info_fd(fd, 0, &info);
+                                if (r < 0)
+                                        return r;
+
+                                r = image_new(IMAGE_SUBVOLUME,
+                                              pretty,
+                                              path,
+                                              filename,
+                                              info.read_only || read_only,
+                                              info.otime,
+                                              0,
+                                              ret);
+                                if (r < 0)
+                                        return r;
+
+                                /* Quota data is best-effort: it is left at "unknown" if a quota scan is
+                                 * ongoing or the data cannot be read. */
+                                if (btrfs_quota_scan_ongoing(fd) == 0) {
+                                        BtrfsQuotaInfo quota;
+
+                                        r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
+                                        if (r >= 0) {
+                                                (*ret)->usage = quota.referenced;
+                                                (*ret)->usage_exclusive = quota.exclusive;
+
+                                                (*ret)->limit = quota.referenced_max;
+                                                (*ret)->limit_exclusive = quota.exclusive_max;
+                                        }
+                                }
+
+                                return 0;
+                        }
+                }
+
+                /* If the IMMUTABLE bit is set, we consider the
+                 * directory read-only. Since the ioctl is not
+                 * supported everywhere we ignore failures. */
+                (void) read_attr_fd(fd, &file_attr);
+
+                /* It's just a normal directory. */
+                r = image_new(IMAGE_DIRECTORY,
+                              pretty,
+                              path,
+                              filename,
+                              read_only || (file_attr & FS_IMMUTABLE_FL),
+                              0,
+                              0,
+                              ret);
+                if (r < 0)
+                        return r;
+
+                return 0;
+
+        } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
+                usec_t crtime = 0;
+
+                /* It's a RAW disk image */
+
+                if (!ret)
+                        return 0;
+
+                (void) fd_getcrtime_at(dfd, filename, &crtime, 0);
+
+                if (!pretty) {
+                        r = extract_pretty(filename, ".raw", &pretty_buffer);
+                        if (r < 0)
+                                return r;
+
+                        pretty = pretty_buffer;
+                }
+
+                r = image_new(IMAGE_RAW,
+                              pretty,
+                              path,
+                              filename,
+                              !(st->st_mode & 0222) || read_only,
+                              crtime,
+                              timespec_load(&st->st_mtim),
+                              ret);
+                if (r < 0)
+                        return r;
+
+                /* Disk usage is what is actually allocated, the limit is the apparent file size */
+                (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
+                (*ret)->limit = (*ret)->limit_exclusive = st->st_size;
+
+                return 0;
+
+        } else if (S_ISBLK(st->st_mode)) {
+                _cleanup_close_ int block_fd = -1;
+                uint64_t size = UINT64_MAX;
+
+                /* 'path' is NULL when we are called for something outside of the search path (or for the
+                 * host image). Never hand a NULL pointer to a "%s" conversion in the log messages below. */
+                const char *log_dir = path ? path : "(null)";
+
+                /* A block device */
+
+                if (!ret)
+                        return 0;
+
+                if (!pretty) {
+                        r = extract_pretty(filename, NULL, &pretty_buffer);
+                        if (r < 0)
+                                return r;
+
+                        pretty = pretty_buffer;
+                }
+
+                block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+                if (block_fd < 0)
+                        log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", log_dir, filename);
+                else {
+                        /* Refresh stat data after opening the node */
+                        if (fstat(block_fd, &stbuf) < 0)
+                                return -errno;
+                        st = &stbuf;
+
+                        if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
+                                return -ENOTTY;
+
+                        if (!read_only) {
+                                int state = 0;
+
+                                if (ioctl(block_fd, BLKROGET, &state) < 0)
+                                        log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", log_dir, filename);
+                                else if (state)
+                                        read_only = true;
+                        }
+
+                        if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
+                                log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", log_dir, filename);
+
+                        block_fd = safe_close(block_fd);
+                }
+
+                r = image_new(IMAGE_BLOCK,
+                              pretty,
+                              path,
+                              filename,
+                              !(st->st_mode & 0222) || read_only,
+                              0,
+                              0,
+                              ret);
+                if (r < 0)
+                        return r;
+
+                /* Only report a size if we could actually determine one */
+                if (size != 0 && size != UINT64_MAX)
+                        (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
+
+                return 0;
+        }
+
+        return -EMEDIUMTYPE;
+}
+
+/* Looks for an image called 'name' in the search path of 'class'. In each directory first an entry with
+ * exactly that name is tried (accepted if it is a directory or block device), then "<name>.raw" (accepted if
+ * it is a regular file). The first directory with a suitable entry wins.
+ *
+ * 'ret' may be NULL if the caller only wants to know whether the image exists.
+ *
+ * Returns 1 if the image was found in the search path, the (non-negative) result of image_make() if the
+ * special ".host" image of the machine class was requested, -ENOENT if there is no such image (this includes
+ * invalid names), and other negative errno-style values on failure. */
+int image_find(ImageClass class, const char *name, Image **ret) {
+        const char *path;
+        int r;
+
+        assert(class >= 0);
+        assert(class < _IMAGE_CLASS_MAX);
+        assert(name);
+
+        /* There are no images with invalid names */
+        if (!image_name_is_valid(name))
+                return -ENOENT;
+
+        NULSTR_FOREACH(path, image_search_path[class]) {
+                _cleanup_closedir_ DIR *d = NULL;
+                struct stat st;
+
+                d = opendir(path);
+                if (!d) {
+                        if (errno == ENOENT)
+                                continue;
+
+                        return -errno;
+                }
+
+                /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to
+                 * symlink block devices into the search path */
+                if (fstatat(dirfd(d), name, &st, 0) < 0) {
+                        _cleanup_free_ char *raw = NULL;
+
+                        if (errno != ENOENT)
+                                return -errno;
+
+                        /* Nothing under the plain name, try the raw image file name instead */
+                        raw = strappend(name, ".raw");
+                        if (!raw)
+                                return -ENOMEM;
+
+                        if (fstatat(dirfd(d), raw, &st, 0) < 0) {
+
+                                if (errno == ENOENT)
+                                        continue;
+
+                                return -errno;
+                        }
+
+                        if (!S_ISREG(st.st_mode))
+                                continue;
+
+                        r = image_make(name, dirfd(d), path, raw, &st, ret);
+
+                } else {
+                        if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode))
+                                continue;
+
+                        r = image_make(name, dirfd(d), path, name, &st, ret);
+                }
+                if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+                        continue;
+                if (r < 0)
+                        return r;
+
+                if (ret)
+                        (*ret)->discoverable = true;
+
+                return 1;
+        }
+
+        /* The host's own root directory is available as pseudo-image ".host" of the machine class */
+        if (class == IMAGE_MACHINE && streq(name, ".host")) {
+                r = image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
+                if (r < 0)
+                        return r;
+
+                if (ret)
+                        (*ret)->discoverable = true;
+
+                return r;
+        }
+
+        return -ENOENT;
+}
+
+/* Creates an Image object directly from a file system path, without consulting the image search path. The
+ * root directory is mapped to the special ".host" image; for everything else the image name is derived from
+ * the path by image_make(). Returns what image_make() returns. */
+int image_from_path(const char *path, Image **ret) {
+        bool is_host;
+
+        /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
+         * the image is in the image search path. And if it is we don't know if the path we used is actually not
+         * overridden by another, different image earlier in the search path */
+
+        is_host = path_equal(path, "/");
+
+        return image_make(is_host ? ".host" : NULL,
+                          AT_FDCWD,
+                          NULL,
+                          is_host ? "/" : path,
+                          NULL,
+                          ret);
+}
+
+/* Accepts either an image name or a path: anything that qualifies as valid image name is looked up in the
+ * search path of 'class' via image_find(), everything else is treated as a path and passed to
+ * image_from_path(). */
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret) {
+        return image_name_is_valid(name_or_path)
+                ? image_find(class, name_or_path, ret)
+                : image_from_path(name_or_path, ret);
+}
+
+/* Enumerates all images in the search path of 'class' and adds them to the hashmap 'h', keyed by image name.
+ * Directories are processed in search path order, and a name already present in 'h' is skipped, so that
+ * earlier directories take precedence. For the machine class the ".host" pseudo-image is added at the end,
+ * unless an image of that name was found already.
+ *
+ * Returns 0 on success, a negative errno-style value on failure. Entries added before a failure stay in 'h'. */
+int image_discover(ImageClass class, Hashmap *h) {
+ const char *path;
+ int r;
+
+ assert(class >= 0);
+ assert(class < _IMAGE_CLASS_MAX);
+ assert(h);
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(path);
+ if (!d) {
+ /* Search directories that do not exist are simply skipped */
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+ _cleanup_free_ char *truncated = NULL;
+ const char *pretty;
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people
+ * to symlink block devices into the search path */
+ if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ if (S_ISREG(st.st_mode)) {
+ const char *e;
+
+ /* Regular files only count if they carry the ".raw" suffix; the image name is the
+ * file name without it */
+ e = endswith(de->d_name, ".raw");
+ if (!e)
+ continue;
+
+ truncated = strndup(de->d_name, e - de->d_name);
+ if (!truncated)
+ return -ENOMEM;
+
+ pretty = truncated;
+ } else if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode))
+ pretty = de->d_name;
+ else
+ continue;
+
+ if (!image_name_is_valid(pretty))
+ continue;
+
+ /* An image of the same name from an earlier search directory wins */
+ if (hashmap_contains(h, pretty))
+ continue;
+
+ r = image_make(pretty, dirfd(d), path, de->d_name, &st, &image);
+ if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+ continue;
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ /* The hashmap holds the reference now, disarm the cleanup handler */
+ image = NULL;
+ }
+ }
+
+ if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+
+ r = image_make(".host", AT_FDCWD, NULL, "/", NULL, &image);
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ /* The hashmap holds the reference now, disarm the cleanup handler */
+ image = NULL;
+ }
+
+ return 0;
+}
+
+/* Removes the image from disk, together with its ".nspawn" settings files and its ".roothash" file. Vendor
+ * and host images are refused with -EROFS. The image is locked exclusively (non-blocking) while it is being
+ * removed. Failure to remove the auxiliary files is logged at debug level only.
+ *
+ * Returns 0 on success, a negative errno-style value on failure. */
+int image_remove(Image *i) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ /* Determine the auxiliary file names up front, before anything is removed */
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
+ * big guns */
+ if (unlink(i->path) < 0) {
+ r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* Allow deletion of read-only directories */
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+ r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
+ * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
+ * the thing (it's most likely a symlink after all). */
+
+ if (path_startswith(i->path, "/dev"))
+ break;
+
+ _fallthrough_;
+ case IMAGE_RAW:
+ if (unlink(i->path) < 0)
+ return -errno;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Removing the auxiliary files is best-effort: failures are logged, but do not fail the operation */
+ STRV_FOREACH(j, settings) {
+ if (unlink(*j) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
+ }
+
+ if (unlink(roothash) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+/* Renames the auxiliary file 'path' to "<new_name><suffix>" within the same directory, refusing to replace an
+ * existing file. Returns the result of rename_noreplace(), or -ENOMEM. */
+static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+        _cleanup_free_ char *target = NULL;
+        const char *target_name;
+
+        target_name = strjoina(new_name, suffix);
+
+        target = file_in_same_dir(path, target_name);
+        if (!target)
+                return -ENOMEM;
+
+        return rename_noreplace(AT_FDCWD, path, AT_FDCWD, target);
+}
+
+/* Renames the image to 'new_name', keeping it in the directory it is stored in, and updates i->name and
+ * i->path accordingly. The ".nspawn" settings files and the ".roothash" file are renamed along, on a
+ * best-effort basis.
+ *
+ * Returns 0 on success, -EINVAL if the new name is not a valid image name, -EROFS for vendor and host images
+ * and for block devices in /dev, -EEXIST if image_find() finds a machine image of the new name, and other
+ * negative errno-style values on failure. */
+int image_rename(Image *i, const char *new_name) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
+ _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
+ _cleanup_strv_free_ char **settings = NULL;
+ unsigned file_attr = 0;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ /* Determine the auxiliary file names while the image still carries its old name */
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_DIRECTORY:
+ /* Turn off the immutable bit while we rename the image, so that we can rename it */
+ (void) read_attr_path(i->path, &file_attr);
+
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+
+ _fallthrough_;
+ case IMAGE_SUBVOLUME:
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
+ if (path_startswith(i->path, "/dev"))
+ return -EROFS;
+
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_RAW: {
+ const char *fn;
+
+ /* Raw images keep their ".raw" suffix */
+ fn = strjoina(new_name, ".raw");
+ new_path = file_in_same_dir(i->path, fn);
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!new_path)
+ return -ENOMEM;
+
+ /* Allocate the new name before renaming, so that we cannot fail with OOM after the rename took place */
+ nn = strdup(new_name);
+ if (!nn)
+ return -ENOMEM;
+
+ r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
+ if (r < 0)
+ return r;
+
+ /* Restore the immutable bit, if it was set before */
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
+
+ free_and_replace(i->path, new_path);
+ free_and_replace(i->name, nn);
+
+ /* Renaming the auxiliary files is best-effort: failures are logged, but do not fail the operation */
+ STRV_FOREACH(j, settings) {
+ r = rename_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
+ }
+
+ r = rename_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+/* Copies the auxiliary file 'path' to "<new_name><suffix>" within the same directory, atomically, with access
+ * mode 0664 and reflinking enabled. Returns the result of copy_file_atomic(), or -ENOMEM. */
+static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+        _cleanup_free_ char *target = NULL;
+        const char *target_name;
+
+        target_name = strjoina(new_name, suffix);
+
+        target = file_in_same_dir(path, target_name);
+        if (!target)
+                return -ENOMEM;
+
+        return copy_file_atomic(path, target, 0664, 0, COPY_REFLINK);
+}
+
+/* Creates a copy of the image under the name 'new_name'. The copy is always placed in /var/lib/machines/,
+ * regardless of where the source image is stored. Directory and subvolume images are snapshotted (with
+ * fallback to a plain copy), raw images are copied as files; block device images cannot be cloned. The
+ * ".nspawn" settings files and the ".roothash" file are copied along, on a best-effort basis.
+ *
+ * Returns 0 on success, -EINVAL if the new name is not a valid image name, -EEXIST if image_find() finds a
+ * machine image of the new name, -EOPNOTSUPP for unsupported image types, and other negative errno-style
+ * values on failure. */
+int image_clone(Image *i, const char *new_name, bool read_only) {
+ _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ const char *new_path;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+ case IMAGE_DIRECTORY:
+ /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
+ * directory. */
+
+ new_path = strjoina("/var/lib/machines/", new_name);
+
+ r = btrfs_subvol_snapshot(i->path, new_path,
+ (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA);
+ if (r >= 0)
+ /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
+ (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
+
+ break;
+
+ case IMAGE_RAW:
+ new_path = strjoina("/var/lib/machines/", new_name, ".raw");
+
+ /* The read-only state of a raw image is expressed through the write bits of its access mode */
+ r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
+ break;
+
+ case IMAGE_BLOCK:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Result of the snapshot or copy operation above */
+ if (r < 0)
+ return r;
+
+ /* Cloning the auxiliary files is best-effort: failures are logged, but do not fail the operation */
+ STRV_FOREACH(j, settings) {
+ r = clone_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
+ }
+
+ r = clone_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+/* Turns the read-only state of the image on ('b' true) or off ('b' false). How the state is stored depends on
+ * the image type: the btrfs read-only flag for subvolumes, the "immutable" file attribute for directories, the
+ * write bits of the access mode for raw files, and the BLKROSET ioctl for block devices. The image is locked
+ * exclusively (non-blocking) during the operation.
+ *
+ * Returns 0 on success, -EROFS for vendor and host images, -EOPNOTSUPP for unknown image types, and other
+ * negative errno-style values on failure. */
+int image_read_only(Image *i, bool b) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Note that we set the flag only on the top-level
+ * subvolume of the image. */
+
+ r = btrfs_subvol_set_read_only(i->path, b);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* For simple directory trees we cannot use the access
+ mode of the top-level directory, since it has an
+ effect on the container itself. However, we can
+ use the "immutable" flag, to at least make the
+ top-level directory read-only. It's not as good as
+ a read-only subvolume, but at least something, and
+ we can read the value back. */
+
+ r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_RAW: {
+ struct stat st;
+
+ if (stat(i->path, &st) < 0)
+ return -errno;
+
+ /* Keep only the existing read bits, and add the owner write bit when making the image writable */
+ if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
+ return -errno;
+
+ /* If the image is now read-only, it's a good time to
+ * defrag it, given that no write patterns will
+ * fragment it again. */
+ if (b)
+ (void) btrfs_defrag(i->path);
+ break;
+ }
+
+ case IMAGE_BLOCK: {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int state = b;
+
+ fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ /* Verify that what we opened really is a block device, before issuing the ioctl on it */
+ if (fstat(fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTTY;
+
+ if (ioctl(fd, BLKROSET, &state) < 0)
+ return -errno;
+
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* Takes the locks protecting the image at 'path', which must be absolute. 'operation' is the lock operation
+ * passed on to make_lock_file()/make_lock_file_for() (callers in this file pass LOCK_EX or LOCK_SH, combined
+ * with LOCK_NB). On success *global and *local are both initialized; either may be an empty lock that is safe
+ * to release.
+ *
+ * If the environment variable SYSTEMD_NSPAWN_LOCK is set to false no locks are taken at all.
+ *
+ * Returns 0 on success, -EINVAL for relative paths, -EBUSY for the root directory, -ENOTTY if the path refers
+ * to something that is neither a block device, a directory nor a regular file, and other negative errno-style
+ * values on failure. */
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
+ _cleanup_free_ char *p = NULL;
+ LockFile t = LOCK_FILE_INIT;
+ struct stat st;
+ int r;
+
+ assert(path);
+ assert(global);
+ assert(local);
+
+ /* Locks an image path. This actually creates two locks: one
+ * "local" one, next to the image path itself, which might be
+ * shared via NFS. And another "global" one, in /run, that
+ * uses the device/inode number. This has the benefit that we
+ * can even lock a tree that is a mount point, correctly. */
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (path_equal(path, "/"))
+ return -EBUSY;
+
+ /* Pick the name of the global lock file. If the path cannot be stat()ed, 'p' stays NULL and no global
+ * lock is taken. */
+ if (stat(path, &st) >= 0) {
+ if (S_ISBLK(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
+ else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
+ else
+ return -ENOTTY;
+
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since
+ * block devices are device local anyway. */
+ if (!path_startswith(path, "/dev")) {
+ r = make_lock_file_for(path, operation, &t);
+ if (r < 0) {
+ /* A shared lock that cannot be created because the file system is read-only is not fatal */
+ if ((operation & LOCK_SH) && r == -EROFS)
+ log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
+ else
+ return r;
+ }
+ }
+
+ if (p) {
+ /* The result is not checked here; if the directory is missing make_lock_file() below fails */
+ mkdir_p("/run/systemd/nspawn/locks", 0700);
+
+ r = make_lock_file(p, operation, global);
+ if (r < 0) {
+ /* Don't leave the local lock behind if we could not get the global one */
+ release_lock_file(&t);
+ return r;
+ }
+ } else
+ *global = (LockFile) LOCK_FILE_INIT;
+
+ /* Hand the local lock over to the caller */
+ *local = t;
+ return 0;
+}
+
+/* Sets the quota limit on referenced data of a subvolume image to 'referenced_max'. Returns -EROFS for vendor
+ * and host images, -EOPNOTSUPP for image types other than subvolumes, and otherwise the result of setting the
+ * subtree quota limit. */
+int image_set_limit(Image *i, uint64_t referenced_max) {
+        assert(i);
+
+        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+                return -EROFS;
+
+        if (i->type != IMAGE_SUBVOLUME)
+                return -EOPNOTSUPP;
+
+        /* The limit is applied twice: to the subvolume itself, and to the subtree. The former is kept mostly
+         * for historical reasons, from the time before there was a concept of subtree quota and only the
+         * subvolume quota was modified. Its result, and that of setting up the quota group, is ignored. */
+        (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
+        (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
+
+        return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
+}
+
+/* Reads hostname, machine ID, machine-info and os-release data from the image and stores them in the Image
+ * object, replacing any previous values. The image is locked in shared mode (non-blocking) while we read.
+ *
+ * For directory and subvolume images the files are read directly from the tree; each of them is optional, and
+ * failures to read one are only logged at debug level. Raw and block device images are attached via a loop
+ * device and dissected; there, failures are returned to the caller.
+ *
+ * Returns 0 on success (and sets i->metadata_valid), -EOPNOTSUPP for unknown image types, and other negative
+ * errno-style values on failure. */
+int image_read_metadata(Image *i) {
+        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+        int r;
+
+        assert(i);
+
+        r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
+        if (r < 0)
+                return r;
+
+        switch (i->type) {
+
+        case IMAGE_SUBVOLUME:
+        case IMAGE_DIRECTORY: {
+                _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+                sd_id128_t machine_id = SD_ID128_NULL;
+                _cleanup_free_ char *hostname = NULL;
+                _cleanup_free_ char *path = NULL;
+
+                /* All paths are resolved relative to the image root, so that symlinks inside the image cannot
+                 * point us to files of the host. */
+
+                r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
+                if (r < 0 && r != -ENOENT)
+                        log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
+                else if (r >= 0) {
+                        r = read_etc_hostname(path, &hostname);
+                        if (r < 0)
+                                /* Log the error that was returned to us, errno is not meaningful here */
+                                log_debug_errno(r, "Failed to read /etc/hostname of image %s: %m", i->name);
+                }
+
+                path = mfree(path);
+
+                r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
+                if (r < 0 && r != -ENOENT)
+                        log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name);
+                else if (r >= 0) {
+                        _cleanup_close_ int fd = -1;
+
+                        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+                        if (fd < 0)
+                                log_debug_errno(errno, "Failed to open %s: %m", path);
+                        else {
+                                r = id128_read_fd(fd, ID128_PLAIN, &machine_id);
+                                if (r < 0)
+                                        log_debug_errno(r, "Image %s contains invalid machine ID.", i->name);
+                        }
+                }
+
+                path = mfree(path);
+
+                r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path);
+                if (r < 0 && r != -ENOENT)
+                        log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
+                else if (r >= 0) {
+                        r = load_env_file_pairs(NULL, path, &machine_info);
+                        if (r < 0)
+                                log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
+                }
+
+                r = load_os_release_pairs(i->path, &os_release);
+                if (r < 0)
+                        log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
+
+                /* Install whatever we managed to read; fields we could not read are reset */
+                free_and_replace(i->hostname, hostname);
+                i->machine_id = machine_id;
+                strv_free_and_replace(i->machine_info, machine_info);
+                strv_free_and_replace(i->os_release, os_release);
+
+                break;
+        }
+
+        case IMAGE_RAW:
+        case IMAGE_BLOCK: {
+                _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+                _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+
+                r = loop_device_make_by_path(i->path, O_RDONLY, &d);
+                if (r < 0)
+                        return r;
+
+                r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT, &m);
+                if (r < 0)
+                        return r;
+
+                r = dissected_image_acquire_metadata(m);
+                if (r < 0)
+                        return r;
+
+                /* Take possession of the metadata the dissected image object collected */
+                free_and_replace(i->hostname, m->hostname);
+                i->machine_id = m->machine_id;
+                strv_free_and_replace(i->machine_info, m->machine_info);
+                strv_free_and_replace(i->os_release, m->os_release);
+
+                break;
+        }
+
+        default:
+                return -EOPNOTSUPP;
+        }
+
+        i->metadata_valid = true;
+
+        return 0;
+}
+
+/* Takes a lock on an image name, independently of the path the image is (or will be) stored at. The lock
+ * file is /run/systemd/nspawn/locks/name-<name>. If the environment variable SYSTEMD_NSPAWN_LOCK is set to
+ * false no lock is taken, and *ret is initialized to an empty lock.
+ *
+ * Returns -EINVAL for invalid image names, -EBUSY for the ".host" image, and otherwise the result of
+ * make_lock_file(). */
+int image_name_lock(const char *name, int operation, LockFile *ret) {
+        const char *lock_path;
+
+        assert(name);
+        assert(ret);
+
+        if (!image_name_is_valid(name))
+                return -EINVAL;
+
+        if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+                /* Locking is turned off, return a lock object that is safe to release */
+                *ret = (LockFile) LOCK_FILE_INIT;
+                return 0;
+        }
+
+        /* The host image cannot be locked by name */
+        if (streq(name, ".host"))
+                return -EBUSY;
+
+        mkdir_p("/run/systemd/nspawn/locks", 0700);
+
+        lock_path = strjoina("/run/systemd/nspawn/locks/name-", name);
+        return make_lock_file(lock_path, operation, ret);
+}
+
+/* Checks whether 's' qualifies as image name: it must be a valid file name, must not contain control
+ * characters, must be valid UTF-8, and must not start with ".#" (the prefix of temporary files used for
+ * atomically creating new files). */
+bool image_name_is_valid(const char *s) {
+        return filename_is_valid(s) &&
+                !string_has_cc(s, NULL) &&
+                utf8_is_valid(s) &&
+                !startswith(s, ".#");
+}
+
+/* Checks whether the path 'image' refers to an entry located directly inside one of the search path
+ * directories of 'class', i.e. whether it consists of a search directory followed by exactly one more path
+ * component. Trailing slashes are tolerated. */
+bool image_in_search_path(ImageClass class, const char *image) {
+        const char *dir;
+
+        assert(image);
+
+        NULSTR_FOREACH(dir, image_search_path[class]) {
+                const char *rest;
+                size_t len;
+
+                rest = path_startswith(image, dir);
+                if (!rest)
+                        continue;
+
+                /* Make sure there's a filename following */
+                len = strcspn(rest, "/");
+                if (len == 0)
+                        continue;
+
+                /* Skip over the filename and any slashes trailing it: if nothing is left, this is a match */
+                rest += len;
+                rest += strspn(rest, "/");
+                if (*rest == 0)
+                        return true;
+        }
+
+        return false;
+}
+
+/* String names of the image types, indexed by ImageType. The macro below instantiates the
+ * image_type_to_string()/image_type_from_string() functions declared in the header from this table. */
+static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
+ [IMAGE_DIRECTORY] = "directory",
+ [IMAGE_SUBVOLUME] = "subvolume",
+ [IMAGE_RAW] = "raw",
+ [IMAGE_BLOCK] = "block",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);
diff --git a/src/shared/machine-image.h b/src/shared/machine-image.h
new file mode 100644
index 0000000..9fd4589
--- /dev/null
+++ b/src/shared/machine-image.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+/* Class of an image. Passed to the lookup functions below (image_find(), image_discover(), …) and
+ * used by image_in_search_path() to pick the list of directories to check. */
+typedef enum ImageClass {
+ IMAGE_MACHINE,
+ IMAGE_PORTABLE,
+ _IMAGE_CLASS_MAX, /* number of valid classes */
+ _IMAGE_CLASS_INVALID = -1
+} ImageClass;
+
+/* Type of an image. Convertible to/from the strings "directory", "subvolume", "raw" and "block"
+ * via image_type_to_string()/image_type_from_string(). */
+typedef enum ImageType {
+ IMAGE_DIRECTORY,
+ IMAGE_SUBVOLUME,
+ IMAGE_RAW,
+ IMAGE_BLOCK,
+ _IMAGE_TYPE_MAX, /* number of valid types, also the size of the string table */
+ _IMAGE_TYPE_INVALID = -1
+} ImageType;
+
+/* Describes a single image. Objects are handled via image_ref()/image_unref(). */
+typedef struct Image {
+ unsigned n_ref; /* presumably the reference counter behind image_ref()/image_unref() */
+
+ ImageType type;
+ char *name;
+ char *path;
+ bool read_only;
+
+ /* Timestamps in µs (usec_t) — presumably creation and modification time; confirm in machine-image.c */
+ usec_t crtime;
+ usec_t mtime;
+
+ /* NOTE(review): usage/limit semantics are not visible in this header; see image_set_limit() */
+ uint64_t usage;
+ uint64_t usage_exclusive;
+ uint64_t limit;
+ uint64_t limit_exclusive;
+
+ /* Metadata filled in by image_read_metadata(), which sets metadata_valid afterwards */
+ char *hostname;
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+
+ bool metadata_valid:1;
+ bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */
+
+ void *userdata;
+} Image;
+
+Image *image_unref(Image *i);
+Image *image_ref(Image *i);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Image*, image_unref);
+
+int image_find(ImageClass class, const char *name, Image **ret);
+int image_from_path(const char *path, Image **ret);
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret);
+int image_discover(ImageClass class, Hashmap *map);
+
+int image_remove(Image *i);
+int image_rename(Image *i, const char *new_name);
+int image_clone(Image *i, const char *new_name, bool read_only);
+int image_read_only(Image *i, bool b);
+
+const char* image_type_to_string(ImageType t) _const_;
+ImageType image_type_from_string(const char *s) _pure_;
+
+bool image_name_is_valid(const char *s) _pure_;
+
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local);
+int image_name_lock(const char *name, int operation, LockFile *ret);
+
+int image_set_limit(Image *i, uint64_t referenced_max);
+
+int image_read_metadata(Image *i);
+
+bool image_in_search_path(ImageClass class, const char *image);
+
+/* An image counts as hidden if it has a name and that name begins with a dot. */
+static inline bool IMAGE_IS_HIDDEN(const struct Image *i) {
+        assert(i);
+
+        if (!i->name)
+                return false;
+
+        return i->name[0] == '.';
+}
+
+/* An image counts as vendor-supplied if its path is set and located below /usr. */
+static inline bool IMAGE_IS_VENDOR(const struct Image *i) {
+        assert(i);
+
+        if (!i->path)
+                return false;
+
+        return !!path_startswith(i->path, "/usr");
+}
+
+/* An image refers to the host if it is named ".host" or if its path is the root directory. */
+static inline bool IMAGE_IS_HOST(const struct Image *i) {
+        assert(i);
+
+        return (i->name && streq(i->name, ".host")) ||
+               (i->path && path_equal(i->path, "/"));
+}
+
+extern const struct hash_ops image_hash_ops;
diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c
new file mode 100644
index 0000000..de4f704
--- /dev/null
+++ b/src/shared/machine-pool.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <sys/statfs.h>
+
+#include "btrfs-util.h"
+#include "label.h"
+#include "machine-pool.h"
+#include "missing.h"
+#include "stat-util.h"
+
+/* Checks whether /var/lib/machines — or, if that does not exist, /var/lib — is located on btrfs.
+ * Returns the result of the F_TYPE_EQUAL() comparison on success, a negative errno-style value if
+ * statfs() fails. */
+static int check_btrfs(void) {
+        struct statfs fs_info;
+
+        if (statfs("/var/lib/machines", &fs_info) >= 0)
+                return F_TYPE_EQUAL(fs_info.f_type, BTRFS_SUPER_MAGIC);
+
+        /* Only a missing directory makes us fall back to the parent */
+        if (errno != ENOENT)
+                return -errno;
+
+        if (statfs("/var/lib", &fs_info) < 0)
+                return -errno;
+
+        return F_TYPE_EQUAL(fs_info.f_type, BTRFS_SUPER_MAGIC);
+}
+
+/* Prepares /var/lib/machines if it is (or would be) located on btrfs: creates it as subvolume,
+ * enables quota and sets up the default quota group hierarchy. Failures of these individual steps
+ * are ignored (the latter two are logged as warnings).
+ *
+ * Returns 0 if no btrfs was detected, 1 if the btrfs setup steps were attempted, and a negative
+ * value (with 'error' filled in) if the btrfs check itself failed. */
+int setup_machine_directory(sd_bus_error *error) {
+        int r;
+
+        r = check_btrfs();
+        if (r < 0)
+                return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
+
+        if (r > 0) {
+                (void) btrfs_subvol_make_label("/var/lib/machines");
+
+                r = btrfs_quota_enable("/var/lib/machines", true);
+                if (r < 0)
+                        log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m");
+
+                r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+                if (r < 0)
+                        log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m");
+
+                return 1;
+        }
+
+        return 0;
+}
diff --git a/src/shared/machine-pool.h b/src/shared/machine-pool.h
new file mode 100644
index 0000000..6f59a18
--- /dev/null
+++ b/src/shared/machine-pool.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdint.h>
+
+#include "sd-bus.h"
+
+int setup_machine_directory(sd_bus_error *error);
diff --git a/src/shared/main-func.h b/src/shared/main-func.h
new file mode 100644
index 0000000..3c182e8
--- /dev/null
+++ b/src/shared/main-func.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdlib.h>
+
+#include "pager.h"
+#include "selinux-util.h"
+#include "spawn-ask-password-agent.h"
+#include "spawn-polkit-agent.h"
+#include "static-destruct.h"
+
+/* Expands to a complete main() function: 'intro' is inserted verbatim first, then the expression
+ * 'impl' is evaluated and its result stored in the local 'r'. Afterwards static_destruct(),
+ * ask_password_agent_close(), polkit_agent_close(), mac_selinux_finish() and pager_close() are
+ * called, in this order, and finally the expression 'ret' (which may refer to 'r') is returned. */
+#define _DEFINE_MAIN_FUNCTION(intro, impl, ret) \
+ int main(int argc, char *argv[]) { \
+ int r; \
+ intro; \
+ r = impl; \
+ static_destruct(); \
+ ask_password_agent_close(); \
+ polkit_agent_close(); \
+ mac_selinux_finish(); \
+ pager_close(); \
+ return ret; \
+ }
+
+/* Defines main() so that it calls impl(argc, argv). Negative return values from impl are mapped
+ * to EXIT_FAILURE, and everything else (zero and positive values) means success, i.e. is mapped
+ * to EXIT_SUCCESS. */
+#define DEFINE_MAIN_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
+
+/* Defines main() so that it calls impl(argc, argv). Zero is mapped to EXIT_SUCCESS, negative
+ * values are mapped to EXIT_FAILURE, and positive values are propagated as exit code unchanged.
+ * Note: "true" means failure! */
+#define DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : r)
diff --git a/src/shared/meson.build b/src/shared/meson.build
new file mode 100644
index 0000000..99d6ba1
--- /dev/null
+++ b/src/shared/meson.build
@@ -0,0 +1,277 @@
+# SPDX-License-Identifier: LGPL-2.1+
+
+shared_sources = files('''
+ acl-util.h
+ acpi-fpdt.c
+ acpi-fpdt.h
+ apparmor-util.c
+ apparmor-util.h
+ ask-password-api.c
+ ask-password-api.h
+ barrier.c
+ barrier.h
+ base-filesystem.c
+ base-filesystem.h
+ bitmap.c
+ bitmap.h
+ blkid-util.h
+ boot-timestamps.c
+ boot-timestamps.h
+ bootspec.c
+ bootspec.h
+ bpf-program.c
+ bpf-program.h
+ bus-unit-util.c
+ bus-unit-util.h
+ bus-util.c
+ bus-util.h
+ calendarspec.c
+ calendarspec.h
+ cgroup-show.c
+ cgroup-show.h
+ clean-ipc.c
+ clean-ipc.h
+ clock-util.c
+ clock-util.h
+ condition.c
+ condition.h
+ conf-parser.c
+ conf-parser.h
+ cpu-set-util.c
+ cpu-set-util.h
+ crypt-util.c
+ crypt-util.h
+ daemon-util.h
+ dev-setup.c
+ dev-setup.h
+ dissect-image.c
+ dissect-image.h
+ dns-domain.c
+ dns-domain.h
+ dropin.c
+ dropin.h
+ efivars.c
+ efivars.h
+ enable-mempool.c
+ env-file-label.c
+ env-file-label.h
+ exec-util.c
+ exec-util.h
+ exit-status.c
+ exit-status.h
+ fdset.c
+ fdset.h
+ fileio-label.c
+ fileio-label.h
+ firewall-util.h
+ format-table.c
+ format-table.h
+ fstab-util.c
+ fstab-util.h
+ generator.c
+ generator.h
+ gpt.h
+ id128-print.c
+ id128-print.h
+ ima-util.c
+ ima-util.h
+ import-util.c
+ import-util.h
+ initreq.h
+ install-printf.c
+ install-printf.h
+ install.c
+ install.h
+ ip-protocol-list.c
+ ip-protocol-list.h
+ journal-importer.c
+ journal-importer.h
+ journal-util.c
+ journal-util.h
+ json-internal.h
+ json.c
+ json.h
+ lockfile-util.c
+ lockfile-util.h
+ logs-show.c
+ logs-show.h
+ loop-util.c
+ loop-util.h
+ machine-image.c
+ machine-image.h
+ machine-pool.c
+ machine-pool.h
+ main-func.h
+ module-util.h
+ mount-util.c
+ mount-util.h
+ nscd-flush.c
+ nscd-flush.h
+ nsflags.c
+ nsflags.h
+ os-util.c
+ os-util.h
+ output-mode.c
+ output-mode.h
+ pager.c
+ pager.h
+ path-lookup.c
+ path-lookup.h
+ pretty-print.c
+ pretty-print.h
+ ptyfwd.c
+ ptyfwd.h
+ reboot-util.c
+ reboot-util.h
+ resolve-util.c
+ resolve-util.h
+ seccomp-util.h
+ securebits-util.c
+ securebits-util.h
+ serialize.c
+ serialize.h
+ sleep-config.c
+ sleep-config.h
+ spawn-ask-password-agent.c
+ spawn-ask-password-agent.h
+ spawn-polkit-agent.c
+ spawn-polkit-agent.h
+ specifier.c
+ specifier.h
+ switch-root.c
+ switch-root.h
+ sysctl-util.c
+ sysctl-util.h
+ tmpfile-util-label.c
+ tmpfile-util-label.h
+ tomoyo-util.c
+ tomoyo-util.h
+ udev-util.c
+ udev-util.h
+ uid-range.c
+ uid-range.h
+ utmp-wtmp.h
+ verbs.c
+ verbs.h
+ vlan-util.c
+ vlan-util.h
+ volatile-util.c
+ volatile-util.h
+ watchdog.c
+ watchdog.h
+ web-util.c
+ web-util.h
+ wireguard-netlink.h
+ xml.c
+ xml.h
+'''.split())
+
+# Test helpers are only compiled in when the 'tests' option is not 'false'
+if get_option('tests') != 'false'
+ shared_sources += files('tests.c', 'tests.h')
+endif
+
+# test-tables.h is always part of the sources; it is also kept in its own variable
+test_tables_h = files('test-tables.h')
+shared_sources += test_tables_h
+
+# The following sources are only added if the matching feature is enabled in 'conf'
+if conf.get('HAVE_ACL') == 1
+ shared_sources += files('acl-util.c')
+endif
+
+if conf.get('ENABLE_UTMP') == 1
+ shared_sources += files('utmp-wtmp.c')
+endif
+
+if conf.get('HAVE_SECCOMP') == 1
+ shared_sources += files('seccomp-util.c')
+endif
+
+if conf.get('HAVE_LIBIPTC') == 1
+ shared_sources += files('firewall-util.c')
+endif
+
+if conf.get('HAVE_KMOD') == 1
+ shared_sources += files('module-util.c')
+endif
+
+# Generated IP protocol lookup tables. Pipeline:
+#   generate-ip-protocol-list.sh     -> ip-protocol-list.txt
+#   ip-protocol-list.txt             -> ip-protocol-from-name.gperf -> ip-protocol-from-name.h
+#   ip-protocol-list.txt + awk script -> ip-protocol-to-name.h
+# All targets use 'capture : true', i.e. the command's standard output becomes the output file.
+generate_ip_protocol_list = find_program('generate-ip-protocol-list.sh')
+ip_protocol_list_txt = custom_target(
+ 'ip-protocol-list.txt',
+ output : 'ip-protocol-list.txt',
+ command : [generate_ip_protocol_list, cpp],
+ capture : true)
+
+# gperf input file derived from the protocol list
+fname = 'ip-protocol-from-name.gperf'
+gperf_file = custom_target(
+ fname,
+ input : ip_protocol_list_txt,
+ output : fname,
+ command : [generate_gperfs, 'ip_protocol', 'IPPROTO_', '@INPUT@'],
+ capture : true)
+
+# Name -> protocol lookup header, generated by gperf (lookup function: lookup_ip_protocol)
+fname = 'ip-protocol-from-name.h'
+target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_ip_protocol',
+ '-H', 'hash_ip_protocol_name',
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+# Protocol -> name header, generated by the awk script from the protocol list
+fname = 'ip-protocol-to-name.h'
+awkscript = 'ip-protocol-to-name.awk'
+target2 = custom_target(
+ fname,
+ input : [awkscript, ip_protocol_list_txt],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+# Both generated headers are part of the library sources
+shared_generated_gperf_headers = [target1, target2]
+shared_sources += shared_generated_gperf_headers
+
+# The library name carries the project version, e.g. systemd-shared-<version>
+libshared_name = 'systemd-shared-@0@'.format(meson.project_version())
+
+# External dependencies used by both the static and the shared variant
+libshared_deps = [threads,
+ librt,
+ libcap,
+ libacl,
+ libcryptsetup,
+ libgcrypt,
+ libiptc,
+ libkmod,
+ libmount,
+ libseccomp,
+ libselinux,
+ libidn,
+ libxz,
+ liblz4,
+ libblkid]
+
+# Linker version script that is passed to the shared library below
+libshared_sym_path = '@0@/libshared.sym'.format(meson.current_source_dir())
+
+# Static variant, built from all sources collected above
+libshared_static = static_library(
+ libshared_name,
+ shared_sources,
+ include_directories : includes,
+ dependencies : libshared_deps,
+ c_args : ['-fvisibility=default'])
+
+# Shared variant: links the static variant plus the listed internal libraries in whole.
+# NOTE(review): libudev_sources (and the link_whole libraries) are defined outside this file.
+libshared = shared_library(
+ libshared_name,
+ libudev_sources,
+ include_directories : includes,
+ link_args : ['-shared',
+ '-Wl,--version-script=' + libshared_sym_path],
+ link_whole : [libshared_static,
+ libbasic,
+ libbasic_gcrypt,
+ libsystemd_static,
+ libjournal_client],
+ c_args : ['-fvisibility=default'],
+ dependencies : libshared_deps,
+ install : true,
+ install_dir : rootlibexecdir)
diff --git a/src/shared/module-util.c b/src/shared/module-util.c
new file mode 100644
index 0000000..a34fe8f
--- /dev/null
+++ b/src/shared/module-util.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "module-util.h"
+
+/* Looks up 'module' (name or alias) via libkmod and tries to insert every matching module that is
+ * neither built in nor already loaded, honouring the module blacklist.
+ *
+ * verbose==true means we should log at non-debug level if we fail to find or load the module.
+ *
+ * Returns a negative errno-style value if the lookup fails, -ENOENT if nothing matched, and
+ * otherwise 0 or the error of the last failed insertion. Insertion failures with -ENODEV or
+ * -ENOENT are logged but not propagated. */
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose) {
+        const int probe_flags = KMOD_PROBE_APPLY_BLACKLIST;
+        struct kmod_list *itr;
+        _cleanup_(kmod_module_unref_listp) struct kmod_list *modlist = NULL;
+        int r = 0;
+
+        log_debug("Loading module: %s", module);
+
+        r = kmod_module_new_from_lookup(ctx, module, &modlist);
+        if (r < 0)
+                return log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r,
+                                      "Failed to lookup module alias '%s': %m", module);
+
+        if (!modlist) {
+                /* The lookup itself succeeded (r >= 0) but matched nothing, hence there is no
+                 * errno to report: use the plain logging call instead of the errno variant. */
+                log_full(verbose ? LOG_ERR : LOG_DEBUG,
+                         "Failed to find module '%s'", module);
+                return -ENOENT;
+        }
+
+        kmod_list_foreach(itr, modlist) {
+                _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL;
+                int state, err;
+
+                mod = kmod_module_get_module(itr);
+                state = kmod_module_get_initstate(mod);
+
+                switch (state) {
+                case KMOD_MODULE_BUILTIN:
+                        log_full(verbose ? LOG_INFO : LOG_DEBUG,
+                                 "Module '%s' is builtin", kmod_module_get_name(mod));
+                        break;
+
+                case KMOD_MODULE_LIVE:
+                        log_debug("Module '%s' is already loaded", kmod_module_get_name(mod));
+                        break;
+
+                default:
+                        err = kmod_module_probe_insert_module(mod, probe_flags,
+                                                              NULL, NULL, NULL, NULL);
+                        if (err == 0)
+                                log_full(verbose ? LOG_INFO : LOG_DEBUG,
+                                         "Inserted module '%s'", kmod_module_get_name(mod));
+                        else if (err == KMOD_PROBE_APPLY_BLACKLIST)
+                                log_full(verbose ? LOG_INFO : LOG_DEBUG,
+                                         "Module '%s' is blacklisted", kmod_module_get_name(mod));
+                        else {
+                                assert(err < 0);
+
+                                /* Pick the log level by how serious the failure is */
+                                log_full_errno(!verbose ? LOG_DEBUG :
+                                               err == -ENODEV ? LOG_NOTICE :
+                                               err == -ENOENT ? LOG_WARNING :
+                                                                LOG_ERR,
+                                               err,
+                                               "Failed to insert module '%s': %m",
+                                               kmod_module_get_name(mod));
+
+                                /* Missing device or module is not treated as failure of the call */
+                                if (!IN_SET(err, -ENODEV, -ENOENT))
+                                        r = err;
+                        }
+                }
+        }
+
+        return r;
+}
diff --git a/src/shared/module-util.h b/src/shared/module-util.h
new file mode 100644
index 0000000..c386c5b
--- /dev/null
+++ b/src/shared/module-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <libkmod.h>
+
+#include "macro.h"
+
+/* Cleanup helpers for libkmod objects; module-util.c uses the resulting kmod_module_unrefp() and
+ * kmod_module_unref_listp() with _cleanup_(), kmod_unrefp() is presumably generated the same way. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_ctx*, kmod_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_module*, kmod_module_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_list*, kmod_module_unref_list);
+
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose);
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
new file mode 100644
index 0000000..9fa995f
--- /dev/null
+++ b/src/shared/mount-util.c
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio_ext.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+/* Include later */
+#include <libmount.h>
+
+#include "alloc-util.h"
+#include "escape.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+/* Tries to unmount everything at or below 'prefix', passing 'flags' to umount2().
+ *
+ * Returns the number of successful unmounts, or a negative errno-style value if
+ * /proc/self/mountinfo could not be opened, a mount point could not be unescaped, or an unmount
+ * failed during the final pass over the mount table. */
+int umount_recursive(const char *prefix, int flags) {
+        bool again;
+        int n = 0, r;
+
+        /* Try to umount everything recursively below a
+         * directory. Also, take care of stacked mounts, and keep
+         * unmounting them until they are gone. */
+
+        do {
+                _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+
+                again = false;
+                r = 0;
+
+                /* Re-read the table from scratch on each pass, since the previous pass changed it */
+                proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+                if (!proc_self_mountinfo)
+                        return -errno;
+
+                (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
+
+                for (;;) {
+                        _cleanup_free_ char *path = NULL, *p = NULL;
+                        int k, q;
+
+                        k = fscanf(proc_self_mountinfo,
+                                   "%*s "       /* (1) mount id */
+                                   "%*s "       /* (2) parent id */
+                                   "%*s "       /* (3) major:minor */
+                                   "%*s "       /* (4) root */
+                                   "%ms "       /* (5) mount point */
+                                   "%*s"        /* (6) mount options */
+                                   "%*[^-]"     /* (7) optional fields */
+                                   "- "         /* (8) separator */
+                                   "%*s "       /* (9) file system type */
+                                   "%*s"        /* (10) mount source */
+                                   "%*s"        /* (11) mount options 2 */
+                                   "%*[^\n]",   /* some rubbish at the end */
+                                   &path);
+                        if (k != 1) {
+                                if (k == EOF)
+                                        break;
+
+                                continue;
+                        }
+
+                        /* Keep the unescape result out of 'r': 'r' must only carry umount errors
+                         * of this pass, otherwise a later, successfully parsed line would
+                         * overwrite a recorded failure (or leak a positive value into the return
+                         * value). */
+                        q = cunescape(path, UNESCAPE_RELAX, &p);
+                        if (q < 0)
+                                return q;
+
+                        if (!path_startswith(p, prefix))
+                                continue;
+
+                        if (umount2(p, flags) < 0) {
+                                r = log_debug_errno(errno, "Failed to umount %s: %m", p);
+                                continue;
+                        }
+
+                        log_debug("Successfully unmounted %s", p);
+
+                        /* The mount table changed, start over with a fresh copy */
+                        again = true;
+                        n++;
+
+                        break;
+                }
+
+        } while (again);
+
+        return r < 0 ? r : n;
+}
+
+/* Stores the f_flag field reported by statvfs() for 'path' in *flags. Returns 0 on success and
+ * a negative errno-style value (leaving *flags untouched) if statvfs() fails. */
+static int get_mount_flags(const char *path, unsigned long *flags) {
+        struct statvfs sfs;
+        int rc;
+
+        rc = statvfs(path, &sfs);
+        if (rc < 0)
+                return -errno;
+
+        *flags = sfs.f_flag;
+        return 0;
+}
+
+/* Use this function only if you do not have direct access to /proc/self/mountinfo
+ * and need the caller to open it for you. This is the case when /proc is
+ * masked or not mounted. Otherwise, use bind_remount_recursive().
+ *
+ * Returns 0 on success and a negative errno-style value on failure. */
+int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
+ _cleanup_set_free_free_ Set *done = NULL;
+ _cleanup_free_ char *cleaned = NULL;
+ int r;
+
+ assert(proc_self_mountinfo);
+
+ /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
+ * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+ * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
+ * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
+ * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
+ * do not have any effect on future submounts that might get propagated, they might be writable. This includes
+ * future submounts that have been triggered via autofs.
+ *
+ * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
+ * remount operation. Note that we'll ignore the blacklist for the top-level path. */
+
+ cleaned = strdup(prefix);
+ if (!cleaned)
+ return -ENOMEM;
+
+ path_simplify(cleaned, false);
+
+ /* 'done' collects the mount points that have been processed already */
+ done = set_new(&path_hash_ops);
+ if (!done)
+ return -ENOMEM;
+
+ /* Each iteration re-reads the mount table and processes whatever is not in 'done' yet */
+ for (;;) {
+ _cleanup_set_free_free_ Set *todo = NULL;
+ bool top_autofs = false;
+ char *x;
+ unsigned long orig_flags;
+
+ todo = set_new(&path_hash_ops);
+ if (!todo)
+ return -ENOMEM;
+
+ rewind(proc_self_mountinfo);
+
+ for (;;) {
+ _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
+ int k;
+
+ k = fscanf(proc_self_mountinfo,
+ "%*s " /* (1) mount id */
+ "%*s " /* (2) parent id */
+ "%*s " /* (3) major:minor */
+ "%*s " /* (4) root */
+ "%ms " /* (5) mount point */
+ "%*s" /* (6) mount options (superblock) */
+ "%*[^-]" /* (7) optional fields */
+ "- " /* (8) separator */
+ "%ms " /* (9) file system type */
+ "%*s" /* (10) mount source */
+ "%*s" /* (11) mount options (bind mount) */
+ "%*[^\n]", /* some rubbish at the end */
+ &path,
+ &type);
+ if (k != 2) {
+ if (k == EOF)
+ break;
+
+ continue;
+ }
+
+ r = cunescape(path, UNESCAPE_RELAX, &p);
+ if (r < 0)
+ return r;
+
+ if (!path_startswith(p, cleaned))
+ continue;
+
+ /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
+ * operate on. */
+ if (!path_equal(cleaned, p)) {
+ bool blacklisted = false;
+ char **i;
+
+ STRV_FOREACH(i, blacklist) {
+
+ /* Blacklist entries equal to, or outside of, the top-level path are not considered */
+ if (path_equal(*i, cleaned))
+ continue;
+
+ if (!path_startswith(*i, cleaned))
+ continue;
+
+ if (path_startswith(p, *i)) {
+ blacklisted = true;
+ log_debug("Not remounting %s blacklisted by %s, called for %s", p, *i, cleaned);
+ break;
+ }
+ }
+ if (blacklisted)
+ continue;
+ }
+
+ /* Let's ignore autofs mounts. If they aren't
+ * triggered yet, we want to avoid triggering
+ * them, as we don't make any guarantees for
+ * future submounts anyway. If they are
+ * already triggered, then we will find
+ * another entry for this. */
+ if (streq(type, "autofs")) {
+ top_autofs = top_autofs || path_equal(cleaned, p);
+ continue;
+ }
+
+ if (!set_contains(done, p)) {
+ /* set_consume() is handed 'p' here; clear our pointer so that it is not freed a second time */
+ r = set_consume(todo, p);
+ p = NULL;
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we have no submounts to process anymore and if
+ * the root is either already done, or an autofs, we
+ * are done */
+ if (set_isempty(todo) &&
+ (top_autofs || set_contains(done, cleaned)))
+ return 0;
+
+ if (!set_contains(done, cleaned) &&
+ !set_contains(todo, cleaned)) {
+ /* The prefix directory itself is not yet a mount, make it one. */
+ if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
+ return -errno;
+
+ /* Best effort: if the flags cannot be queried we continue with none (besides those added below) */
+ orig_flags = 0;
+ (void) get_mount_flags(cleaned, &orig_flags);
+ orig_flags &= ~MS_RDONLY;
+
+ if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+ return -errno;
+
+ log_debug("Made top-level directory %s a mount point.", prefix);
+
+ x = strdup(cleaned);
+ if (!x)
+ return -ENOMEM;
+
+ r = set_consume(done, x);
+ if (r < 0)
+ return r;
+ }
+
+ while ((x = set_steal_first(todo))) {
+
+ r = set_consume(done, x);
+ if (IN_SET(r, 0, -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+
+ /* Deal with mount points that are obstructed by a later mount */
+ r = path_is_mount_point(x, NULL, 0);
+ if (IN_SET(r, 0, -ENOENT))
+ continue;
+ if (IN_SET(r, -EACCES, -EPERM)) {
+ /* Even if the root user invokes this, submounts under private FUSE or NFS mount points
+ * may not be accessed. E.g.,
+ *
+ * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
+ * $ bindfs --no-allow-other ~/mnt ~/mnt
+ *
+ * Then, root user cannot access the mount point ~/mnt/mnt.
+ * In such cases, the submounts are ignored, as we have no way to manage them. */
+ log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
+ continue;
+ }
+ if (r < 0)
+ return r;
+
+ /* Try to reuse the original flag set */
+ orig_flags = 0;
+ (void) get_mount_flags(x, &orig_flags);
+ orig_flags &= ~MS_RDONLY;
+
+ if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+ return -errno;
+
+ /* NOTE(review): this message says "read-only" even if 'ro' is false */
+ log_debug("Remounted %s read-only.", x);
+ }
+ }
+}
+
+/* Convenience wrapper around bind_remount_recursive_with_mountinfo() that opens
+ * /proc/self/mountinfo itself. Returns a negative errno-style value if the file cannot be
+ * opened, otherwise the result of the wrapped call. */
+int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
+        _cleanup_fclose_ FILE *mountinfo = fopen("/proc/self/mountinfo", "re");
+
+        if (!mountinfo)
+                return -errno;
+
+        /* The stream is only used by us, hence switch off stdio's internal locking */
+        (void) __fsetlocking(mountinfo, FSETLOCKING_BYCALLER);
+
+        return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, mountinfo);
+}
+
+/* Makes 'path' the new root: changes into it, moves the mount onto "/", chroot()s into the
+ * current directory and finally changes to "/". The steps are executed in this order and the
+ * first failing one ends the sequence. Returns 0 on success, a negative errno-style value
+ * otherwise. */
+int mount_move_root(const char *path) {
+        assert(path);
+
+        if (chdir(path) < 0 ||
+            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
+            chroot(".") < 0 ||
+            chdir("/") < 0)
+                return -errno;
+
+        return 0;
+}
+
+/* If there are multiple mounts on a mount point, this removes them all: umount2() is called on
+ * 'path' until it fails. If it fails with EINVAL the function returns whether at least one
+ * unmount succeeded (0 or 1); any other failure is returned as negative errno-style value. */
+int repeat_unmount(const char *path, int flags) {
+        bool unmounted_any = false;
+
+        assert(path);
+
+        while (umount2(path, flags) >= 0)
+                unmounted_any = true;
+
+        if (errno == EINVAL)
+                return unmounted_any;
+
+        return -errno;
+}
+
+const char* mode_to_inaccessible_node(mode_t mode) {
+        /* Maps the file type encoded in 'mode' to the path of the matching inaccessible file node
+         * below /run/systemd/inaccessible/. These nodes are created during early boot by PID 1.
+         * In some cases the character and block device nodes could not be created (maybe because
+         * we run in a userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy
+         * that excludes device nodes with major and minor of 0), but that's fine: if they are
+         * missing we return the AF_UNIX socket node instead, which is not the same, but close
+         * enough for most uses. Most importantly, the kernel allows bind mounts from socket nodes
+         * to any non-directory file nodes.
+         *
+         * Returns NULL for file types not covered here. */
+
+        const mode_t fmt = mode & S_IFMT;
+
+        if (fmt == S_IFREG)
+                return "/run/systemd/inaccessible/reg";
+
+        if (fmt == S_IFDIR)
+                return "/run/systemd/inaccessible/dir";
+
+        if (fmt == S_IFCHR)
+                return access("/run/systemd/inaccessible/chr", F_OK) == 0 ?
+                        "/run/systemd/inaccessible/chr" :
+                        "/run/systemd/inaccessible/sock";
+
+        if (fmt == S_IFBLK)
+                return access("/run/systemd/inaccessible/blk", F_OK) == 0 ?
+                        "/run/systemd/inaccessible/blk" :
+                        "/run/systemd/inaccessible/sock";
+
+        if (fmt == S_IFIFO)
+                return "/run/systemd/inaccessible/fifo";
+
+        if (fmt == S_IFSOCK)
+                return "/run/systemd/inaccessible/sock";
+
+        return NULL;
+}
+
+/* Expands to "NAME|" if the flag is set in the local variable 'flags', to "" otherwise */
+#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
+
+/* Formats mount flags as "MS_A|MS_B|…". Bits that are not covered by the names below are
+ * appended as one hexadecimal number; a 'flags' value of 0 is formatted as "0". Returns a newly
+ * allocated string, or NULL on allocation failure. */
+static char* mount_flags_to_string(long unsigned flags) {
+        const long unsigned known =
+                MS_RDONLY |
+                MS_NOSUID |
+                MS_NODEV |
+                MS_NOEXEC |
+                MS_SYNCHRONOUS |
+                MS_REMOUNT |
+                MS_MANDLOCK |
+                MS_DIRSYNC |
+                MS_NOATIME |
+                MS_NODIRATIME |
+                MS_BIND |
+                MS_MOVE |
+                MS_REC |
+                MS_SILENT |
+                MS_POSIXACL |
+                MS_UNBINDABLE |
+                MS_PRIVATE |
+                MS_SLAVE |
+                MS_SHARED |
+                MS_RELATIME |
+                MS_KERNMOUNT |
+                MS_I_VERSION |
+                MS_STRICTATIME |
+                MS_LAZYTIME;
+        _cleanup_free_ char *extra = NULL;
+        long unsigned unknown;
+        char *joined;
+
+        unknown = flags & ~known;
+
+        /* The numeric suffix is only needed for unknown bits, or if there are no flags at all */
+        if ((flags == 0 || unknown != 0) &&
+            asprintf(&extra, "%lx", unknown) < 0)
+                return NULL;
+
+        joined = strjoin(FLAG(MS_RDONLY),
+                         FLAG(MS_NOSUID),
+                         FLAG(MS_NODEV),
+                         FLAG(MS_NOEXEC),
+                         FLAG(MS_SYNCHRONOUS),
+                         FLAG(MS_REMOUNT),
+                         FLAG(MS_MANDLOCK),
+                         FLAG(MS_DIRSYNC),
+                         FLAG(MS_NOATIME),
+                         FLAG(MS_NODIRATIME),
+                         FLAG(MS_BIND),
+                         FLAG(MS_MOVE),
+                         FLAG(MS_REC),
+                         FLAG(MS_SILENT),
+                         FLAG(MS_POSIXACL),
+                         FLAG(MS_UNBINDABLE),
+                         FLAG(MS_PRIVATE),
+                         FLAG(MS_SLAVE),
+                         FLAG(MS_SHARED),
+                         FLAG(MS_RELATIME),
+                         FLAG(MS_KERNMOUNT),
+                         FLAG(MS_I_VERSION),
+                         FLAG(MS_STRICTATIME),
+                         FLAG(MS_LAZYTIME),
+                         extra);
+        if (!joined)
+                return NULL;
+
+        /* Without numeric suffix the string ends in the "|" of the last flag name */
+        if (!extra)
+                joined[strlen(joined) - 1] = '\0'; /* truncate the last | */
+
+        return joined;
+}
+
+/* Wrapper around mount() that logs what it is about to do at debug level, and logs failures at
+ * 'error_log_level'. Mount flags contained in the 'options' string are extracted via
+ * mount_option_mangle() and merged into 'flags' first. Returns 0 on success, a negative
+ * errno-style value on failure. */
+int mount_verbose(
+                int error_log_level,
+                const char *what,
+                const char *where,
+                const char *type,
+                unsigned long flags,
+                const char *options) {
+
+        _cleanup_free_ char *fl = NULL, *o = NULL;
+        unsigned long f;
+        int r;
+
+        r = mount_option_mangle(options, flags, &f, &o);
+        if (r < 0)
+                return log_full_errno(error_log_level, r,
+                                      "Failed to mangle mount options %s: %m",
+                                      strempty(options));
+
+        /* May be NULL on allocation failure, hence strnull() below */
+        fl = mount_flags_to_string(f);
+
+        if (!what && !type) {
+                /* Neither source nor file system type given */
+                if (f & MS_REMOUNT)
+                        log_debug("Remounting %s (%s \"%s\")...",
+                                  where, strnull(fl), strempty(o));
+                else
+                        log_debug("Mounting %s (%s \"%s\")...",
+                                  where, strnull(fl), strempty(o));
+        } else if (!type && (f & MS_BIND))
+                log_debug("Bind-mounting %s on %s (%s \"%s\")...",
+                          what, where, strnull(fl), strempty(o));
+        else if (f & MS_MOVE)
+                log_debug("Moving mount %s → %s (%s \"%s\")...",
+                          what, where, strnull(fl), strempty(o));
+        else
+                log_debug("Mounting %s on %s (%s \"%s\")...",
+                          strna(type), where, strnull(fl), strempty(o));
+
+        if (mount(what, where, type, f, o) >= 0)
+                return 0;
+
+        return log_full_errno(error_log_level, errno,
+                              "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
+                              strna(what), strna(type), where, strnull(fl), strempty(o));
+}
+
+/* Wrapper around umount() that announces the operation at debug level and logs a failure as
+ * error. Returns 0 on success, otherwise the value returned by log_error_errno(). */
+int umount_verbose(const char *what) {
+        log_debug("Umounting %s...", what);
+
+        if (umount(what) >= 0)
+                return 0;
+
+        return log_error_errno(errno, "Failed to unmount %s: %m", what);
+}
+
+int mount_option_mangle(
+                const char *options,
+                unsigned long mount_flags,
+                unsigned long *ret_mount_flags,
+                char **ret_remaining_options) {
+
+        const struct libmnt_optmap *optmap;
+        _cleanup_free_ char *remaining = NULL;
+        const char *cursor = options;
+        int r;
+
+        /* Splits a comma-separated mount option string into mount flags and everything else.
+         * Words known to libmount's MNT_LINUX_MAP are merged into 'mount_flags' (set, or cleared
+         * for inverting options); all other words are collected, comma-separated, in
+         * '*ret_remaining_options'.
+         * E.g.,
+         * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
+         * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
+         * "size=1630748k,mode=700,uid=1000,gid=1000".
+         * See more examples in test-mount-utils.c.
+         *
+         * Note that if 'options' does not contain any non-mount-flag options,
+         * then '*ret_remaining_options' is set to NULL instead of an empty string.
+         * Note that this does not check validity of options stored in
+         * '*ret_remaining_options'.
+         * Note that if 'options' is NULL, then this just copies 'mount_flags'
+         * to '*ret_mount_flags'.
+         *
+         * Returns 0 on success, a negative errno-style value on failure. */
+
+        assert(ret_mount_flags);
+        assert(ret_remaining_options);
+
+        optmap = mnt_get_builtin_optmap(MNT_LINUX_MAP);
+        if (!optmap)
+                return -EINVAL;
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                const struct libmnt_optmap *ent;
+
+                r = extract_first_word(&cursor, &word, ",", EXTRACT_QUOTES);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                /* All entries in MNT_LINUX_MAP do not take any argument.
+                 * Thus, ent->name does not contain "=" or "[=]", and a plain
+                 * string comparison is sufficient to find the entry. */
+                for (ent = optmap; ent->name; ent++)
+                        if (streq(word, ent->name))
+                                break;
+
+                if (!ent->name) {
+                        /* Not a mount flag: pass the word through unmodified */
+                        if (!strextend_with_separator(&remaining, ",", word, NULL))
+                                return -ENOMEM;
+
+                        continue;
+                }
+
+                if (!(ent->mask & MNT_INVERT))
+                        mount_flags |= ent->id;
+                else if (mount_flags & ent->id)
+                        mount_flags ^= ent->id;
+        }
+
+        *ret_mount_flags = mount_flags;
+        *ret_remaining_options = TAKE_PTR(remaining);
+
+        return 0;
+}
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
new file mode 100644
index 0000000..00df1b0
--- /dev/null
+++ b/src/shared/mount-util.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <mntent.h>
+#include <stdio.h>
+
+#include "macro.h"
+
+int repeat_unmount(const char *path, int flags);
+int umount_recursive(const char *target, int flags);
+int bind_remount_recursive(const char *prefix, bool ro, char **blacklist);
+int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo);
+
+int mount_move_root(const char *path);
+
+/* Cleanup helper for FILE* streams that are to be closed with endmntent() (see <mntent.h>):
+ * declare such a variable with _cleanup_endmntent_ to apply endmntentp() to it via _cleanup_(). */
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, endmntent);
+#define _cleanup_endmntent_ _cleanup_(endmntentp)
+
+int mount_verbose(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options);
+int umount_verbose(const char *where);
+
+int mount_option_mangle(
+ const char *options,
+ unsigned long mount_flags,
+ unsigned long *ret_mount_flags,
+ char **ret_remaining_options);
+
+const char* mode_to_inaccessible_node(mode_t mode);
diff --git a/src/shared/nscd-flush.c b/src/shared/nscd-flush.c
new file mode 100644
index 0000000..5a04468
--- /dev/null
+++ b/src/shared/nscd-flush.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#include <sys/poll.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "nscd-flush.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define NSCD_FLUSH_CACHE_TIMEOUT_USEC (5*USEC_PER_SEC)
+
+struct nscdInvalidateRequest {
+ int32_t version;
+ int32_t type; /* in glibc this is an enum. We don't replicate this here 1:1. Also, wtf, how unportable is that
+ * even? */
+ int32_t key_len;
+ char dbname[];
+};
+
+static const union sockaddr_union nscd_sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/nscd/socket",
+};
+
+static int nscd_flush_cache_one(const char *database, usec_t end) {
+ size_t req_size, has_written = 0, has_read = 0, l;
+ struct nscdInvalidateRequest *req;
+ _cleanup_close_ int fd = -1;
+ int32_t resp;
+ int events;
+
+ assert(database);
+
+ l = strlen(database);
+ req_size = offsetof(struct nscdInvalidateRequest, dbname) + l + 1;
+
+ req = alloca(req_size);
+ *req = (struct nscdInvalidateRequest) {
+ .version = 2,
+ .type = 10,
+ .key_len = l + 1,
+ };
+
+ strcpy(req->dbname, database);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to allocate nscd socket: %m");
+
+ /* Note: connect() returns EINPROGRESS if O_NONBLOCK is set and establishing a connection takes time. The
+ * kernel lets us know this way that the connection is now being established, and we should watch with poll()
+ * to learn when it is fully established. That said, AF_UNIX on Linux never triggers this IRL (connect() is
+ * always instant on AF_UNIX), hence handling this is mostly just an exercise in defensive, protocol-agnostic
+ * programming.
+ *
+ * connect() returns EAGAIN if the socket's backlog limit has been reached. When we see this we give up right
+ * away, after all this entire function here is written in a defensive style so that a non-responding nscd
+ * doesn't stall us for good. (Even if we wanted to handle this better: the Linux kernel doesn't really have a
+ * nice way to connect() to a server synchronously with a time limit that would also cover dealing with the
+ * backlog limit. After all SO_RCVTIMEO and SO_SNDTIMEO don't apply to connect(), and alarm() is frickin' ugly
+ * and not really reasonably usable from threads-aware code.) */
+ if (connect(fd, &nscd_sa.sa, SOCKADDR_UN_LEN(nscd_sa.un)) < 0) {
+ if (errno == EAGAIN)
+ return log_debug_errno(errno, "nscd is overloaded (backlog limit reached) and refuses to take further connections: %m");
+ if (errno != EINPROGRESS)
+ return log_debug_errno(errno, "Failed to connect to nscd socket: %m");
+
+ /* Continue in case of EINPROGRESS, but don't bother with send() or recv() until being notified that
+ * establishing the connection is complete. */
+ events = 0;
+ } else
+ events = POLLIN|POLLOUT; /* Let's assume initially that we can write and read to the fd, to suppress
+ * one poll() invocation */
+ for (;;) {
+ usec_t p;
+
+ if (events & POLLOUT) {
+ ssize_t m;
+
+ assert(has_written < req_size);
+
+ m = send(fd, (uint8_t*) req + has_written, req_size - has_written, MSG_NOSIGNAL);
+ if (m < 0) {
+ if (errno != EAGAIN) /* Note that EAGAIN is returned by the kernel whenever it can't
+ * take the data right now, and that includes if the connect() is
+ * asynchronous and we saw EINPROGRESS on it, and it hasn't
+ * completed yet. */
+ return log_debug_errno(errno, "Failed to write to nscd socket: %m");
+ } else
+ has_written += m;
+ }
+
+ if (events & (POLLIN|POLLERR|POLLHUP)) {
+ ssize_t m;
+
+ if (has_read >= sizeof(resp))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Response from nscd longer than expected: %m");
+
+ m = recv(fd, (uint8_t*) &resp + has_read, sizeof(resp) - has_read, 0);
+ if (m < 0) {
+ if (errno != EAGAIN)
+ return log_debug_errno(errno, "Failed to read from nscd socket: %m");
+ } else if (m == 0) { /* EOF */
+ if (has_read == 0 && has_written >= req_size) /* Older nscd immediately terminated the
+ * connection, accept that as OK */
+ return 1;
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "nscd prematurely ended connection.");
+ } else
+ has_read += m;
+ }
+
+ if (has_written >= req_size && has_read >= sizeof(resp)) { /* done? */
+ if (resp < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "nscd sent us a negative error numer: %i", resp);
+ if (resp > 0)
+ return log_debug_errno(resp, "nscd return failure code on invalidating '%s'.", database);
+ return 1;
+ }
+
+ p = now(CLOCK_MONOTONIC);
+ if (p >= end)
+ return -ETIMEDOUT;
+
+ events = fd_wait_for_event(fd, POLLIN | (has_written < req_size ? POLLOUT : 0), end - p);
+ if (events < 0)
+ return events;
+ }
+}
+
+int nscd_flush_cache(char **databases) {
+ usec_t end;
+ int r = 0;
+ char **i;
+
+ /* Tries to invalidate the specified database in nscd. We do this carefully, with a 5s time-out, so that we
+ * don't block indefinitely on another service. */
+
+ end = usec_add(now(CLOCK_MONOTONIC), NSCD_FLUSH_CACHE_TIMEOUT_USEC);
+
+ STRV_FOREACH(i, databases) {
+ int k;
+
+ k = nscd_flush_cache_one(*i, end);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/shared/nscd-flush.h b/src/shared/nscd-flush.h
new file mode 100644
index 0000000..22774bf
--- /dev/null
+++ b/src/shared/nscd-flush.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int nscd_flush_cache(char **databases);
diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c
new file mode 100644
index 0000000..8cc2d08
--- /dev/null
+++ b/src/shared/nsflags.c
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "nsflags.h"
+#include "string-util.h"
+
+const struct namespace_flag_map namespace_flag_map[] = {
+ { CLONE_NEWCGROUP, "cgroup" },
+ { CLONE_NEWIPC, "ipc" },
+ { CLONE_NEWNET, "net" },
+ /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more
+ * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
+ { CLONE_NEWNS, "mnt" },
+ { CLONE_NEWPID, "pid" },
+ { CLONE_NEWUSER, "user" },
+ { CLONE_NEWUTS, "uts" },
+ {}
+};
+
+int namespace_flags_from_string(const char *name, unsigned long *ret) {
+ unsigned long flags = 0;
+ int r;
+
+ assert_se(ret);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ unsigned long f = 0;
+ unsigned i;
+
+ r = extract_first_word(&name, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (i = 0; namespace_flag_map[i].name; i++)
+ if (streq(word, namespace_flag_map[i].name)) {
+ f = namespace_flag_map[i].flag;
+ break;
+ }
+
+ if (f == 0)
+ return -EINVAL;
+
+ flags |= f;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+int namespace_flags_to_string(unsigned long flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ unsigned i;
+
+ for (i = 0; namespace_flag_map[i].name; i++) {
+ if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag)
+ continue;
+
+ if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name, NULL))
+ return -ENOMEM;
+ }
+
+ if (!s) {
+ s = strdup("");
+ if (!s)
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h
new file mode 100644
index 0000000..0aeb0bc
--- /dev/null
+++ b/src/shared/nsflags.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "missing_sched.h"
+
+/* The combination of all namespace flags defined by the kernel. The right type for this isn't clear. setns() and
+ * unshare() expect these flags to be passed as (signed) "int", while clone() wants them as "unsigned long". The latter
+ * is definitely more appropriate for a flags parameter, and also the larger type of the two, hence let's stick to that
+ * here. */
+#define NAMESPACE_FLAGS_ALL \
+ ((unsigned long) (CLONE_NEWCGROUP| \
+ CLONE_NEWIPC| \
+ CLONE_NEWNET| \
+ CLONE_NEWNS| \
+ CLONE_NEWPID| \
+ CLONE_NEWUSER| \
+ CLONE_NEWUTS))
+
+#define NAMESPACE_FLAGS_INITIAL ((unsigned long) -1)
+
+int namespace_flags_from_string(const char *name, unsigned long *ret);
+int namespace_flags_to_string(unsigned long flags, char **ret);
+
+struct namespace_flag_map {
+ unsigned long flag;
+ const char *name;
+};
+
+extern const struct namespace_flag_map namespace_flag_map[];
diff --git a/src/shared/os-util.c b/src/shared/os-util.c
new file mode 100644
index 0000000..b2d5ce3
--- /dev/null
+++ b/src/shared/os-util.c
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "os-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int path_is_os_tree(const char *path) {
+ int r;
+
+ assert(path);
+
+ /* Does the path exist at all? If not, generate an error immediately. This is useful so that a missing root dir
+ * always results in -ENOENT, and we can properly distinguish the case where the whole root doesn't exist from
+ * the case where just the os-release file is missing. */
+ if (laccess(path, F_OK) < 0)
+ return -errno;
+
+ /* We use {/etc|/usr/lib}/os-release as flag file if something is an OS */
+ r = open_os_release(path, NULL, NULL);
+ if (r == -ENOENT) /* We got nothing */
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd) {
+ _cleanup_free_ char *q = NULL;
+ const char *p;
+ int k;
+
+ FOREACH_STRING(p, "/etc/os-release", "/usr/lib/os-release") {
+ k = chase_symlinks(p, root, CHASE_PREFIX_ROOT|(ret_fd ? CHASE_OPEN : 0), (ret_path ? &q : NULL));
+ if (k != -ENOENT)
+ break;
+ }
+ if (k < 0)
+ return k;
+
+ if (ret_fd) {
+ int real_fd;
+
+ /* Convert the O_PATH fd into a proper, readable one */
+ real_fd = fd_reopen(k, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ safe_close(k);
+ if (real_fd < 0)
+ return real_fd;
+
+ *ret_fd = real_fd;
+ }
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(q);
+
+ return 0;
+}
+
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ FILE *f;
+ int r;
+
+ if (!ret_file)
+ return open_os_release(root, ret_path, NULL);
+
+ r = open_os_release(root, ret_path ? &p : NULL, &fd);
+ if (r < 0)
+ return r;
+
+ f = fdopen(fd, "r");
+ if (!f)
+ return -errno;
+ fd = -1;
+
+ *ret_file = f;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ return 0;
+}
+
+int parse_os_release(const char *root, ...) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ va_start(ap, root);
+ r = parse_env_filev(f, p, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int load_os_release_pairs(const char *root, char ***ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ return load_env_file_pairs(f, p, ret);
+}
diff --git a/src/shared/os-util.h b/src/shared/os-util.h
new file mode 100644
index 0000000..27ec7ac
--- /dev/null
+++ b/src/shared/os-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+int path_is_os_tree(const char *path);
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd);
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file);
+
+int parse_os_release(const char *root, ...) _sentinel_;
+int load_os_release_pairs(const char *root, char ***ret);
diff --git a/src/shared/output-mode.c b/src/shared/output-mode.c
new file mode 100644
index 0000000..107b345
--- /dev/null
+++ b/src/shared/output-mode.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "output-mode.h"
+#include "string-table.h"
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m) {
+
+ switch (m) {
+
+ case OUTPUT_JSON_SSE:
+ return JSON_FORMAT_SSE;
+
+ case OUTPUT_JSON_SEQ:
+ return JSON_FORMAT_SEQ;
+
+ case OUTPUT_JSON_PRETTY:
+ return JSON_FORMAT_PRETTY;
+
+ default:
+ return JSON_FORMAT_NEWLINE;
+ }
+}
+
+static const char *const output_mode_table[_OUTPUT_MODE_MAX] = {
+ [OUTPUT_SHORT] = "short",
+ [OUTPUT_SHORT_FULL] = "short-full",
+ [OUTPUT_SHORT_ISO] = "short-iso",
+ [OUTPUT_SHORT_ISO_PRECISE] = "short-iso-precise",
+ [OUTPUT_SHORT_PRECISE] = "short-precise",
+ [OUTPUT_SHORT_MONOTONIC] = "short-monotonic",
+ [OUTPUT_SHORT_UNIX] = "short-unix",
+ [OUTPUT_VERBOSE] = "verbose",
+ [OUTPUT_EXPORT] = "export",
+ [OUTPUT_JSON] = "json",
+ [OUTPUT_JSON_PRETTY] = "json-pretty",
+ [OUTPUT_JSON_SSE] = "json-sse",
+ [OUTPUT_JSON_SEQ] = "json-seq",
+ [OUTPUT_CAT] = "cat",
+ [OUTPUT_WITH_UNIT] = "with-unit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(output_mode, OutputMode);
diff --git a/src/shared/output-mode.h b/src/shared/output-mode.h
new file mode 100644
index 0000000..00b6032
--- /dev/null
+++ b/src/shared/output-mode.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum OutputMode {
+ OUTPUT_SHORT,
+ OUTPUT_SHORT_FULL,
+ OUTPUT_SHORT_ISO,
+ OUTPUT_SHORT_ISO_PRECISE,
+ OUTPUT_SHORT_PRECISE,
+ OUTPUT_SHORT_MONOTONIC,
+ OUTPUT_SHORT_UNIX,
+ OUTPUT_VERBOSE,
+ OUTPUT_EXPORT,
+ OUTPUT_JSON,
+ OUTPUT_JSON_PRETTY,
+ OUTPUT_JSON_SSE,
+ OUTPUT_JSON_SEQ,
+ OUTPUT_CAT,
+ OUTPUT_WITH_UNIT,
+ _OUTPUT_MODE_MAX,
+ _OUTPUT_MODE_INVALID = -1
+} OutputMode;
+
+static inline bool OUTPUT_MODE_IS_JSON(OutputMode m) {
+ return IN_SET(m, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ);
+}
+
+/* The output flags definitions are shared by the logs and process tree output. Some apply to both, some only to the
+ * logs output, others only to the process tree output. */
+
+typedef enum OutputFlags {
+ OUTPUT_SHOW_ALL = 1 << 0,
+ OUTPUT_FOLLOW = 1 << 1,
+ OUTPUT_WARN_CUTOFF = 1 << 2,
+ OUTPUT_FULL_WIDTH = 1 << 3,
+ OUTPUT_COLOR = 1 << 4,
+ OUTPUT_CATALOG = 1 << 5,
+ OUTPUT_BEGIN_NEWLINE = 1 << 6,
+ OUTPUT_UTC = 1 << 7,
+ OUTPUT_KERNEL_THREADS = 1 << 8,
+ OUTPUT_NO_HOSTNAME = 1 << 9,
+} OutputFlags;
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m);
+
+const char* output_mode_to_string(OutputMode m) _const_;
+OutputMode output_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/pager.c b/src/shared/pager.c
new file mode 100644
index 0000000..bf2597e
--- /dev/null
+++ b/src/shared/pager.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "copy.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "pager.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+
+static pid_t pager_pid = 0;
+
+static int stored_stdout = -1;
+static int stored_stderr = -1;
+static bool stdout_redirected = false;
+static bool stderr_redirected = false;
+
+_noreturn_ static void pager_fallback(void) {
+ int r;
+
+ r = copy_bytes(STDIN_FILENO, STDOUT_FILENO, (uint64_t) -1, 0);
+ if (r < 0) {
+ log_error_errno(r, "Internal pager failed: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+}
+
+static int no_quit_on_interrupt(int exe_name_fd, const char *less_opts) {
+ _cleanup_fclose_ FILE *file = NULL;
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ assert(exe_name_fd >= 0);
+ assert(less_opts);
+
+ /* This takes ownership of exe_name_fd */
+ file = fdopen(exe_name_fd, "r");
+ if (!file) {
+ safe_close(exe_name_fd);
+ return log_error_errno(errno, "Failed to create FILE object: %m");
+ }
+
+ /* Find the last line */
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+
+ r = read_line(file, LONG_LINE_MAX, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from socket: %m");
+ if (r == 0)
+ break;
+
+ free_and_replace(line, t);
+ }
+
+ /* We only treat "less" specially.
+ * Return true whenever option K is *not* set. */
+ r = streq_ptr(line, "less") && !strchr(less_opts, 'K');
+
+ log_debug("Pager executable is \"%s\", options \"%s\", quit_on_interrupt: %s",
+ strnull(line), less_opts, yes_no(!r));
+ return r;
+}
+
+int pager_open(PagerFlags flags) {
+ _cleanup_close_pair_ int fd[2] = { -1, -1 }, exe_name_pipe[2] = { -1, -1 };
+ _cleanup_strv_free_ char **pager_args = NULL;
+ const char *pager, *less_opts;
+ int r;
+
+ if (flags & PAGER_DISABLE)
+ return 0;
+
+ if (pager_pid > 0)
+ return 1;
+
+ if (terminal_is_dumb())
+ return 0;
+
+ if (!is_main_thread())
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Pager invoked from wrong thread.");
+
+ pager = getenv("SYSTEMD_PAGER");
+ if (!pager)
+ pager = getenv("PAGER");
+
+ if (pager) {
+ pager_args = strv_split(pager, WHITESPACE);
+ if (!pager_args)
+ return log_oom();
+
+ /* If the pager is explicitly turned off, honour it */
+ if (strv_isempty(pager_args) || strv_equal(pager_args, STRV_MAKE("cat")))
+ return 0;
+ }
+
+ /* Determine and cache number of columns/lines before we spawn the pager so that we get the value from the
+ * actual tty */
+ (void) columns();
+ (void) lines();
+
+ if (pipe2(fd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pager pipe: %m");
+
+ /* This is a pipe to feed the name of the executed pager binary into the parent */
+ if (pipe2(exe_name_pipe, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create exe_name pipe: %m");
+
+ /* Initialize a good set of less options */
+ less_opts = getenv("SYSTEMD_LESS");
+ if (!less_opts)
+ less_opts = "FRSXMK";
+ if (flags & PAGER_JUMP_TO_END)
+ less_opts = strjoina(less_opts, " +G");
+
+ r = safe_fork("(pager)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pager_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *less_charset, *exe;
+
+ /* In the child start the pager */
+
+ if (dup2(fd[0], STDIN_FILENO) < 0) {
+ log_error_errno(errno, "Failed to duplicate file descriptor to STDIN: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close_pair(fd);
+
+ if (setenv("LESS", less_opts, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESS: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Initialize a good charset for less. This is
+ * particularly important if we output UTF-8
+ * characters. */
+ less_charset = getenv("SYSTEMD_LESSCHARSET");
+ if (!less_charset && is_locale_utf8())
+ less_charset = "utf-8";
+ if (less_charset &&
+ setenv("LESSCHARSET", less_charset, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESSCHARSET: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (pager_args) {
+ r = loop_write(exe_name_pipe[1], pager_args[0], strlen(pager_args[0]) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ execvp(pager_args[0], pager_args);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using fallback pagers: %m", pager_args[0]);
+ }
+
+ /* Debian's alternatives command for pagers is
+ * called 'pager'. Note that we do not call
+ * sensible-pagers here, since that is just a
+ * shell script that implements a logic that
+ * is similar to this one anyway, but is
+ * Debian-specific. */
+ FOREACH_STRING(exe, "pager", "less", "more") {
+ r = loop_write(exe_name_pipe[1], exe, strlen(exe) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ execlp(exe, exe, NULL);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using next fallback pager: %m", exe);
+ }
+
+ r = loop_write(exe_name_pipe[1], "(built-in)", strlen("(built-in)") + 1, false); /* length must match the written literal, incl. trailing NUL */
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ pager_fallback();
+ /* not reached */
+ }
+
+ /* Return in the parent */
+ stored_stdout = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDOUT_FILENO) < 0) {
+ stored_stdout = safe_close(stored_stdout);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stdout_redirected = true;
+
+ stored_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDERR_FILENO) < 0) {
+ stored_stderr = safe_close(stored_stderr);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stderr_redirected = true;
+
+ exe_name_pipe[1] = safe_close(exe_name_pipe[1]);
+
+ r = no_quit_on_interrupt(TAKE_FD(exe_name_pipe[0]), less_opts);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ (void) ignore_signals(SIGINT, -1);
+
+ return 1;
+}
+
+void pager_close(void) {
+
+ if (pager_pid <= 0)
+ return;
+
+ /* Inform pager that we are done */
+ (void) fflush(stdout);
+ if (stdout_redirected)
+ if (stored_stdout < 0 || dup2(stored_stdout, STDOUT_FILENO) < 0)
+ (void) close(STDOUT_FILENO);
+ stored_stdout = safe_close(stored_stdout);
+ (void) fflush(stderr);
+ if (stderr_redirected)
+ if (stored_stderr < 0 || dup2(stored_stderr, STDERR_FILENO) < 0)
+ (void) close(STDERR_FILENO);
+ stored_stderr = safe_close(stored_stderr);
+ stdout_redirected = stderr_redirected = false;
+
+ (void) kill(pager_pid, SIGCONT);
+ (void) wait_for_terminate(pager_pid, NULL);
+ pager_pid = 0;
+}
+
+bool pager_have(void) {
+ return pager_pid > 0;
+}
+
+int show_man_page(const char *desc, bool null_stdio) {
+ const char *args[4] = { "man", NULL, NULL, NULL };
+ char *e = NULL;
+ pid_t pid;
+ size_t k;
+ int r;
+
+ k = strlen(desc);
+
+ if (k > 0 && desc[k-1] == ')') /* "page(section)" form; k > 0 avoids reading desc[-1] on an empty string */
+ e = strrchr(desc, '(');
+
+ if (e) {
+ char *page = NULL, *section = NULL;
+
+ page = strndupa(desc, e - desc);
+ section = strndupa(e + 1, desc + k - e - 2);
+
+ args[1] = section;
+ args[2] = page;
+ } else
+ args[1] = desc;
+
+ r = safe_fork("(man)", FORK_RESET_SIGNALS|FORK_DEATHSIG|(null_stdio ? FORK_NULL_STDIO : 0)|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execvp(args[0], (char**) args);
+ log_error_errno(errno, "Failed to execute man: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(NULL, pid, 0);
+}
diff --git a/src/shared/pager.h b/src/shared/pager.h
new file mode 100644
index 0000000..8299e23
--- /dev/null
+++ b/src/shared/pager.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum PagerFlags {
+ PAGER_DISABLE = 1 << 0,
+ PAGER_JUMP_TO_END = 1 << 1,
+} PagerFlags;
+
+int pager_open(PagerFlags flags);
+void pager_close(void);
+bool pager_have(void) _pure_;
+
+int show_man_page(const char *page, bool null_stdio);
diff --git a/src/shared/path-lookup.c b/src/shared/path-lookup.c
new file mode 100644
index 0000000..442fde7
--- /dev/null
+++ b/src/shared/path-lookup.c
@@ -0,0 +1,903 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "install.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "user-util.h"
+#include "util.h"
+
+int xdg_user_runtime_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+
+ assert(ret);
+ assert(suffix);
+
+ e = getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return -ENXIO;
+
+ j = strappend(e, suffix);
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 0;
+}
+
+int xdg_user_config_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+ int r;
+
+ assert(ret);
+
+ e = getenv("XDG_CONFIG_HOME");
+ if (e)
+ j = strappend(e, suffix);
+ else {
+ _cleanup_free_ char *home = NULL;
+
+ r = get_home_dir(&home);
+ if (r < 0)
+ return r;
+
+ j = strjoin(home, "/.config", suffix);
+ }
+
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 0;
+}
+
+int xdg_user_data_dir(char **ret, const char *suffix) {
+ const char *e;
+ char *j;
+ int r;
+
+ assert(ret);
+ assert(suffix);
+
+ /* We don't treat /etc/xdg/systemd here as the spec
+ * suggests because we assume that is a link to
+ * /etc/systemd/ anyway. */
+
+ e = getenv("XDG_DATA_HOME");
+ if (e)
+ j = strappend(e, suffix);
+ else {
+ _cleanup_free_ char *home = NULL;
+
+ r = get_home_dir(&home);
+ if (r < 0)
+ return r;
+
+ j = strjoin(home, "/.local/share", suffix);
+ }
+ if (!j)
+ return -ENOMEM;
+
+ *ret = j;
+ return 1;
+}
+
+static const char* const user_data_unit_paths[] = {
+ "/usr/local/lib/systemd/user",
+ "/usr/local/share/systemd/user",
+ USER_DATA_UNIT_PATH,
+ "/usr/lib/systemd/user",
+ "/usr/share/systemd/user",
+ NULL
+};
+
+static const char* const user_config_unit_paths[] = {
+ USER_CONFIG_UNIT_PATH,
+ "/etc/systemd/user",
+ NULL
+};
+
+int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs) {
+ /* Implement the mechanisms defined in
+ *
+ * http://standards.freedesktop.org/basedir-spec/basedir-spec-0.6.html
+ *
+ * We look in both the config and the data dirs because we
+ * want to encourage that distributors ship their unit files
+ * as data, and allow overriding as configuration.
+ */
+ const char *e;
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+
+ e = getenv("XDG_CONFIG_DIRS");
+ if (e) {
+ config_dirs = strv_split(e, ":");
+ if (!config_dirs)
+ return -ENOMEM;
+ }
+
+ e = getenv("XDG_DATA_DIRS");
+ if (e)
+ data_dirs = strv_split(e, ":");
+ else
+ data_dirs = strv_new("/usr/local/share",
+ "/usr/share");
+ if (!data_dirs)
+ return -ENOMEM;
+
+ *ret_config_dirs = TAKE_PTR(config_dirs);
+ *ret_data_dirs = TAKE_PTR(data_dirs);
+
+ return 0;
+}
+
+static char** user_dirs(
+ const char *persistent_config,
+ const char *runtime_config,
+ const char *global_persistent_config,
+ const char *global_runtime_config,
+ const char *generator,
+ const char *generator_early,
+ const char *generator_late,
+ const char *transient,
+ const char *persistent_control,
+ const char *runtime_control) {
+
+ _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL;
+ _cleanup_free_ char *data_home = NULL;
+ _cleanup_strv_free_ char **res = NULL;
+ int r;
+
+ r = xdg_user_dirs(&config_dirs, &data_dirs);
+ if (r < 0)
+ return NULL;
+
+ r = xdg_user_data_dir(&data_home, "/systemd/user");
+ if (r < 0 && r != -ENXIO)
+ return NULL;
+
+ /* Now merge everything we found. */
+ if (strv_extend(&res, persistent_control) < 0)
+ return NULL;
+
+ if (strv_extend(&res, runtime_control) < 0)
+ return NULL;
+
+ if (strv_extend(&res, transient) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator_early) < 0)
+ return NULL;
+
+ if (strv_extend_strv_concat(&res, config_dirs, "/systemd/user") < 0)
+ return NULL;
+
+ if (strv_extend(&res, persistent_config) < 0)
+ return NULL;
+
+ /* global config has lower priority than the user config of the same type */
+ if (strv_extend(&res, global_persistent_config) < 0)
+ return NULL;
+
+ if (strv_extend_strv(&res, (char**) user_config_unit_paths, false) < 0)
+ return NULL;
+
+ if (strv_extend(&res, runtime_config) < 0)
+ return NULL;
+
+ if (strv_extend(&res, global_runtime_config) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator) < 0)
+ return NULL;
+
+ if (strv_extend(&res, data_home) < 0)
+ return NULL;
+
+ if (strv_extend_strv_concat(&res, data_dirs, "/systemd/user") < 0)
+ return NULL;
+
+ if (strv_extend_strv(&res, (char**) user_data_unit_paths, false) < 0)
+ return NULL;
+
+ if (strv_extend(&res, generator_late) < 0)
+ return NULL;
+
+ if (path_strv_make_absolute_cwd(res) < 0)
+ return NULL;
+
+ return TAKE_PTR(res);
+}
+
+bool path_is_user_data_dir(const char *path) {
+ assert(path);
+
+ return strv_contains((char**) user_data_unit_paths, path);
+}
+
+bool path_is_user_config_dir(const char *path) {
+ assert(path);
+
+ return strv_contains((char**) user_config_unit_paths, path);
+}
+
+static int acquire_generator_dirs(
+ UnitFileScope scope,
+ const char *tempdir,
+ char **generator,
+ char **generator_early,
+ char **generator_late) {
+
+ _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL;
+ const char *prefix;
+
+ assert(generator);
+ assert(generator_early);
+ assert(generator_late);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL));
+
+ if (scope == UNIT_FILE_GLOBAL)
+ return -EOPNOTSUPP;
+
+ if (tempdir)
+ prefix = tempdir;
+ else if (scope == UNIT_FILE_SYSTEM)
+ prefix = "/run/systemd";
+ else {
+ /* UNIT_FILE_USER */
+ const char *e;
+
+ e = getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return -ENXIO;
+
+ prefix = strjoina(e, "/systemd");
+ }
+
+ x = strappend(prefix, "/generator");
+ if (!x)
+ return -ENOMEM;
+
+ y = strappend(prefix, "/generator.early");
+ if (!y)
+ return -ENOMEM;
+
+ z = strappend(prefix, "/generator.late");
+ if (!z)
+ return -ENOMEM;
+
+ *generator = TAKE_PTR(x);
+ *generator_early = TAKE_PTR(y);
+ *generator_late = TAKE_PTR(z);
+
+ return 0;
+}
+
+static int acquire_transient_dir(
+ UnitFileScope scope,
+ const char *tempdir,
+ char **ret) {
+
+ char *transient;
+
+ assert(ret);
+ assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL));
+
+ if (scope == UNIT_FILE_GLOBAL)
+ return -EOPNOTSUPP;
+
+ if (tempdir)
+ transient = strjoin(tempdir, "/transient");
+ else if (scope == UNIT_FILE_SYSTEM)
+ transient = strdup("/run/systemd/transient");
+ else
+ return xdg_user_runtime_dir(ret, "/systemd/transient");
+
+ if (!transient)
+ return -ENOMEM;
+ *ret = transient;
+ return 0;
+}
+
+static int acquire_config_dirs(UnitFileScope scope, char **persistent, char **runtime) {
+ _cleanup_free_ char *a = NULL, *b = NULL;
+ int r;
+
+ assert(persistent);
+ assert(runtime);
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ a = strdup(SYSTEM_CONFIG_UNIT_PATH);
+ b = strdup("/run/systemd/system");
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ a = strdup(USER_CONFIG_UNIT_PATH);
+ b = strdup("/run/systemd/user");
+ break;
+
+ case UNIT_FILE_USER:
+ r = xdg_user_config_dir(&a, "/systemd/user");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(runtime, "/systemd/user");
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ /* If XDG_RUNTIME_DIR is not set, don't consider that fatal, simply initialize the runtime
+ * directory to NULL */
+ *runtime = NULL;
+ }
+
+ *persistent = TAKE_PTR(a);
+
+ return 0;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope value.");
+ }
+
+ if (!a || !b)
+ return -ENOMEM;
+
+ *persistent = TAKE_PTR(a);
+ *runtime = TAKE_PTR(b);
+
+ return 0;
+}
+
+static int acquire_control_dirs(UnitFileScope scope, char **persistent, char **runtime) {
+ _cleanup_free_ char *a = NULL;
+ int r;
+
+ assert(persistent);
+ assert(runtime);
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM: {
+ _cleanup_free_ char *b = NULL;
+
+ a = strdup("/etc/systemd/system.control");
+ if (!a)
+ return -ENOMEM;
+
+ b = strdup("/run/systemd/system.control");
+ if (!b)
+ return -ENOMEM;
+
+ *runtime = TAKE_PTR(b);
+
+ break;
+ }
+
+ case UNIT_FILE_USER:
+ r = xdg_user_config_dir(&a, "/systemd/user.control");
+ if (r < 0 && r != -ENXIO)
+ return r;
+
+ r = xdg_user_runtime_dir(runtime, "/systemd/user.control");
+ if (r < 0) {
+ if (r != -ENXIO)
+ return r;
+
+ /* If XDG_RUNTIME_DIR is not set, don't consider this fatal, simply initialize the directory to
+ * NULL */
+ *runtime = NULL;
+ }
+
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ return -EOPNOTSUPP;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope value.");
+ }
+
+ *persistent = TAKE_PTR(a);
+
+ return 0;
+}
+
+ /* Returns the persistent and runtime directories for attached portable service units. Only the
+  * system scope is supported; any other scope yields -EOPNOTSUPP. On success both return parameters
+  * receive newly allocated strings owned by the caller; -ENOMEM is returned if allocation fails. */
+ static int acquire_attached_dirs(
+                 UnitFileScope scope,
+                 char **ret_persistent,
+                 char **ret_runtime) {
+
+         _cleanup_free_ char *etc_dir = NULL, *run_dir = NULL;
+
+         assert(ret_persistent);
+         assert(ret_runtime);
+
+         /* Portable services are not available to regular users for now. */
+         if (scope != UNIT_FILE_SYSTEM)
+                 return -EOPNOTSUPP;
+
+         etc_dir = strdup("/etc/systemd/system.attached");
+         run_dir = strdup("/run/systemd/system.attached");
+         if (!etc_dir || !run_dir)
+                 return -ENOMEM;
+
+         *ret_persistent = TAKE_PTR(etc_dir);
+         *ret_runtime = TAKE_PTR(run_dir);
+
+         return 0;
+ }
+
+ /* Replaces the string *p by the result of prefix_root(root_dir, *p). A NULL *p is left untouched.
+  * Returns 0 on success and -ENOMEM if the new string could not be allocated (in which case *p is
+  * not modified). */
+ static int patch_root_prefix(char **p, const char *root_dir) {
+         char *prefixed;
+
+         assert(p);
+
+         if (*p) {
+                 prefixed = prefix_root(root_dir, *p);
+                 if (!prefixed)
+                         return -ENOMEM;
+
+                 /* Swap in the new string, releasing the old one */
+                 free(*p);
+                 *p = prefixed;
+         }
+
+         return 0;
+ }
+
+ /* Applies patch_root_prefix() to every entry of the string vector l. Does nothing if root_dir is
+  * NULL. Stops at, and returns, the first error. */
+ static int patch_root_prefix_strv(char **l, const char *root_dir) {
+         char **entry;
+         int r;
+
+         if (!root_dir)
+                 return 0;
+
+         for (entry = l; entry && *entry; entry++) {
+                 r = patch_root_prefix(entry, root_dir);
+                 if (r < 0)
+                         return r;
+         }
+
+         return 0;
+ }
+
+ /* Fills in *p with the unit file search path and the special directories (configuration, generator,
+  * transient, control, attached) for the specified scope.
+  *
+  * p        - structure to initialize; it is overwritten as a whole on success
+  * scope    - which unit file scope to compute the paths for
+  * flags    - LOOKUP_PATHS_* flags (LOOKUP_PATHS_SPLIT_USR is forced on if built with HAVE_SPLIT_USR)
+  * root_dir - optional alternative root; if set (and not "/") all resulting paths are prefixed with it
+  *
+  * Returns 0 on success or a negative errno: -EINVAL if a root directory is combined with
+  * UNIT_FILE_USER, -ENOTDIR if root_dir is not a directory, -ENOMEM on allocation failure, or any
+  * error propagated from the helpers called below. On failure *p is not touched. */
+ int lookup_paths_init(
+ LookupPaths *p,
+ UnitFileScope scope,
+ LookupPathsFlags flags,
+ const char *root_dir) {
+
+ _cleanup_(rmdir_and_freep) char *tempdir = NULL;
+ _cleanup_free_ char
+ *root = NULL,
+ *persistent_config = NULL, *runtime_config = NULL,
+ *global_persistent_config = NULL, *global_runtime_config = NULL,
+ *generator = NULL, *generator_early = NULL, *generator_late = NULL,
+ *transient = NULL,
+ *persistent_control = NULL, *runtime_control = NULL,
+ *persistent_attached = NULL, *runtime_attached = NULL;
+ bool append = false; /* Add items from SYSTEMD_UNIT_PATH before normal directories */
+ _cleanup_strv_free_ char **paths = NULL;
+ const char *e;
+ int r;
+
+ assert(p);
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ #if HAVE_SPLIT_USR
+ flags |= LOOKUP_PATHS_SPLIT_USR;
+ #endif
+
+ /* Validate and remember the alternative root directory, if there is one */
+ if (!empty_or_root(root_dir)) {
+ if (scope == UNIT_FILE_USER)
+ return -EINVAL;
+
+ r = is_dir(root_dir, true);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -ENOTDIR;
+
+ root = strdup(root_dir);
+ if (!root)
+ return -ENOMEM;
+ }
+
+ /* If requested, create a temporary directory that is handed to the generator/transient
+ * directory lookups below */
+ if (flags & LOOKUP_PATHS_TEMPORARY_GENERATED) {
+ r = mkdtemp_malloc("/tmp/systemd-temporary-XXXXXX", &tempdir);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create temporary directory: %m");
+ }
+
+ /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_config to NULL */
+ r = acquire_config_dirs(scope, &persistent_config, &runtime_config);
+ if (r < 0)
+ return r;
+
+ /* For the user scope we additionally need the global configuration directories */
+ if (scope == UNIT_FILE_USER) {
+ r = acquire_config_dirs(UNIT_FILE_GLOBAL, &global_persistent_config, &global_runtime_config);
+ if (r < 0)
+ return r;
+ }
+
+ if ((flags & LOOKUP_PATHS_EXCLUDE_GENERATED) == 0) {
+ /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */
+ r = acquire_generator_dirs(scope, tempdir,
+ &generator, &generator_early, &generator_late);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO))
+ return r;
+ }
+
+ /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */
+ r = acquire_transient_dir(scope, tempdir, &transient);
+ if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO))
+ return r;
+
+ /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_control to NULL */
+ r = acquire_control_dirs(scope, &persistent_control, &runtime_control);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return r;
+
+ r = acquire_attached_dirs(scope, &persistent_attached, &runtime_attached);
+ if (r < 0 && r != -EOPNOTSUPP)
+ return r;
+
+ /* First priority is whatever has been passed to us via env vars */
+ e = getenv("SYSTEMD_UNIT_PATH");
+ if (e) {
+ const char *k;
+
+ /* A trailing ":" means: use the listed directories first, then the built-in ones */
+ k = endswith(e, ":");
+ if (k) {
+ e = strndupa(e, k - e);
+ append = true;
+ }
+
+ /* FIXME: empty components in other places should be rejected. */
+
+ r = path_split_and_make_absolute(e, &paths);
+ if (r < 0)
+ return r;
+ }
+
+ if (!paths || append) {
+ /* Let's figure something out. */
+
+ _cleanup_strv_free_ char **add = NULL;
+
+ /* For the user units we include share/ in the search
+ * path in order to comply with the XDG basedir spec.
+ * For the system stuff we avoid such nonsense. OTOH
+ * we include /lib in the search path for the system
+ * stuff but avoid it for user stuff. */
+
+ switch (scope) {
+
+ case UNIT_FILE_SYSTEM:
+ add = strv_new(
+ /* If you modify this you also want to modify
+ * systemdsystemunitpath= in systemd.pc.in! */
+ STRV_IFNOTNULL(persistent_control),
+ STRV_IFNOTNULL(runtime_control),
+ STRV_IFNOTNULL(transient),
+ STRV_IFNOTNULL(generator_early),
+ persistent_config,
+ SYSTEM_CONFIG_UNIT_PATH,
+ "/etc/systemd/system",
+ STRV_IFNOTNULL(persistent_attached),
+ runtime_config,
+ "/run/systemd/system",
+ STRV_IFNOTNULL(runtime_attached),
+ STRV_IFNOTNULL(generator),
+ "/usr/local/lib/systemd/system",
+ SYSTEM_DATA_UNIT_PATH,
+ "/usr/lib/systemd/system",
+ STRV_IFNOTNULL(flags & LOOKUP_PATHS_SPLIT_USR ? "/lib/systemd/system" : NULL),
+ STRV_IFNOTNULL(generator_late));
+ break;
+
+ case UNIT_FILE_GLOBAL:
+ add = strv_new(
+ /* If you modify this you also want to modify
+ * systemduserunitpath= in systemd.pc.in, and
+ * the arrays in user_dirs() above! */
+ STRV_IFNOTNULL(persistent_control),
+ STRV_IFNOTNULL(runtime_control),
+ STRV_IFNOTNULL(transient),
+ STRV_IFNOTNULL(generator_early),
+ persistent_config,
+ USER_CONFIG_UNIT_PATH,
+ "/etc/systemd/user",
+ runtime_config,
+ "/run/systemd/user",
+ STRV_IFNOTNULL(generator),
+ "/usr/local/share/systemd/user",
+ "/usr/share/systemd/user",
+ "/usr/local/lib/systemd/user",
+ USER_DATA_UNIT_PATH,
+ "/usr/lib/systemd/user",
+ STRV_IFNOTNULL(generator_late));
+ break;
+
+ case UNIT_FILE_USER:
+ add = user_dirs(persistent_config, runtime_config,
+ global_persistent_config, global_runtime_config,
+ generator, generator_early, generator_late,
+ transient,
+ persistent_control, runtime_control);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unexpected scope?");
+ }
+
+ if (!add)
+ return -ENOMEM;
+
+ if (paths) {
+ r = strv_extend_strv(&paths, add, true);
+ if (r < 0)
+ return r;
+ } else
+ /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it,
+ * and don't have to copy anything */
+ paths = TAKE_PTR(add);
+ }
+
+ /* Now prefix everything we determined with the root directory (a no-op for NULL entries) */
+ r = patch_root_prefix(&persistent_config, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_config, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&generator, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&generator_early, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&generator_late, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&transient, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&persistent_control, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_control, root);
+ if (r < 0)
+ return r;
+
+ r = patch_root_prefix(&persistent_attached, root);
+ if (r < 0)
+ return r;
+ r = patch_root_prefix(&runtime_attached, root);
+ if (r < 0)
+ return r;
+
+ /* Propagate the actual error, consistent with all the calls above */
+ r = patch_root_prefix_strv(paths, root);
+ if (r < 0)
+ return r;
+
+ /* Everything succeeded: hand ownership of all strings over to the caller's structure */
+ *p = (LookupPaths) {
+ .search_path = strv_uniq(paths),
+
+ .persistent_config = TAKE_PTR(persistent_config),
+ .runtime_config = TAKE_PTR(runtime_config),
+
+ .generator = TAKE_PTR(generator),
+ .generator_early = TAKE_PTR(generator_early),
+ .generator_late = TAKE_PTR(generator_late),
+
+ .transient = TAKE_PTR(transient),
+
+ .persistent_control = TAKE_PTR(persistent_control),
+ .runtime_control = TAKE_PTR(runtime_control),
+
+ .persistent_attached = TAKE_PTR(persistent_attached),
+ .runtime_attached = TAKE_PTR(runtime_attached),
+
+ .root_dir = TAKE_PTR(root),
+ .temporary_dir = TAKE_PTR(tempdir),
+ };
+
+ /* The vector is now referenced by p->search_path, don't let the cleanup handler free it */
+ paths = NULL;
+ return 0;
+ }
+
+ /* Releases all strings referenced by *p and resets the members to NULL. The structure itself is not
+  * freed. Passing NULL is a no-op. */
+ void lookup_paths_free(LookupPaths *p) {
+         size_t i;
+
+         if (!p)
+                 return;
+
+         p->search_path = strv_free(p->search_path);
+
+         /* All remaining members are plain strings, release them in one go */
+         char **members[] = {
+                 &p->persistent_config,
+                 &p->runtime_config,
+                 &p->persistent_attached,
+                 &p->runtime_attached,
+                 &p->generator,
+                 &p->generator_early,
+                 &p->generator_late,
+                 &p->transient,
+                 &p->persistent_control,
+                 &p->runtime_control,
+                 &p->root_dir,
+                 &p->temporary_dir,
+         };
+
+         for (i = 0; i < sizeof(members) / sizeof(members[0]); i++)
+                 *members[i] = mfree(*members[i]);
+ }
+
+ /* Shrinks p->search_path in place: entries that do not exist and entries referring to a directory
+  * already listed earlier are removed. The transient and control directories are always kept.
+  * Afterwards the resulting list is logged at debug level; if nothing is left p->search_path is
+  * freed and set to NULL.
+  *
+  * Returns 0 on success (including when there is no search path at all) and -ENOMEM on allocation
+  * failure. */
+ int lookup_paths_reduce(LookupPaths *p) {
+ _cleanup_free_ struct stat *stats = NULL;
+ size_t n_stats = 0, allocated = 0;
+ size_t c = 0;
+ int r;
+
+ assert(p);
+
+ /* Drop duplicates and non-existing directories from the search path. We figure out whether two directories are
+ * the same by comparing their device and inode numbers. */
+
+ if (!p->search_path)
+ return 0;
+
+ /* Note that 'c' is only advanced when the current entry is kept; on removal the following
+ * entries are shifted down, so the same index is looked at again. */
+ while (p->search_path[c]) {
+ struct stat st;
+ size_t k;
+
+ /* Never strip the transient and control directories from the path */
+ if (path_equal_ptr(p->search_path[c], p->transient) ||
+ path_equal_ptr(p->search_path[c], p->persistent_control) ||
+ path_equal_ptr(p->search_path[c], p->runtime_control)) {
+ c++;
+ continue;
+ }
+
+ r = chase_symlinks_and_stat(p->search_path[c], p->root_dir, 0, NULL, &st);
+ if (r == -ENOENT)
+ goto remove_item;
+ if (r < 0) {
+ /* If something we don't grok happened, let's better leave it in. */
+ log_debug_errno(r, "Failed to chase and stat %s: %m", p->search_path[c]);
+ c++;
+ continue;
+ }
+
+ /* Compare against the stat data of all entries we decided to keep so far */
+ for (k = 0; k < n_stats; k++)
+ if (stats[k].st_dev == st.st_dev &&
+ stats[k].st_ino == st.st_ino)
+ break;
+
+ if (k < n_stats) /* Is there already an entry with the same device/inode? */
+ goto remove_item;
+
+ if (!GREEDY_REALLOC(stats, allocated, n_stats+1))
+ return -ENOMEM;
+
+ stats[n_stats++] = st;
+ c++;
+ continue;
+
+ remove_item:
+ /* Free the entry and move the rest of the vector (including the terminating NULL) one
+ * slot down */
+ free(p->search_path[c]);
+ memmove(p->search_path + c,
+ p->search_path + c + 1,
+ (strv_length(p->search_path + c + 1) + 1) * sizeof(char*));
+ }
+
+ if (strv_isempty(p->search_path)) {
+ log_debug("Ignoring unit files.");
+ p->search_path = strv_free(p->search_path);
+ } else {
+ _cleanup_free_ char *t;
+
+ t = strv_join(p->search_path, "\n\t");
+ if (!t)
+ return -ENOMEM;
+
+ log_debug("Looking for unit files in (higher priority first):\n\t%s", t);
+ }
+
+ return 0;
+ }
+
+ /* Creates the three generator output directories. All three are always attempted, even if an
+  * earlier one fails. Returns -EINVAL if any of the three paths is unset, otherwise the first error
+  * encountered, or the result of the first mkdir_p_label() call if none failed. */
+ int lookup_paths_mkdir_generator(LookupPaths *p) {
+         int normal, early, late;
+
+         assert(p);
+
+         if (!(p->generator && p->generator_early && p->generator_late))
+                 return -EINVAL;
+
+         normal = mkdir_p_label(p->generator, 0755);
+         early = mkdir_p_label(p->generator_early, 0755);
+         late = mkdir_p_label(p->generator_late, 0755);
+
+         /* Report the earliest failure, if there was one */
+         if (normal < 0)
+                 return normal;
+         if (early < 0)
+                 return early;
+         if (late < 0)
+                 return late;
+
+         return normal;
+ }
+
+ /* Tries to remove each of the generator directories with rmdir(); the results are ignored. */
+ void lookup_paths_trim_generator(LookupPaths *p) {
+         size_t i;
+
+         assert(p);
+
+         /* Trim empty dirs */
+
+         const char *dirs[] = {
+                 p->generator,
+                 p->generator_early,
+                 p->generator_late,
+         };
+
+         for (i = 0; i < sizeof(dirs) / sizeof(dirs[0]); i++)
+                 if (dirs[i])
+                         (void) rmdir(dirs[i]);
+ }
+
+ /* Recursively removes the generator directories and the temporary directory (where set),
+  * including the directories themselves. Failures are ignored. */
+ void lookup_paths_flush_generator(LookupPaths *p) {
+         size_t i;
+
+         assert(p);
+
+         /* Flush the generated unit files in full */
+
+         const char *dirs[] = {
+                 p->generator,
+                 p->generator_early,
+                 p->generator_late,
+                 p->temporary_dir,
+         };
+
+         for (i = 0; i < sizeof(dirs) / sizeof(dirs[0]); i++)
+                 if (dirs[i])
+                         (void) rm_rf(dirs[i], REMOVE_ROOT|REMOVE_PHYSICAL);
+ }
+
+ /* Returns a newly allocated string vector listing the directories that are searched for generator
+  * binaries in the given scope (the global and user scopes share the same list). The result is
+  * whatever strv_new() returns. */
+ char **generator_binary_paths(UnitFileScope scope) {
+
+         if (scope == UNIT_FILE_SYSTEM)
+                 return strv_new("/run/systemd/system-generators",
+                                 "/etc/systemd/system-generators",
+                                 "/usr/local/lib/systemd/system-generators",
+                                 SYSTEM_GENERATOR_PATH);
+
+         if (scope == UNIT_FILE_GLOBAL || scope == UNIT_FILE_USER)
+                 return strv_new("/run/systemd/user-generators",
+                                 "/etc/systemd/user-generators",
+                                 "/usr/local/lib/systemd/user-generators",
+                                 USER_GENERATOR_PATH);
+
+         assert_not_reached("Hmm, unexpected scope.");
+ }
diff --git a/src/shared/path-lookup.h b/src/shared/path-lookup.h
new file mode 100644
index 0000000..cb7d4d5
--- /dev/null
+++ b/src/shared/path-lookup.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+typedef struct LookupPaths LookupPaths;
+
+#include "install.h"
+#include "macro.h"
+
+ /* Flags controlling lookup_paths_init() */
+ typedef enum LookupPathsFlags {
+ /* Do not determine the generator directories */
+ LOOKUP_PATHS_EXCLUDE_GENERATED = 1 << 0,
+ /* Create a temporary directory and hand it to the generator/transient directory lookups */
+ LOOKUP_PATHS_TEMPORARY_GENERATED = 1 << 1,
+ /* Also search /lib/systemd/system in the system scope; always set when built with HAVE_SPLIT_USR */
+ LOOKUP_PATHS_SPLIT_USR = 1 << 2,
+ } LookupPathsFlags;
+
+ /* The set of directories relevant for looking up and managing unit files. Filled in by
+  * lookup_paths_init() and released with lookup_paths_free(). Individual members may be NULL if the
+  * respective directory could not be determined or does not apply to the scope. */
+ struct LookupPaths {
+ /* Where we look for unit files. This includes the individual special paths below, but also any vendor
+ * supplied, static unit file paths. */
+ char **search_path;
+
+ /* Where we shall create or remove our installation symlinks, aka "configuration", and where the user/admin
+ * shall place their own unit files. */
+ char *persistent_config;
+ char *runtime_config;
+
+ /* Where units from a portable service image shall be placed. */
+ char *persistent_attached;
+ char *runtime_attached;
+
+ /* Where to place generated unit files (i.e. those a "generator" tool generated). Note the special semantics of
+ * this directory: the generators are flushed each time a "systemctl daemon-reload" is issued. The user should
+ * not alter these directories directly. */
+ char *generator;
+ char *generator_early;
+ char *generator_late;
+
+ /* Where to place transient unit files (i.e. those created dynamically via the bus API). Note the special
+ * semantics of this directory: all units created transiently have their unit files removed as the transient
+ * unit is unloaded. The user should not alter this directory directly. */
+ char *transient;
+
+ /* Where the snippets created by "systemctl set-property" are placed. Note that for transient units, the
+ * snippets are placed in the transient directory though (see above). The user should not alter this directory
+ * directly. */
+ char *persistent_control;
+ char *runtime_control;
+
+ /* The root directory prepended to all items above, or NULL */
+ char *root_dir;
+
+ /* A temporary directory when running in test mode, to be nuked */
+ char *temporary_dir;
+ };
+
+ /* Computes the search path and special directories for the given scope and stores them in *p.
+  * Returns 0 on success, a negative errno on failure. */
+ int lookup_paths_init(LookupPaths *p, UnitFileScope scope, LookupPathsFlags flags, const char *root_dir);
+
+ int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs);
+ int xdg_user_runtime_dir(char **ret, const char *suffix);
+ int xdg_user_config_dir(char **ret, const char *suffix);
+ int xdg_user_data_dir(char **ret, const char *suffix);
+
+ bool path_is_user_data_dir(const char *path);
+ bool path_is_user_config_dir(const char *path);
+
+ /* Drops non-existing and duplicate directories from p->search_path */
+ int lookup_paths_reduce(LookupPaths *p);
+
+ /* Create, remove-if-empty, or recursively remove the generator directories of *p */
+ int lookup_paths_mkdir_generator(LookupPaths *p);
+ void lookup_paths_trim_generator(LookupPaths *p);
+ void lookup_paths_flush_generator(LookupPaths *p);
+
+ /* Frees the strings referenced by *p (but not *p itself); NULL is allowed */
+ void lookup_paths_free(LookupPaths *p);
+
+ /* Returns the list of directories searched for generator binaries in the given scope */
+ char **generator_binary_paths(UnitFileScope scope);
diff --git a/src/shared/pretty-print.c b/src/shared/pretty-print.c
new file mode 100644
index 0000000..de6274a
--- /dev/null
+++ b/src/shared/pretty-print.c
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/utsname.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+ /* Tells whether terminal hyperlinks shall be generated. The decision is made once and cached: an
+  * explicit boolean in $SYSTEMD_URLIFY wins, otherwise links are used if colors are enabled and no
+  * pager is active. */
+ static bool urlify_enabled(void) {
+         static int cached = -1;
+         int env;
+
+         /* Unfortunately 'less' doesn't support links like this yet 😭, hence let's disable this as long as there's a
+          * pager in effect. Let's drop this check as soon as less got fixed and enough time passed so that it's safe
+          * to assume that a link-enabled 'less' version has hit most installations. */
+
+         if (cached >= 0)
+                 return cached;
+
+         env = getenv_bool("SYSTEMD_URLIFY");
+         cached = env >= 0 ? env : (colors_enabled() && !pager_have());
+
+         return cached;
+ }
+
+ /* Formats 'text' as a clickable terminal link pointing to 'url' and returns the newly allocated
+  * result in *ret. If 'text' is NULL or empty the URL itself is used as text. If links are disabled
+  * a plain copy of the text is returned instead. Returns 0 on success, -ENOMEM on allocation
+  * failure. */
+ int terminal_urlify(const char *url, const char *text, char **ret) {
+         const char *label;
+         char *formatted;
+
+         assert(url);
+
+         /* Takes an URL and a pretty string and formats it as clickable link for the terminal. See
+          * https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda for details. */
+
+         label = isempty(text) ? url : text;
+
+         formatted = urlify_enabled()
+                 ? strjoin("\x1B]8;;", url, "\a", label, "\x1B]8;;\a")
+                 : strdup(label);
+         if (!formatted)
+                 return -ENOMEM;
+
+         *ret = formatted;
+         return 0;
+ }
+
+ /* Turns a file system path into a "file://<nodename><path>" URL, using the node name reported by
+  * uname(). Relative paths are made absolute relative to the current working directory first. The
+  * newly allocated URL is returned in *ret. Returns 0 on success, a negative errno on failure. */
+ int file_url_from_path(const char *path, char **ret) {
+         _cleanup_free_ char *abs_path = NULL;
+         struct utsname uts;
+         char *result;
+         int r;
+
+         if (uname(&uts) < 0)
+                 return -errno;
+
+         if (!path_is_absolute(path)) {
+                 r = path_make_absolute_cwd(path, &abs_path);
+                 if (r < 0)
+                         return r;
+
+                 path = abs_path;
+         }
+
+         /* As suggested by https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda, let's include the local
+          * hostname here. Note that we don't use gethostname_malloc() or gethostname_strict() since we are interested
+          * in the raw string the kernel has set, whatever it may be, under the assumption that terminals are not overly
+          * careful with validating the strings either. */
+
+         result = strjoin("file://", uts.nodename, path);
+         if (!result)
+                 return -ENOMEM;
+
+         *ret = result;
+         return 0;
+ }
+
+ /* Like terminal_urlify(), but takes a file system path that is converted to a file:// URL first.
+  * If 'text' is NULL or empty the path is used as link text. Returns -EINVAL for an empty path,
+  * -ENOMEM on allocation failure, or an error from the URL conversion. */
+ int terminal_urlify_path(const char *path, const char *text, char **ret) {
+         _cleanup_free_ char *url = NULL;
+         char *plain;
+         int r;
+
+         assert(path);
+
+         /* Much like terminal_urlify() above, but takes a file system path as input
+          * and turns it into a proper file:// URL first. */
+
+         if (isempty(path))
+                 return -EINVAL;
+
+         if (isempty(text))
+                 text = path;
+
+         if (urlify_enabled()) {
+                 r = file_url_from_path(path, &url);
+                 if (r < 0)
+                         return r;
+
+                 return terminal_urlify(url, text, ret);
+         }
+
+         /* Links are disabled: no need to build the URL, just hand back a copy of the text */
+         plain = strdup(text);
+         if (!plain)
+                 return -ENOMEM;
+
+         *ret = plain;
+         return 0;
+ }
+
+ /* Generates a terminal link with the URL "man:<page>(<section>)" and the text
+  * "<page>(<section>) man page". The result is returned via terminal_urlify(). */
+ int terminal_urlify_man(const char *page, const char *section, char **ret) {
+         const char *link, *label;
+
+         /* Both strings are assembled on the stack, hence nothing to free here */
+         link = strjoina("man:", page, "(", section, ")");
+         label = strjoina(page, "(", section, ") man page");
+
+         return terminal_urlify(link, label, ret);
+ }
+
+ /* Prints a highlighted "# <filename>" header (preceded by an empty line if 'newline' is true)
+  * followed by the contents of the file, line by line, to stdout. Returns 0 on success, a negative
+  * errno if the file cannot be opened, the header cannot be formatted, or reading fails. */
+ static int cat_file(const char *filename, bool newline) {
+         _cleanup_fclose_ FILE *input = NULL;
+         _cleanup_free_ char *header = NULL;
+         int r;
+
+         input = fopen(filename, "re");
+         if (!input)
+                 return -errno;
+
+         r = terminal_urlify_path(filename, NULL, &header);
+         if (r < 0)
+                 return r;
+
+         printf("%s%s# %s%s\n",
+                newline ? "\n" : "",
+                ansi_highlight_blue(),
+                header,
+                ansi_normal());
+         fflush(stdout);
+
+         for (;;) {
+                 _cleanup_free_ char *line = NULL;
+
+                 r = read_line(input, LONG_LINE_MAX, &line);
+                 if (r < 0)
+                         return log_error_errno(r, "Failed to read \"%s\": %m", filename);
+                 if (r == 0) /* EOF */
+                         return 0;
+
+                 puts(line);
+         }
+ }
+
+ /* Prints the main file 'file' (if not NULL) followed by all files listed in 'dropins'. If the main
+  * file does not exist and CAT_FLAGS_MAIN_FILE_OPTIONAL is set, a note is printed instead of
+  * failing. Returns 0 on success, otherwise the (logged) error of the first file that failed. */
+ int cat_files(const char *file, char **dropins, CatFlags flags) {
+         char **dropin;
+         int r;
+
+         if (file) {
+                 r = cat_file(file, false);
+                 if (r < 0) {
+                         if (r != -ENOENT || !(flags & CAT_FLAGS_MAIN_FILE_OPTIONAL))
+                                 return log_warning_errno(r, "Failed to cat %s: %m", file);
+
+                         printf("%s# config file %s not found%s\n",
+                                ansi_highlight_magenta(),
+                                file,
+                                ansi_normal());
+                 }
+         }
+
+         for (dropin = dropins; dropin && *dropin; dropin++) {
+                 /* Separate from the preceding output with an empty line, except for the very first file */
+                 bool separate = file || dropin != dropins;
+
+                 r = cat_file(*dropin, separate);
+                 if (r < 0)
+                         return log_warning_errno(r, "Failed to cat %s: %m", *dropin);
+         }
+
+         return 0;
+ }
+
+ /* Prints a visual separator to stdout: an underlined line of spaces as wide as the terminal plus an
+  * empty line if underlining is enabled, two newlines otherwise. */
+ void print_separator(void) {
+         size_t width, i;
+
+         /* Outputs a separator line that resolves to whitespace when copied from the terminal. We do that by outputting
+          * one line filled with spaces with ANSI underline set, followed by a second (empty) line. */
+
+         if (!underline_enabled()) {
+                 fputs("\n\n", stdout);
+                 return;
+         }
+
+         width = columns();
+
+         /* Take the stream lock once, so that the cheaper unlocked calls can be used below */
+         flockfile(stdout);
+         fputs_unlocked(ANSI_UNDERLINE, stdout);
+
+         for (i = 0; i < width; i++)
+                 fputc_unlocked(' ', stdout);
+
+         fputs_unlocked(ANSI_NORMAL "\n\n", stdout);
+         funlockfile(stdout);
+ }
+
+ /* Shows the configuration file <root>/etc/<name> together with all *.conf drop-ins found in the
+  * "<name>.d" directories below the prefixes of CONF_PATHS_NULSTR(""). A missing main file is
+  * tolerated. Returns 0 on success, a negative errno (already logged) on failure. */
+ int conf_files_cat(const char *root, const char *name) {
+         _cleanup_strv_free_ char **dropin_dirs = NULL, **dropin_files = NULL;
+         _cleanup_free_ char *main_file = NULL;
+         const char *prefix;
+         char **d;
+         int r;
+
+         /* Build the list of drop-in directories, one per configuration prefix */
+         NULSTR_FOREACH(prefix, CONF_PATHS_NULSTR("")) {
+                 assert(endswith(prefix, "/"));
+                 r = strv_extendf(&dropin_dirs, "%s%s.d", prefix, name);
+                 if (r < 0)
+                         return log_error_errno(r, "Failed to build directory list: %m");
+         }
+
+         r = conf_files_list_strv(&dropin_files, ".conf", root, 0, (const char* const*) dropin_dirs);
+         if (r < 0)
+                 return log_error_errno(r, "Failed to query file list: %m");
+
+         main_file = path_join(root, "/etc", name);
+         if (!main_file)
+                 return log_oom();
+
+         if (DEBUG_LOGGING) {
+                 log_debug("Looking for configuration in:");
+                 log_debug(" %s", main_file);
+                 for (d = dropin_dirs; d && *d; d++)
+                         log_debug(" %s/*.conf", *d);
+         }
+
+         /* show */
+         return cat_files(main_file, dropin_files, CAT_FLAGS_MAIN_FILE_OPTIONAL);
+ }
diff --git a/src/shared/pretty-print.h b/src/shared/pretty-print.h
new file mode 100644
index 0000000..12ab9ac
--- /dev/null
+++ b/src/shared/pretty-print.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+ /* Prints a separator (underlined blank line if underlining is enabled, else two newlines) to stdout */
+ void print_separator(void);
+
+ /* Converts a (possibly relative) path into a newly allocated file:// URL including the local node name */
+ int file_url_from_path(const char *path, char **ret);
+
+ /* Format a terminal hyperlink for a URL, a file system path, or a man page. The newly allocated
+  * result is returned in *ret; if links are disabled only the plain text is returned. */
+ int terminal_urlify(const char *url, const char *text, char **ret);
+ int terminal_urlify_path(const char *path, const char *text, char **ret);
+ int terminal_urlify_man(const char *page, const char *section, char **ret);
+
+ typedef enum CatFlags {
+ /* Do not fail if the main file passed to cat_files() does not exist */
+ CAT_FLAGS_MAIN_FILE_OPTIONAL = 1 << 0,
+ } CatFlags;
+
+ /* Print a main file plus drop-ins to stdout; conf_files_cat() locates them below /etc and the
+  * configuration drop-in directories for the given name first. */
+ int cat_files(const char *file, char **dropins, CatFlags flags);
+ int conf_files_cat(const char *root, const char *name);
diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c
new file mode 100644
index 0000000..fe17b37
--- /dev/null
+++ b/src/shared/ptyfwd.c
@@ -0,0 +1,631 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "ptyfwd.h"
+#include "terminal-util.h"
+#include "time-util.h"
+
+ /* State of one forwarding session between our own stdin/stdout and a PTY master fd */
+ struct PTYForward {
+ /* Event loop the I/O and signal sources below are attached to (we hold a reference) */
+ sd_event *event;
+
+ /* The PTY master fd we forward to and from; not closed by this object */
+ int master;
+
+ PTYForwardFlags flags;
+
+ sd_event_source *stdin_event_source;
+ sd_event_source *stdout_event_source;
+ sd_event_source *master_event_source;
+
+ sd_event_source *sigwinch_event_source;
+
+ /* Terminal settings of stdin/stdout as found at setup time, restored on disconnect if the
+ * matching saved_* flag is set */
+ struct termios saved_stdin_attr;
+ struct termios saved_stdout_attr;
+
+ bool saved_stdin:1;
+ bool saved_stdout:1;
+
+ /* I/O readiness as reported by the event handlers; cleared again when an operation returns
+ * EAGAIN. The *_hangup flags are set once the respective side is gone. */
+ bool stdin_readable:1;
+ bool stdin_hangup:1;
+ bool stdout_writable:1;
+ bool stdout_hangup:1;
+ bool master_readable:1;
+ bool master_writable:1;
+ bool master_hangup:1;
+
+ /* Set as soon as a read from the master succeeded once (used for PTY_FORWARD_IGNORE_INITIAL_VHANGUP) */
+ bool read_from_master:1;
+
+ /* done: forwarding ended and the handler/exit was already triggered.
+ * drain: finish as soon as nothing is pending on the master anymore. */
+ bool done:1;
+ bool drain:1;
+
+ /* The last byte written to stdout, valid if last_char_set is true */
+ bool last_char_set:1;
+ char last_char;
+
+ /* in_buffer holds data read from stdin that is to be written to the master, out_buffer the
+ * opposite direction; *_full is the number of bytes currently stored */
+ char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
+ size_t in_buffer_full, out_buffer_full;
+
+ /* Tracking of consecutive ^] key presses, see look_for_escape() */
+ usec_t escape_timestamp;
+ unsigned escape_counter;
+
+ /* Optional callback invoked (with userdata) when forwarding ends, instead of exiting the event loop */
+ PTYForwardHandler handler;
+ void *userdata;
+ };
+
+ /* Time window, counted from the first ^], within which further ^] presses count towards the escape sequence */
+ #define ESCAPE_USEC (1*USEC_PER_SEC)
+
+ /* Detaches the forwarder from its event loop and restores the terminal settings saved at setup
+  * time. 'f' may be NULL, in which case only stdin/stdout are switched back to blocking mode. */
+ static void pty_forward_disconnect(PTYForward *f) {
+
+         if (f) {
+                 f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+                 f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+                 f->master_event_source = sd_event_source_unref(f->master_event_source);
+                 f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+
+                 f->event = sd_event_unref(f->event);
+
+                 /* Put back the original terminal attributes, and make sure we do that only once */
+                 if (f->saved_stdout) {
+                         tcsetattr(STDOUT_FILENO, TCSANOW, &f->saved_stdout_attr);
+                         f->saved_stdout = false;
+                 }
+
+                 if (f->saved_stdin) {
+                         tcsetattr(STDIN_FILENO, TCSANOW, &f->saved_stdin_attr);
+                         f->saved_stdin = false;
+                 }
+         }
+
+         /* STDIN/STDOUT should not be nonblocking normally, so let's unconditionally reset it */
+         (void) fd_nonblock(STDIN_FILENO, false);
+         (void) fd_nonblock(STDOUT_FILENO, false);
+ }
+
+ /* Finishes forwarding with result 'rcode': disconnects and then either invokes the registered
+  * handler or, if there is none, asks the event loop to exit (with EXIT_FAILURE if rcode is
+  * negative, rcode otherwise). Calling this a second time does nothing and returns 0. */
+ static int pty_forward_done(PTYForward *f, int rcode) {
+         _cleanup_(sd_event_unrefp) sd_event *loop = NULL;
+
+         assert(f);
+
+         if (f->done)
+                 return 0;
+
+         /* Keep our own reference to the event loop, since disconnecting drops the one stored in f */
+         loop = sd_event_ref(f->event);
+
+         f->done = true;
+         pty_forward_disconnect(f);
+
+         if (!f->handler)
+                 return sd_event_exit(loop, rcode < 0 ? EXIT_FAILURE : rcode);
+
+         return f->handler(f, rcode, f->userdata);
+ }
+
+ /* Scans the n bytes at 'buffer' for ^] (0x1D) and updates the escape tracking state in f. Returns
+  * true as soon as the third consecutive ^] is seen, where each further ^] must arrive no later than
+  * ESCAPE_USEC after the first one. Any other byte resets the state. */
+ static bool look_for_escape(PTYForward *f, const char *buffer, size_t n) {
+         size_t i;
+
+         assert(f);
+         assert(buffer);
+         assert(n > 0);
+
+         for (i = 0; i < n; i++) {
+                 usec_t nw;
+
+                 /* Check for ^] */
+                 if (buffer[i] != 0x1D) {
+                         f->escape_timestamp = 0;
+                         f->escape_counter = 0;
+                         continue;
+                 }
+
+                 nw = now(CLOCK_MONOTONIC);
+
+                 if (f->escape_counter != 0 && nw <= f->escape_timestamp + ESCAPE_USEC) {
+                         /* Another one in time */
+                         (f->escape_counter)++;
+
+                         if (f->escape_counter >= 3)
+                                 return true;
+                 } else {
+                         /* The first one, or the previous ones are too old: start over */
+                         f->escape_timestamp = nw;
+                         f->escape_counter = 1;
+                 }
+         }
+
+         return false;
+ }
+
+ /* Returns true if a hangup (EIO) on the master shall currently be ignored: either always
+  * (PTY_FORWARD_IGNORE_VHANGUP), or as long as nothing has been read from the master yet
+  * (PTY_FORWARD_IGNORE_INITIAL_VHANGUP). */
+ static bool ignore_vhangup(PTYForward *f) {
+         assert(f);
+
+         return (f->flags & PTY_FORWARD_IGNORE_VHANGUP) ||
+                 ((f->flags & PTY_FORWARD_IGNORE_INITIAL_VHANGUP) && !f->read_from_master);
+ }
+
+ /* Checks whether everything coming from the master has been processed: nothing is left in our
+  * output buffer, the master is not marked readable, and neither the TIOCINQ nor the TIOCOUTQ
+  * query on the master reports pending bytes. A failing ioctl() is logged and not counted as
+  * pending data. */
+ static bool drained(PTYForward *f) {
+         int pending = 0;
+
+         assert(f);
+
+         if (f->out_buffer_full > 0 || f->master_readable)
+                 return false;
+
+         if (ioctl(f->master, TIOCINQ, &pending) < 0)
+                 log_debug_errno(errno, "TIOCINQ failed on master: %m");
+         else if (pending > 0)
+                 return false;
+
+         if (ioctl(f->master, TIOCOUTQ, &pending) < 0)
+                 log_debug_errno(errno, "TIOCOUTQ failed on master: %m");
+         else if (pending > 0)
+                 return false;
+
+         return true;
+ }
+
+ /* The central forwarding routine: moves data from stdin to the master (via in_buffer) and from the
+  * master to stdout (via out_buffer) for as long as any of the four transfers can make progress,
+  * then checks whether forwarding is finished.
+  *
+  * Returns 0 if forwarding simply continues, otherwise the result of pty_forward_done(), which is
+  * called on unexpected I/O errors, on the ^] escape sequence (-ECANCELED), on hangup with nothing
+  * left to write, or when draining was requested and everything has been processed. */
+ static int shovel(PTYForward *f) {
+ ssize_t k;
+
+ assert(f);
+
+ /* Loop as long as there is something to read into an empty buffer or something to write out */
+ while ((f->stdin_readable && f->in_buffer_full <= 0) ||
+ (f->master_writable && f->in_buffer_full > 0) ||
+ (f->master_readable && f->out_buffer_full <= 0) ||
+ (f->stdout_writable && f->out_buffer_full > 0)) {
+
+ /* Step 1: stdin → in_buffer */
+ if (f->stdin_readable && f->in_buffer_full < LINE_MAX) {
+
+ k = read(STDIN_FILENO, f->in_buffer + f->in_buffer_full, LINE_MAX - f->in_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdin_readable = false;
+ else if (IN_SET(errno, EIO, EPIPE, ECONNRESET)) {
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else if (k == 0) {
+ /* EOF on stdin */
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ /* Check if ^] has been pressed three times within one second. If we get this we quit
+ * immediately. */
+ if (look_for_escape(f, f->in_buffer + f->in_buffer_full, k))
+ return pty_forward_done(f, -ECANCELED);
+
+ f->in_buffer_full += (size_t) k;
+ }
+ }
+
+ /* Step 2: in_buffer → master */
+ if (f->master_writable && f->in_buffer_full > 0) {
+
+ k = write(f->master, f->in_buffer, f->in_buffer_full);
+ if (k < 0) {
+
+ if (IN_SET(errno, EAGAIN, EIO))
+ f->master_writable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET)) {
+ f->master_writable = f->master_readable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ /* Drop the written part from the front of the buffer */
+ assert(f->in_buffer_full >= (size_t) k);
+ memmove(f->in_buffer, f->in_buffer + k, f->in_buffer_full - k);
+ f->in_buffer_full -= k;
+ }
+ }
+
+ /* Step 3: master → out_buffer */
+ if (f->master_readable && f->out_buffer_full < LINE_MAX) {
+
+ k = read(f->master, f->out_buffer + f->out_buffer_full, LINE_MAX - f->out_buffer_full);
+ if (k < 0) {
+
+ /* Note that EIO on the master device
+ * might be caused by vhangup() or
+ * temporary closing of everything on
+ * the other side, we treat it like
+ * EAGAIN here and try again, unless
+ * ignore_vhangup is off. */
+
+ if (errno == EAGAIN || (errno == EIO && ignore_vhangup(f)))
+ f->master_readable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET, EIO)) {
+ f->master_readable = f->master_writable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ f->read_from_master = true;
+ f->out_buffer_full += (size_t) k;
+ }
+ }
+
+ /* Step 4: out_buffer → stdout */
+ if (f->stdout_writable && f->out_buffer_full > 0) {
+
+ k = write(STDOUT_FILENO, f->out_buffer, f->out_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdout_writable = false;
+ else if (IN_SET(errno, EIO, EPIPE, ECONNRESET)) {
+ f->stdout_writable = false;
+ f->stdout_hangup = true;
+ f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+
+ } else {
+
+ /* Remember the last byte that made it to stdout, for pty_forward_get_last_char() */
+ if (k > 0) {
+ f->last_char = f->out_buffer[k-1];
+ f->last_char_set = true;
+ }
+
+ assert(f->out_buffer_full >= (size_t) k);
+ memmove(f->out_buffer, f->out_buffer + k, f->out_buffer_full - k);
+ f->out_buffer_full -= k;
+ }
+ }
+ }
+
+ if (f->stdin_hangup || f->stdout_hangup || f->master_hangup) {
+ /* Exit the loop if any side hung up and if there's
+ * nothing more to write or nothing we could write. */
+
+ if ((f->out_buffer_full <= 0 || f->stdout_hangup) &&
+ (f->in_buffer_full <= 0 || f->master_hangup))
+ return pty_forward_done(f, 0);
+ }
+
+ /* If we were asked to drain, and there's nothing more to handle from the master, then call the callback
+ * too. */
+ if (f->drain && drained(f))
+ return pty_forward_done(f, 0);
+
+ return 0;
+ }
+
+ /* I/O event handler for the PTY master: records readability/writability (a hangup counts as both)
+  * and then runs the forwarding loop. */
+ static int on_master_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+         PTYForward *fwd = userdata;
+
+         assert(fwd);
+         assert(e);
+         assert(e == fwd->master_event_source);
+         assert(fd >= 0);
+         assert(fd == fwd->master);
+
+         if ((revents & (EPOLLIN|EPOLLHUP)) != 0)
+                 fwd->master_readable = true;
+
+         if ((revents & (EPOLLOUT|EPOLLHUP)) != 0)
+                 fwd->master_writable = true;
+
+         return shovel(fwd);
+ }
+
+ /* I/O event handler for stdin: records that stdin is readable (also on hangup) and then runs the
+  * forwarding loop. */
+ static int on_stdin_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+         PTYForward *fwd = userdata;
+
+         assert(fwd);
+         assert(e);
+         assert(e == fwd->stdin_event_source);
+         assert(fd >= 0);
+         assert(fd == STDIN_FILENO);
+
+         if ((revents & (EPOLLIN|EPOLLHUP)) != 0)
+                 fwd->stdin_readable = true;
+
+         return shovel(fwd);
+ }
+
+ /* I/O event handler for stdout: records that stdout is writable (also on hangup) and then runs the
+  * forwarding loop. */
+ static int on_stdout_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+         PTYForward *fwd = userdata;
+
+         assert(fwd);
+         assert(e);
+         assert(e == fwd->stdout_event_source);
+         assert(fd >= 0);
+         assert(fd == STDOUT_FILENO);
+
+         if ((revents & (EPOLLOUT|EPOLLHUP)) != 0)
+                 fwd->stdout_writable = true;
+
+         return shovel(fwd);
+ }
+
+ /* SIGWINCH handler: copies the current window size of stdout over to the PTY master. Failures are
+  * ignored; always returns 0. */
+ static int on_sigwinch_event(sd_event_source *e, const struct signalfd_siginfo *si, void *userdata) {
+         PTYForward *fwd = userdata;
+         struct winsize size;
+
+         assert(fwd);
+         assert(e);
+         assert(e == fwd->sigwinch_event_source);
+
+         /* The window size changed, let's forward that. */
+         if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &size) < 0)
+                 return 0;
+
+         (void) ioctl(fwd->master, TIOCSWINSZ, &size);
+
+         return 0;
+ }
+
+ /* Allocates a new forwarder that shovels data between our stdin/stdout and the PTY master fd
+  * 'master', and hooks it into an event loop.
+  *
+  * event  - event loop to attach to, or NULL to use the default one
+  * master - PTY master fd; it is switched to non-blocking mode
+  * flags  - PTY_FORWARD_* flags; with PTY_FORWARD_READ_ONLY stdin is neither read nor reconfigured
+  * ret    - receives the new object on success
+  *
+  * Returns 0 on success, a negative errno on failure. On failure the partially set up object is
+  * released again through the cleanup handler. */
+ int pty_forward_new(
+ sd_event *event,
+ int master,
+ PTYForwardFlags flags,
+ PTYForward **ret) {
+
+ _cleanup_(pty_forward_freep) PTYForward *f = NULL;
+ struct winsize ws;
+ int r;
+
+ f = new(PTYForward, 1);
+ if (!f)
+ return -ENOMEM;
+
+ /* Zero-initialize everything, except for the two fields listed */
+ *f = (struct PTYForward) {
+ .flags = flags,
+ .master = -1,
+ };
+
+ if (event)
+ f->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&f->event);
+ if (r < 0)
+ return r;
+ }
+
+ if (!(flags & PTY_FORWARD_READ_ONLY)) {
+ r = fd_nonblock(STDIN_FILENO, true);
+ if (r < 0)
+ return r;
+
+ r = fd_nonblock(STDOUT_FILENO, true);
+ if (r < 0)
+ return r;
+ }
+
+ r = fd_nonblock(master, true);
+ if (r < 0)
+ return r;
+
+ f->master = master;
+
+ if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) < 0) {
+ /* If we can't get the resolution from the output fd, then use our internal, regular width/height,
+ * i.e. something derived from $COLUMNS and $LINES if set. */
+
+ ws = (struct winsize) {
+ .ws_row = lines(),
+ .ws_col = columns(),
+ };
+ }
+
+ /* Propagate the window size to the PTY, best effort */
+ (void) ioctl(master, TIOCSWINSZ, &ws);
+
+ if (!(flags & PTY_FORWARD_READ_ONLY)) {
+ /* Switch stdin to raw mode, but keep its output flags as they were. The original settings
+ * are saved so that they can be restored on disconnect. */
+ if (tcgetattr(STDIN_FILENO, &f->saved_stdin_attr) >= 0) {
+ struct termios raw_stdin_attr;
+
+ f->saved_stdin = true;
+
+ raw_stdin_attr = f->saved_stdin_attr;
+ cfmakeraw(&raw_stdin_attr);
+ raw_stdin_attr.c_oflag = f->saved_stdin_attr.c_oflag;
+ tcsetattr(STDIN_FILENO, TCSANOW, &raw_stdin_attr);
+ }
+
+ /* Same for stdout, but here the input and local flags are the ones kept as they were */
+ if (tcgetattr(STDOUT_FILENO, &f->saved_stdout_attr) >= 0) {
+ struct termios raw_stdout_attr;
+
+ f->saved_stdout = true;
+
+ raw_stdout_attr = f->saved_stdout_attr;
+ cfmakeraw(&raw_stdout_attr);
+ raw_stdout_attr.c_iflag = f->saved_stdout_attr.c_iflag;
+ raw_stdout_attr.c_lflag = f->saved_stdout_attr.c_lflag;
+ tcsetattr(STDOUT_FILENO, TCSANOW, &raw_stdout_attr);
+ }
+
+ /* -EPERM is tolerated here: stdin is then simply not watched */
+ r = sd_event_add_io(f->event, &f->stdin_event_source, STDIN_FILENO, EPOLLIN|EPOLLET, on_stdin_event, f);
+ if (r < 0 && r != -EPERM)
+ return r;
+
+ if (r >= 0)
+ (void) sd_event_source_set_description(f->stdin_event_source, "ptyfwd-stdin");
+ }
+
+ r = sd_event_add_io(f->event, &f->stdout_event_source, STDOUT_FILENO, EPOLLOUT|EPOLLET, on_stdout_event, f);
+ if (r == -EPERM)
+ /* stdout without epoll support. Likely redirected to regular file. */
+ f->stdout_writable = true;
+ else if (r < 0)
+ return r;
+ else
+ (void) sd_event_source_set_description(f->stdout_event_source, "ptyfwd-stdout");
+
+ r = sd_event_add_io(f->event, &f->master_event_source, master, EPOLLIN|EPOLLOUT|EPOLLET, on_master_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->master_event_source, "ptyfwd-master");
+
+ /* Get notified about window size changes, so that they can be forwarded to the PTY */
+ r = sd_event_add_signal(f->event, &f->sigwinch_event_source, SIGWINCH, on_sigwinch_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->sigwinch_event_source, "ptyfwd-sigwinch");
+
+ /* Success: pass ownership to the caller, disarming the cleanup handler */
+ *ret = TAKE_PTR(f);
+
+ return 0;
+ }
+
+/* Disconnects the forwarder via pty_forward_disconnect() and releases its memory with mfree(), whose result is
+ * returned (presumably NULL, so callers can reset their pointer in one statement — confirm in alloc-util.h). */
+PTYForward *pty_forward_free(PTYForward *f) {
+ pty_forward_disconnect(f);
+ return mfree(f);
+}
+
+/* Stores the forwarder's recorded last character in *ch and returns 0, or returns -ENXIO if no character has
+ * been recorded yet. */
+int pty_forward_get_last_char(PTYForward *f, char *ch) {
+        assert(f);
+        assert(ch);
+
+        if (f->last_char_set) {
+                *ch = f->last_char;
+                return 0;
+        }
+
+        return -ENXIO;
+}
+
+/* Turns the PTY_FORWARD_IGNORE_VHANGUP flag on or off. If hangups are no longer ignored afterwards, the master is
+ * marked readable and shovel() is run right away. Returns 0, or the negative error reported by shovel(). */
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool b) {
+        bool current;
+        int r;
+
+        assert(f);
+
+        current = (f->flags & PTY_FORWARD_IGNORE_VHANGUP) != 0;
+        if (current == b)
+                return 0;
+
+        SET_FLAG(f->flags, PTY_FORWARD_IGNORE_VHANGUP, b);
+
+        if (ignore_vhangup(f))
+                return 0;
+
+        /* We shall now react to vhangup()s? Let's check
+         * immediately if we might be in one */
+        f->master_readable = true;
+        r = shovel(f);
+
+        return r < 0 ? r : 0;
+}
+
+/* Reports whether the PTY_FORWARD_IGNORE_VHANGUP flag is currently set on the forwarder. */
+bool pty_forward_get_ignore_vhangup(PTYForward *f) {
+        assert(f);
+
+        return (f->flags & PTY_FORWARD_IGNORE_VHANGUP) != 0;
+}
+
+/* Returns the forwarder's "done" flag. */
+bool pty_forward_is_done(PTYForward *f) {
+ assert(f);
+
+ return f->done;
+}
+
+/* Installs the callback and the opaque userdata pointer stored alongside it. Both values are simply recorded on
+ * the object; passing a NULL callback is not rejected here. */
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler cb, void *userdata) {
+        assert(f);
+
+        f->userdata = userdata;
+        f->handler = cb;
+}
+
+/* Switches the forwarder into draining mode by setting its "drain" flag, and returns the result of drained(). */
+bool pty_forward_drain(PTYForward *f) {
+ assert(f);
+
+ /* Starts draining the forwarder. Specifically:
+ *
+ * - Returns true if there are no unprocessed bytes from the pty, false otherwise
+ *
+ * - Makes sure the handler function is called the next time the number of unprocessed bytes hits zero
+ */
+
+ f->drain = true;
+ return drained(f);
+}
+
+/* Applies "priority" to every event source the forwarder currently owns.
+ *
+ * Not every source necessarily exists: the stdin source is only created when PTY_FORWARD_READ_ONLY is off (and
+ * is skipped if adding it failed with -EPERM), the stdout source is skipped on -EPERM too, and the SIGWINCH
+ * source is dropped again by pty_forward_set_width_height(). Missing sources are skipped here, because handing
+ * a NULL source to sd_event_source_set_priority() is an error and would make the whole call fail.
+ *
+ * Returns 0 on success, or the first negative error returned by sd_event_source_set_priority(). */
+int pty_forward_set_priority(PTYForward *f, int64_t priority) {
+        sd_event_source *sources[4];
+        size_t i;
+        int r;
+
+        assert(f);
+
+        /* Same order as before: stdin, stdout, master, SIGWINCH. */
+        sources[0] = f->stdin_event_source;
+        sources[1] = f->stdout_event_source;
+        sources[2] = f->master_event_source;
+        sources[3] = f->sigwinch_event_source;
+
+        for (i = 0; i < sizeof(sources) / sizeof(sources[0]); i++) {
+                if (!sources[i])
+                        continue;
+
+                r = sd_event_source_set_priority(sources[i], priority);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Forces the window size of the PTY master. (unsigned) -1 for either dimension means "leave as is"; if both are
+ * (unsigned) -1 nothing happens. Explicit values must be in the range 1…USHRT_MAX, otherwise -ERANGE is
+ * returned. ioctl() failures are returned as -errno. After a successful change the SIGWINCH event source is
+ * released. */
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height) {
+        const bool keep_width = width == (unsigned) -1;
+        const bool keep_height = height == (unsigned) -1;
+        struct winsize ws;
+
+        assert(f);
+
+        if (keep_width && keep_height)
+                return 0; /* noop */
+
+        if (!keep_width && (width == 0 || width > USHRT_MAX))
+                return -ERANGE;
+
+        if (!keep_height && (height == 0 || height > USHRT_MAX))
+                return -ERANGE;
+
+        if (!keep_width && !keep_height)
+                ws = (struct winsize) {
+                        .ws_row = height,
+                        .ws_col = width,
+                };
+        else {
+                /* Only one dimension is changing: start from the current size and patch it. */
+                if (ioctl(f->master, TIOCGWINSZ, &ws) < 0)
+                        return -errno;
+
+                if (!keep_width)
+                        ws.ws_col = width;
+                if (!keep_height)
+                        ws.ws_row = height;
+        }
+
+        if (ioctl(f->master, TIOCSWINSZ, &ws) < 0)
+                return -errno;
+
+        /* Make sure we ignore SIGWINCH window size events from now on */
+        f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+
+        return 0;
+}
diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h
new file mode 100644
index 0000000..887d3cb
--- /dev/null
+++ b/src/shared/ptyfwd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+#include "macro.h"
+
+typedef struct PTYForward PTYForward;
+
+/* Bit flags accepted by pty_forward_new(). */
+typedef enum PTYForwardFlags {
+        /* Leave our own stdin/stdout alone: no non-blocking mode, no raw terminal mode, no stdin event source */
+        PTY_FORWARD_READ_ONLY = 1 << 0,
+
+        /* Continue reading after hangup? */
+        PTY_FORWARD_IGNORE_VHANGUP = 1 << 1,
+
+        /* Continue reading after hangup but only if we never read anything else? */
+        PTY_FORWARD_IGNORE_INITIAL_VHANGUP = 1 << 2,
+} PTYForwardFlags;
+
+/* Callback type installed with pty_forward_set_handler(). NOTE(review): the meaning of "rcode" and the exact
+ * moments of invocation are decided by code outside this header — confirm in ptyfwd.c. */
+typedef int (*PTYForwardHandler)(PTYForward *f, int rcode, void *userdata);
+
+/* Creates a forwarder for the PTY master fd on the given event loop (NULL: the default loop) and returns it in
+ * *f. pty_forward_free() disconnects and releases it. */
+int pty_forward_new(sd_event *event, int master, PTYForwardFlags flags, PTYForward **f);
+PTYForward *pty_forward_free(PTYForward *f);
+
+/* Returns the recorded last character in *ch, or -ENXIO if none has been recorded yet. */
+int pty_forward_get_last_char(PTYForward *f, char *ch);
+
+/* Setter/getter for the PTY_FORWARD_IGNORE_VHANGUP flag. */
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool ignore_vhangup);
+bool pty_forward_get_ignore_vhangup(PTYForward *f);
+
+/* Returns the forwarder's "done" flag. */
+bool pty_forward_is_done(PTYForward *f);
+
+/* Stores the callback and its userdata pointer on the forwarder. */
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler handler, void *userdata);
+
+/* Starts draining; returns true if there are no unprocessed bytes from the pty at the time of the call. */
+bool pty_forward_drain(PTYForward *f);
+
+/* Sets the event-loop priority of the forwarder's event sources. */
+int pty_forward_set_priority(PTYForward *f, int64_t priority);
+
+/* Overrides the window size of the PTY master; (unsigned) -1 leaves a dimension unchanged. After a successful
+ * change the SIGWINCH event source is released. */
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height);
+
+/* Provides pty_forward_freep() for use with _cleanup_(). */
+DEFINE_TRIVIAL_CLEANUP_FUNC(PTYForward*, pty_forward_free);
diff --git a/src/shared/reboot-util.c b/src/shared/reboot-util.c
new file mode 100644
index 0000000..ca40159
--- /dev/null
+++ b/src/shared/reboot-util.c
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "raw-reboot.h"
+#include "reboot-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "virt.h"
+
+/* Stores the reboot parameter in /run/systemd/reboot-param (written under umask 0022), or removes that file
+ * if the parameter is empty. A file that is already missing is not an error. Failures are logged at warning
+ * level and the result of the logging call is returned; otherwise 0. */
+int update_reboot_parameter_and_warn(const char *parameter) {
+        int r;
+
+        if (!isempty(parameter)) {
+                RUN_WITH_UMASK(0022) {
+                        r = write_string_file("/run/systemd/reboot-param", parameter,
+                                              WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+                        if (r < 0)
+                                return log_warning_errno(r, "Failed to write reboot parameter file: %m");
+                }
+
+                return 0;
+        }
+
+        /* Nothing to store: drop a previously written parameter, if there is one. */
+        if (unlink("/run/systemd/reboot-param") < 0 && errno != ENOENT)
+                return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
+
+        return 0;
+}
+
+int reboot_with_parameter(RebootFlags flags) {
+        const bool verbose = flags & REBOOT_LOG;
+        const bool dry_run = flags & REBOOT_DRY_RUN;
+        const int info_level = verbose ? LOG_INFO : LOG_DEBUG;
+        const int warn_level = verbose ? LOG_WARNING : LOG_DEBUG;
+
+        /* Reboots the system, passing along the parameter stored in /run/systemd/reboot-param, if there is one.
+         *
+         * - REBOOT_DRY_RUN: return 0 right before the actual reboot call would be issued.
+         * - REBOOT_FALLBACK: if rebooting with a parameter is not possible or fails, attempt a plain reboot().
+         *   Without this flag 0 is returned instead, and the caller is expected to fall back to reboot()
+         *   itself, or to deal with the situation in some other way.
+         * - REBOOT_LOG: log what is about to happen and all errors at regular log levels, instead of at debug
+         *   level. */
+
+        if (detect_container() == 0) {
+                _cleanup_free_ char *parameter = NULL;
+                int r;
+
+                r = read_one_line_file("/run/systemd/reboot-param", &parameter);
+                if (r < 0 && r != -ENOENT)
+                        log_full_errno(warn_level, r,
+                                       "Failed to read reboot parameter file, ignoring: %m");
+
+                if (!isempty(parameter)) {
+                        log_full(info_level, "Rebooting with argument '%s'.", parameter);
+
+                        if (dry_run)
+                                return 0;
+
+                        (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, parameter);
+
+                        /* Still here? Then the parameterized reboot did not work. */
+                        log_full_errno(warn_level, errno,
+                                       "Failed to reboot with parameter, retrying without: %m");
+                }
+        }
+
+        if (!(flags & REBOOT_FALLBACK))
+                return 0;
+
+        log_full(info_level, "Rebooting.");
+
+        if (dry_run)
+                return 0;
+
+        (void) reboot(RB_AUTOBOOT);
+
+        return log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, errno, "Failed to reboot: %m");
+}
diff --git a/src/shared/reboot-util.h b/src/shared/reboot-util.h
new file mode 100644
index 0000000..d459333
--- /dev/null
+++ b/src/shared/reboot-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+/* Writes the parameter to /run/systemd/reboot-param, or removes that file if the parameter is empty. Logs a
+ * warning on failure. */
+int update_reboot_parameter_and_warn(const char *parameter);
+
+typedef enum RebootFlags {
+ REBOOT_LOG = 1 << 0, /* log about what we are going to do and all errors */
+ REBOOT_DRY_RUN = 1 << 1, /* return 0 right before actually doing the reboot */
+ REBOOT_FALLBACK = 1 << 2, /* fallback to plain reboot() if argument-based reboot doesn't work, isn't configured or doesn't apply otherwise */
+} RebootFlags;
+
+/* Reboots using the parameter stored in /run/systemd/reboot-param, if any; behaviour is tuned by RebootFlags. */
+int reboot_with_parameter(RebootFlags flags);
diff --git a/src/shared/resolve-util.c b/src/shared/resolve-util.c
new file mode 100644
index 0000000..a5d4a14
--- /dev/null
+++ b/src/shared/resolve-util.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "conf-parser.h"
+#include "resolve-util.h"
+#include "string-table.h"
+
+/* Configuration parsers for the three enums below, generated by DEFINE_CONFIG_PARSE_ENUM(); the last argument is
+ * the message used when a value cannot be parsed. */
+DEFINE_CONFIG_PARSE_ENUM(config_parse_resolve_support, resolve_support, ResolveSupport, "Failed to parse resolve support setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dnssec_mode, dnssec_mode, DnssecMode, "Failed to parse DNSSEC mode setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_over_tls_mode, dns_over_tls_mode, DnsOverTlsMode, "Failed to parse DNS-over-TLS mode setting");
+
+/* String names of the ResolveSupport values. NOTE(review): the third macro argument below is presumably the value
+ * a boolean "true" input maps to — confirm against string-table.h. */
+static const char* const resolve_support_table[_RESOLVE_SUPPORT_MAX] = {
+ [RESOLVE_SUPPORT_NO] = "no",
+ [RESOLVE_SUPPORT_YES] = "yes",
+ [RESOLVE_SUPPORT_RESOLVE] = "resolve",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolve_support, ResolveSupport, RESOLVE_SUPPORT_YES);
+
+/* String names of the DnssecMode values. */
+static const char* const dnssec_mode_table[_DNSSEC_MODE_MAX] = {
+ [DNSSEC_NO] = "no",
+ [DNSSEC_ALLOW_DOWNGRADE] = "allow-downgrade",
+ [DNSSEC_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dnssec_mode, DnssecMode, DNSSEC_YES);
+
+/* String names of the DnsOverTlsMode values. There is no "yes" entry, and the invalid value is what gets passed
+ * as third macro argument. */
+static const char* const dns_over_tls_mode_table[_DNS_OVER_TLS_MODE_MAX] = {
+ [DNS_OVER_TLS_NO] = "no",
+ [DNS_OVER_TLS_OPPORTUNISTIC] = "opportunistic",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_over_tls_mode, DnsOverTlsMode, _DNS_OVER_TLS_MODE_INVALID);
diff --git a/src/shared/resolve-util.h b/src/shared/resolve-util.h
new file mode 100644
index 0000000..5883342
--- /dev/null
+++ b/src/shared/resolve-util.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "conf-parser.h"
+#include "macro.h"
+
+typedef enum ResolveSupport ResolveSupport;
+typedef enum DnssecMode DnssecMode;
+typedef enum DnsOverTlsMode DnsOverTlsMode;
+
+/* Three-state setting; the string names are "no", "yes" and "resolve" (see resolve_support_table). */
+enum ResolveSupport {
+ RESOLVE_SUPPORT_NO,
+ RESOLVE_SUPPORT_YES,
+ RESOLVE_SUPPORT_RESOLVE,
+ _RESOLVE_SUPPORT_MAX, /* number of valid values, used to size the string table */
+ _RESOLVE_SUPPORT_INVALID = -1
+};
+
+/* DNSSEC validation policy; the string names are "no", "allow-downgrade" and "yes" (see dnssec_mode_table). */
+enum DnssecMode {
+ /* No DNSSEC validation is done */
+ DNSSEC_NO,
+
+ /* Validate locally, if the server knows DO, but if not,
+ * don't. Don't trust the AD bit. If the server doesn't do
+ * DNSSEC properly, downgrade to non-DNSSEC operation. Of
+ * course, we then are vulnerable to a downgrade attack, but
+ * that's life and what is configured. */
+ DNSSEC_ALLOW_DOWNGRADE,
+
+ /* Insist on DNSSEC server support, and rather fail than downgrading. */
+ DNSSEC_YES,
+
+ _DNSSEC_MODE_MAX, /* number of valid values, used to size the string table */
+ _DNSSEC_MODE_INVALID = -1
+};
+
+/* DNS-over-TLS policy; the string names are "no" and "opportunistic" (see dns_over_tls_mode_table). */
+enum DnsOverTlsMode {
+ /* No connection is made for DNS-over-TLS */
+ DNS_OVER_TLS_NO,
+
+ /* Try to connect using DNS-over-TLS, but if connection fails,
+ * fallback to using an unencrypted connection */
+ DNS_OVER_TLS_OPPORTUNISTIC,
+
+ _DNS_OVER_TLS_MODE_MAX, /* number of valid values, used to size the string table */
+ _DNS_OVER_TLS_MODE_INVALID = -1
+};
+
+/* Configuration parsers, defined in resolve-util.c via DEFINE_CONFIG_PARSE_ENUM(). */
+CONFIG_PARSER_PROTOTYPE(config_parse_resolve_support);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_over_tls_mode);
+
+/* Conversions between the enum values and their string names, generated from the string tables in
+ * resolve-util.c. */
+const char* resolve_support_to_string(ResolveSupport p) _const_;
+ResolveSupport resolve_support_from_string(const char *s) _pure_;
+
+const char* dnssec_mode_to_string(DnssecMode p) _const_;
+DnssecMode dnssec_mode_from_string(const char *s) _pure_;
+
+const char* dns_over_tls_mode_to_string(DnsOverTlsMode p) _const_;
+DnsOverTlsMode dns_over_tls_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
new file mode 100644
index 0000000..cc58b3c
--- /dev/null
+++ b/src/shared/seccomp-util.c
@@ -0,0 +1,1764 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <linux/seccomp.h>
+#include <seccomp.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/shm.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "macro.h"
+#include "nsflags.h"
+#include "process-util.h"
+#include "seccomp-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+#include "errno-list.h"
+
+/* Seccomp architecture identifiers for the build target, selected at compile time: the native ABI plus the
+ * secondary ABIs listed in the matching branch. The list always ends with a (uint32_t) -1 entry, and consists
+ * of only that entry on targets matching none of the branches. NOTE(review): that entry is presumably the
+ * terminator SECCOMP_FOREACH_LOCAL_ARCH() stops at — confirm in seccomp-util.h. */
+const uint32_t seccomp_local_archs[] = {
+
+ /* Note: always list the native arch we are compiled as last, so that users can blacklist seccomp(), but our own calls to it still succeed */
+
+#if defined(__x86_64__) && defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X86_64,
+ SCMP_ARCH_X32, /* native */
+#elif defined(__x86_64__) && !defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X32,
+ SCMP_ARCH_X86_64, /* native */
+#elif defined(__i386__)
+ SCMP_ARCH_X86,
+#elif defined(__aarch64__)
+ SCMP_ARCH_ARM,
+ SCMP_ARCH_AARCH64, /* native */
+#elif defined(__arm__)
+ SCMP_ARCH_ARM,
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64LE,
+ SCMP_ARCH_PPC64, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64,
+ SCMP_ARCH_PPC64LE, /* native */
+#elif defined(__powerpc__)
+ SCMP_ARCH_PPC,
+#elif defined(__s390x__)
+ SCMP_ARCH_S390,
+ SCMP_ARCH_S390X, /* native */
+#elif defined(__s390__)
+ SCMP_ARCH_S390,
+#endif
+ (uint32_t) -1
+ };
+
+/* Maps a seccomp architecture identifier to its name, or returns NULL if the identifier is not one of the known
+ * ones. */
+const char* seccomp_arch_to_string(uint32_t c) {
+        /* Maintain order used in <seccomp.h>.
+         *
+         * Names used here should be the same as those used for ConditionArchitecture=,
+         * except for "subarchitectures" like x32. */
+        static const struct {
+                uint32_t arch;
+                const char *name;
+        } names[] = {
+                { SCMP_ARCH_NATIVE,      "native"        },
+                { SCMP_ARCH_X86,         "x86"           },
+                { SCMP_ARCH_X86_64,      "x86-64"        },
+                { SCMP_ARCH_X32,         "x32"           },
+                { SCMP_ARCH_ARM,         "arm"           },
+                { SCMP_ARCH_AARCH64,     "arm64"         },
+                { SCMP_ARCH_MIPS,        "mips"          },
+                { SCMP_ARCH_MIPS64,      "mips64"        },
+                { SCMP_ARCH_MIPS64N32,   "mips64-n32"    },
+                { SCMP_ARCH_MIPSEL,      "mips-le"       },
+                { SCMP_ARCH_MIPSEL64,    "mips64-le"     },
+                { SCMP_ARCH_MIPSEL64N32, "mips64-le-n32" },
+                { SCMP_ARCH_PPC,         "ppc"           },
+                { SCMP_ARCH_PPC64,       "ppc64"         },
+                { SCMP_ARCH_PPC64LE,     "ppc64-le"      },
+                { SCMP_ARCH_S390,        "s390"          },
+                { SCMP_ARCH_S390X,       "s390x"         },
+        };
+        size_t i;
+
+        for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
+                if (names[i].arch == c)
+                        return names[i].name;
+
+        return NULL;
+}
+
+/* Parses an architecture name as produced by seccomp_arch_to_string() and stores the matching seccomp
+ * architecture identifier in *ret. Returns 0 on success, -EINVAL if the name is NULL or not known. */
+int seccomp_arch_from_string(const char *n, uint32_t *ret) {
+        static const struct {
+                const char *name;
+                uint32_t arch;
+        } names[] = {
+                { "native",        SCMP_ARCH_NATIVE      },
+                { "x86",           SCMP_ARCH_X86         },
+                { "x86-64",        SCMP_ARCH_X86_64      },
+                { "x32",           SCMP_ARCH_X32         },
+                { "arm",           SCMP_ARCH_ARM         },
+                { "arm64",         SCMP_ARCH_AARCH64     },
+                { "mips",          SCMP_ARCH_MIPS        },
+                { "mips64",        SCMP_ARCH_MIPS64      },
+                { "mips64-n32",    SCMP_ARCH_MIPS64N32   },
+                { "mips-le",       SCMP_ARCH_MIPSEL      },
+                { "mips64-le",     SCMP_ARCH_MIPSEL64    },
+                { "mips64-le-n32", SCMP_ARCH_MIPSEL64N32 },
+                { "ppc",           SCMP_ARCH_PPC         },
+                { "ppc64",         SCMP_ARCH_PPC64       },
+                { "ppc64-le",      SCMP_ARCH_PPC64LE     },
+                { "s390",          SCMP_ARCH_S390        },
+                { "s390x",         SCMP_ARCH_S390X       },
+        };
+        size_t i;
+
+        if (!n)
+                return -EINVAL;
+
+        assert(ret);
+
+        for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
+                if (streq(n, names[i].name)) {
+                        *ret = names[i].arch;
+                        return 0;
+                }
+
+        return -EINVAL;
+}
+
+/* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting
+ * any others. Also, turns off the NNP fiddling.
+ *
+ * On success the new context is stored in *ret and 0 is returned. On failure the context is released again and
+ * -ENOMEM or the negative error of the failing libseccomp call is returned. */
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) {
+        scmp_filter_ctx ctx;
+        int r;
+
+        ctx = seccomp_init(default_action);
+        if (!ctx)
+                return -ENOMEM;
+
+        if (arch == SCMP_ARCH_NATIVE ||
+            arch == seccomp_arch_native()) {
+                /* A fresh context already covers the native architecture, nothing to swap. */
+                assert(seccomp_arch_exist(ctx, SCMP_ARCH_NATIVE) >= 0);
+                assert(seccomp_arch_exist(ctx, seccomp_arch_native()) >= 0);
+        } else {
+                /* Replace the native architecture with the requested one. */
+                r = seccomp_arch_remove(ctx, seccomp_arch_native());
+                if (r < 0)
+                        goto fail;
+
+                r = seccomp_arch_add(ctx, arch);
+                if (r < 0)
+                        goto fail;
+
+                assert(seccomp_arch_exist(ctx, arch) >= 0);
+                assert(seccomp_arch_exist(ctx, SCMP_ARCH_NATIVE) == -EEXIST);
+                assert(seccomp_arch_exist(ctx, seccomp_arch_native()) == -EEXIST);
+        }
+
+        r = seccomp_attr_set(ctx, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW);
+        if (r < 0)
+                goto fail;
+
+        r = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto fail;
+
+        *ret = ctx;
+        return 0;
+
+fail:
+        seccomp_release(ctx);
+        return r;
+}
+
+/* True if querying the seccomp mode with prctl(PR_GET_SECCOMP) does not fail. */
+static bool is_basic_seccomp_available(void) {
+        int mode;
+
+        mode = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+        return mode >= 0;
+}
+
+/* Probes for filter mode by asking the kernel to install a NULL filter: the feature counts as available only if
+ * that request fails, and fails with EFAULT specifically. */
+static bool is_seccomp_filter_available(void) {
+        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) >= 0)
+                return false;
+
+        return errno == EFAULT;
+}
+
+/* Reports whether both basic seccomp and seccomp filter mode are usable. The probes run on the first call only;
+ * their result is kept in a static variable and reused afterwards. */
+bool is_seccomp_available(void) {
+        static int cached_enabled = -1;
+
+        if (cached_enabled >= 0)
+                return cached_enabled;
+
+        cached_enabled =
+                is_basic_seccomp_available() &&
+                is_seccomp_filter_available();
+
+        return cached_enabled;
+}
+
+const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_DEFAULT] = {
+ .name = "@default",
+ .help = "System calls that are always permitted",
+ .value =
+ "clock_getres\0"
+ "clock_gettime\0"
+ "clock_nanosleep\0"
+ "execve\0"
+ "exit\0"
+ "exit_group\0"
+ "futex\0"
+ "get_robust_list\0"
+ "get_thread_area\0"
+ "getegid\0"
+ "getegid32\0"
+ "geteuid\0"
+ "geteuid32\0"
+ "getgid\0"
+ "getgid32\0"
+ "getgroups\0"
+ "getgroups32\0"
+ "getpgid\0"
+ "getpgrp\0"
+ "getpid\0"
+ "getppid\0"
+ "getresgid\0"
+ "getresgid32\0"
+ "getresuid\0"
+ "getresuid32\0"
+ "getrlimit\0" /* make sure processes can query stack size and such */
+ "getsid\0"
+ "gettid\0"
+ "gettimeofday\0"
+ "getuid\0"
+ "getuid32\0"
+ "membarrier\0"
+ "nanosleep\0"
+ "pause\0"
+ "prlimit64\0"
+ "restart_syscall\0"
+ "rt_sigreturn\0"
+ "sched_yield\0"
+ "set_robust_list\0"
+ "set_thread_area\0"
+ "set_tid_address\0"
+ "set_tls\0"
+ "sigreturn\0"
+ "time\0"
+ "ugetrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_AIO] = {
+ .name = "@aio",
+ .help = "Asynchronous IO",
+ .value =
+ "io_cancel\0"
+ "io_destroy\0"
+ "io_getevents\0"
+ "io_pgetevents\0"
+ "io_setup\0"
+ "io_submit\0"
+ },
+ [SYSCALL_FILTER_SET_BASIC_IO] = {
+ .name = "@basic-io",
+ .help = "Basic IO",
+ .value =
+ "_llseek\0"
+ "close\0"
+ "dup\0"
+ "dup2\0"
+ "dup3\0"
+ "lseek\0"
+ "pread64\0"
+ "preadv\0"
+ "preadv2\0"
+ "pwrite64\0"
+ "pwritev\0"
+ "pwritev2\0"
+ "read\0"
+ "readv\0"
+ "write\0"
+ "writev\0"
+ },
+ [SYSCALL_FILTER_SET_CHOWN] = {
+ .name = "@chown",
+ .help = "Change ownership of files and directories",
+ .value =
+ "chown\0"
+ "chown32\0"
+ "fchown\0"
+ "fchown32\0"
+ "fchownat\0"
+ "lchown\0"
+ "lchown32\0"
+ },
+ [SYSCALL_FILTER_SET_CLOCK] = {
+ .name = "@clock",
+ .help = "Change the system time",
+ .value =
+ "adjtimex\0"
+ "clock_adjtime\0"
+ "clock_settime\0"
+ "settimeofday\0"
+ "stime\0"
+ },
+ [SYSCALL_FILTER_SET_CPU_EMULATION] = {
+ .name = "@cpu-emulation",
+ .help = "System calls for CPU emulation functionality",
+ .value =
+ "modify_ldt\0"
+ "subpage_prot\0"
+ "switch_endian\0"
+ "vm86\0"
+ "vm86old\0"
+ },
+ [SYSCALL_FILTER_SET_DEBUG] = {
+ .name = "@debug",
+ .help = "Debugging, performance monitoring and tracing functionality",
+ .value =
+ "lookup_dcookie\0"
+ "perf_event_open\0"
+ "ptrace\0"
+ "rtas\0"
+#ifdef __NR_s390_runtime_instr
+ "s390_runtime_instr\0"
+#endif
+ "sys_debug_setcontext\0"
+ },
+ [SYSCALL_FILTER_SET_FILE_SYSTEM] = {
+ .name = "@file-system",
+ .help = "File system operations",
+ .value =
+ "access\0"
+ "chdir\0"
+ "chmod\0"
+ "close\0"
+ "creat\0"
+ "faccessat\0"
+ "fallocate\0"
+ "fchdir\0"
+ "fchmod\0"
+ "fchmodat\0"
+ "fcntl\0"
+ "fcntl64\0"
+ "fgetxattr\0"
+ "flistxattr\0"
+ "fremovexattr\0"
+ "fsetxattr\0"
+ "fstat\0"
+ "fstat64\0"
+ "fstatat64\0"
+ "fstatfs\0"
+ "fstatfs64\0"
+ "ftruncate\0"
+ "ftruncate64\0"
+ "futimesat\0"
+ "getcwd\0"
+ "getdents\0"
+ "getdents64\0"
+ "getxattr\0"
+ "inotify_add_watch\0"
+ "inotify_init\0"
+ "inotify_init1\0"
+ "inotify_rm_watch\0"
+ "lgetxattr\0"
+ "link\0"
+ "linkat\0"
+ "listxattr\0"
+ "llistxattr\0"
+ "lremovexattr\0"
+ "lsetxattr\0"
+ "lstat\0"
+ "lstat64\0"
+ "mkdir\0"
+ "mkdirat\0"
+ "mknod\0"
+ "mknodat\0"
+ "mmap\0"
+ "mmap2\0"
+ "munmap\0"
+ "newfstatat\0"
+ "oldfstat\0"
+ "oldlstat\0"
+ "oldstat\0"
+ "open\0"
+ "openat\0"
+ "readlink\0"
+ "readlinkat\0"
+ "removexattr\0"
+ "rename\0"
+ "renameat\0"
+ "renameat2\0"
+ "rmdir\0"
+ "setxattr\0"
+ "stat\0"
+ "stat64\0"
+ "statfs\0"
+ "statfs64\0"
+#ifdef __NR_statx
+ "statx\0"
+#endif
+ "symlink\0"
+ "symlinkat\0"
+ "truncate\0"
+ "truncate64\0"
+ "unlink\0"
+ "unlinkat\0"
+ "utime\0"
+ "utimensat\0"
+ "utimes\0"
+ },
+ [SYSCALL_FILTER_SET_IO_EVENT] = {
+ .name = "@io-event",
+ .help = "Event loop system calls",
+ .value =
+ "_newselect\0"
+ "epoll_create\0"
+ "epoll_create1\0"
+ "epoll_ctl\0"
+ "epoll_ctl_old\0"
+ "epoll_pwait\0"
+ "epoll_wait\0"
+ "epoll_wait_old\0"
+ "eventfd\0"
+ "eventfd2\0"
+ "poll\0"
+ "ppoll\0"
+ "pselect6\0"
+ "select\0"
+ },
+ [SYSCALL_FILTER_SET_IPC] = {
+ .name = "@ipc",
+ .help = "SysV IPC, POSIX Message Queues or other IPC",
+ .value =
+ "ipc\0"
+ "memfd_create\0"
+ "mq_getsetattr\0"
+ "mq_notify\0"
+ "mq_open\0"
+ "mq_timedreceive\0"
+ "mq_timedsend\0"
+ "mq_unlink\0"
+ "msgctl\0"
+ "msgget\0"
+ "msgrcv\0"
+ "msgsnd\0"
+ "pipe\0"
+ "pipe2\0"
+ "process_vm_readv\0"
+ "process_vm_writev\0"
+ "semctl\0"
+ "semget\0"
+ "semop\0"
+ "semtimedop\0"
+ "shmat\0"
+ "shmctl\0"
+ "shmdt\0"
+ "shmget\0"
+ },
+ [SYSCALL_FILTER_SET_KEYRING] = {
+ .name = "@keyring",
+ .help = "Kernel keyring access",
+ .value =
+ "add_key\0"
+ "keyctl\0"
+ "request_key\0"
+ },
+ [SYSCALL_FILTER_SET_MEMLOCK] = {
+ .name = "@memlock",
+ .help = "Memory locking control",
+ .value =
+ "mlock\0"
+ "mlock2\0"
+ "mlockall\0"
+ "munlock\0"
+ "munlockall\0"
+ },
+ [SYSCALL_FILTER_SET_MODULE] = {
+ .name = "@module",
+ .help = "Loading and unloading of kernel modules",
+ .value =
+ "delete_module\0"
+ "finit_module\0"
+ "init_module\0"
+ },
+ [SYSCALL_FILTER_SET_MOUNT] = {
+ .name = "@mount",
+ .help = "Mounting and unmounting of file systems",
+ .value =
+ "chroot\0"
+ "mount\0"
+ "pivot_root\0"
+ "umount\0"
+ "umount2\0"
+ },
+ [SYSCALL_FILTER_SET_NETWORK_IO] = {
+ .name = "@network-io",
+ .help = "Network or Unix socket IO, should not be needed if not network facing",
+ .value =
+ "accept\0"
+ "accept4\0"
+ "bind\0"
+ "connect\0"
+ "getpeername\0"
+ "getsockname\0"
+ "getsockopt\0"
+ "listen\0"
+ "recv\0"
+ "recvfrom\0"
+ "recvmmsg\0"
+ "recvmsg\0"
+ "send\0"
+ "sendmmsg\0"
+ "sendmsg\0"
+ "sendto\0"
+ "setsockopt\0"
+ "shutdown\0"
+ "socket\0"
+ "socketcall\0"
+ "socketpair\0"
+ },
+ [SYSCALL_FILTER_SET_OBSOLETE] = {
+ /* some unknown even to libseccomp */
+ .name = "@obsolete",
+ .help = "Unusual, obsolete or unimplemented system calls",
+ .value =
+ "_sysctl\0"
+ "afs_syscall\0"
+ "bdflush\0"
+ "break\0"
+ "create_module\0"
+ "ftime\0"
+ "get_kernel_syms\0"
+ "getpmsg\0"
+ "gtty\0"
+ "idle\0"
+ "lock\0"
+ "mpx\0"
+ "prof\0"
+ "profil\0"
+ "putpmsg\0"
+ "query_module\0"
+ "security\0"
+ "sgetmask\0"
+ "ssetmask\0"
+ "stty\0"
+ "sysfs\0"
+ "tuxcall\0"
+ "ulimit\0"
+ "uselib\0"
+ "ustat\0"
+ "vserver\0"
+ },
+ [SYSCALL_FILTER_SET_PRIVILEGED] = {
+ .name = "@privileged",
+ .help = "All system calls which need super-user capabilities",
+ .value =
+ "@chown\0"
+ "@clock\0"
+ "@module\0"
+ "@raw-io\0"
+ "@reboot\0"
+ "@swap\0"
+ "_sysctl\0"
+ "acct\0"
+ "bpf\0"
+ "capset\0"
+ "chroot\0"
+ "fanotify_init\0"
+ "nfsservctl\0"
+ "open_by_handle_at\0"
+ "pivot_root\0"
+ "quotactl\0"
+ "setdomainname\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "sethostname\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0" /* We list the explicit system calls here, as @setuid also includes setgid() which is not necessarily privileged */
+ "setuid32\0"
+ "vhangup\0"
+ },
+ [SYSCALL_FILTER_SET_PROCESS] = {
+ /* .value is a list of system call names, each terminated by NUL */
+ .name = "@process",
+ .help = "Process control, execution, namespacing operations",
+ .value =
+ "arch_prctl\0"
+ "capget\0" /* Able to query arbitrary processes */
+ "clone\0"
+ "execveat\0"
+ "fork\0"
+ "getrusage\0"
+ "kill\0"
+ "prctl\0"
+ "rt_sigqueueinfo\0"
+ "rt_tgsigqueueinfo\0"
+ "setns\0"
+ "swapcontext\0" /* Some archs e.g. powerpc32 are using it to do userspace context switches */
+ "tgkill\0"
+ "times\0"
+ "tkill\0"
+ "unshare\0"
+ "vfork\0"
+ "wait4\0"
+ "waitid\0"
+ "waitpid\0"
+ },
+ [SYSCALL_FILTER_SET_RAW_IO] = {
+ .name = "@raw-io",
+ .help = "Raw I/O port access",
+ .value =
+ "ioperm\0"
+ "iopl\0"
+ "pciconfig_iobase\0"
+ "pciconfig_read\0"
+ "pciconfig_write\0"
+#ifdef __NR_s390_pci_mmio_read
+ "s390_pci_mmio_read\0"
+#endif
+#ifdef __NR_s390_pci_mmio_write
+ "s390_pci_mmio_write\0"
+#endif
+ },
+ [SYSCALL_FILTER_SET_REBOOT] = {
+ .name = "@reboot",
+ .help = "Reboot and reboot preparation/kexec",
+ .value =
+ "kexec_file_load\0"
+ "kexec_load\0"
+ "reboot\0"
+ },
+ [SYSCALL_FILTER_SET_RESOURCES] = {
+ .name = "@resources",
+ .help = "Alter resource settings",
+ .value =
+ "ioprio_set\0"
+ "mbind\0"
+ "migrate_pages\0"
+ "move_pages\0"
+ "nice\0"
+ "sched_setaffinity\0"
+ "sched_setattr\0"
+ "sched_setparam\0"
+ "sched_setscheduler\0"
+ "set_mempolicy\0"
+ "setpriority\0"
+ "setrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_SETUID] = {
+ .name = "@setuid",
+ .help = "Operations for changing user/group credentials",
+ .value =
+ "setgid\0"
+ "setgid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "setregid\0"
+ "setregid32\0"
+ "setresgid\0"
+ "setresgid32\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0"
+ "setuid32\0"
+ },
+ [SYSCALL_FILTER_SET_SIGNAL] = {
+ .name = "@signal",
+ .help = "Process signal handling",
+ .value =
+ "rt_sigaction\0"
+ "rt_sigpending\0"
+ "rt_sigprocmask\0"
+ "rt_sigsuspend\0"
+ "rt_sigtimedwait\0"
+ "sigaction\0"
+ "sigaltstack\0"
+ "signal\0"
+ "signalfd\0"
+ "signalfd4\0"
+ "sigpending\0"
+ "sigprocmask\0"
+ "sigsuspend\0"
+ },
+ [SYSCALL_FILTER_SET_SWAP] = {
+ .name = "@swap",
+ .help = "Enable/disable swap devices",
+ .value =
+ "swapoff\0"
+ "swapon\0"
+ },
+ [SYSCALL_FILTER_SET_SYNC] = {
+ .name = "@sync",
+ .help = "Synchronize files and memory to storage",
+ .value =
+ "fdatasync\0"
+ "fsync\0"
+ "msync\0"
+ "sync\0"
+ "sync_file_range\0"
+ "syncfs\0"
+ },
+ [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = {
+ .name = "@system-service",
+ .help = "General system service operations",
+ .value =
+ "@aio\0"
+ "@basic-io\0"
+ "@chown\0"
+ "@default\0"
+ "@file-system\0"
+ "@io-event\0"
+ "@ipc\0"
+ "@keyring\0"
+ "@memlock\0"
+ "@network-io\0"
+ "@process\0"
+ "@resources\0"
+ "@setuid\0"
+ "@signal\0"
+ "@sync\0"
+ "@timer\0"
+ "brk\0"
+ "capget\0"
+ "capset\0"
+ "copy_file_range\0"
+ "fadvise64\0"
+ "fadvise64_64\0"
+ "flock\0"
+ "get_mempolicy\0"
+ "getcpu\0"
+ "getpriority\0"
+ "getrandom\0"
+ "ioctl\0"
+ "ioprio_get\0"
+ "kcmp\0"
+ "madvise\0"
+ "mprotect\0"
+ "mremap\0"
+ "name_to_handle_at\0"
+ "oldolduname\0"
+ "olduname\0"
+ "personality\0"
+ "readahead\0"
+ "readdir\0"
+ "remap_file_pages\0"
+ "sched_get_priority_max\0"
+ "sched_get_priority_min\0"
+ "sched_getaffinity\0"
+ "sched_getattr\0"
+ "sched_getparam\0"
+ "sched_getscheduler\0"
+ "sched_rr_get_interval\0"
+ "sched_yield\0"
+ "sendfile\0"
+ "sendfile64\0"
+ "setfsgid\0"
+ "setfsgid32\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setpgid\0"
+ "setsid\0"
+ "splice\0"
+ "sysinfo\0"
+ "tee\0"
+ "umask\0"
+ "uname\0"
+ "userfaultfd\0"
+ "vmsplice\0"
+ },
+ [SYSCALL_FILTER_SET_TIMER] = {
+ .name = "@timer",
+ .help = "Schedule operations by time",
+ .value =
+ "alarm\0"
+ "getitimer\0"
+ "setitimer\0"
+ "timer_create\0"
+ "timer_delete\0"
+ "timer_getoverrun\0"
+ "timer_gettime\0"
+ "timer_settime\0"
+ "timerfd_create\0"
+ "timerfd_gettime\0"
+ "timerfd_settime\0"
+ "times\0"
+ },
+};
+
+/* Looks up a built-in system call group by its name, which must start with '@'. Returns a pointer into
+ * syscall_filter_sets[], or NULL if the name is empty, lacks the '@' prefix or matches no group. */
+const SyscallFilterSet *syscall_filter_set_find(const char *name) {
+        const SyscallFilterSet *set;
+
+        if (isempty(name) || name[0] != '@')
+                return NULL;
+
+        for (set = syscall_filter_sets; set < syscall_filter_sets + _SYSCALL_FILTER_SET_MAX; set++)
+                if (streq(set->name, name))
+                        return set;
+
+        return NULL;
+}
+
+static int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action, char **exclude, bool log_missing);
+
+/* Adds a rule with the given action for one item, which is either the name of a single system call or the name of
+ * a group ("@…"), in which case the whole group is added recursively. Items listed in "exclude" are skipped.
+ *
+ * System calls whose name cannot be resolved, or which do not exist on the architecture (-EDOM), are ignored;
+ * this is logged at debug level if log_missing is set. Returns 0 on success, -EINVAL for an unknown group, or
+ * the negative error of seccomp_rule_add_exact().
+ *
+ * NOTE(review): "seccomp" is declared as scmp_filter_ctx *, but is handed on as is to functions taking a
+ * scmp_filter_ctx, and callers in this file pass a scmp_filter_ctx — confirm the intended type. */
+int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name, uint32_t action, char **exclude, bool log_missing) {
+        bool ignore;
+        int id, r;
+
+        assert(seccomp);
+        assert(name);
+
+        if (strv_contains(exclude, name))
+                return 0;
+
+        if (name[0] == '@') {
+                const SyscallFilterSet *group;
+
+                group = syscall_filter_set_find(name);
+                if (!group)
+                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+                                               "Filter set %s is not known!",
+                                               name);
+
+                return seccomp_add_syscall_filter_set(seccomp, group, action, exclude, log_missing);
+        }
+
+        id = seccomp_syscall_resolve_name(name);
+        if (id == __NR_SCMP_ERROR) {
+                if (log_missing)
+                        log_debug("System call %s is not known, ignoring.", name);
+                return 0;
+        }
+
+        r = seccomp_rule_add_exact(seccomp, action, id, 0);
+        if (r >= 0)
+                return 0;
+
+        /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+        ignore = r == -EDOM;
+
+        if (!ignore || log_missing)
+                log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+                                name, id, ignore ? ", ignoring" : "");
+
+        return ignore ? 0 : r;
+}
+
+/* Adds every item of the given group to the filter context via seccomp_add_syscall_filter_item(), stopping at
+ * the first item that fails. Returns 0 on success, or that item's negative error. */
+static int seccomp_add_syscall_filter_set(
+                scmp_filter_ctx seccomp,
+                const SyscallFilterSet *set,
+                uint32_t action,
+                char **exclude,
+                bool log_missing) {
+
+        const char *entry;
+
+        assert(seccomp);
+        assert(set);
+
+        NULSTR_FOREACH(entry, set->value) {
+                int r;
+
+                r = seccomp_add_syscall_filter_item(seccomp, entry, action, exclude, log_missing);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
+
+/* Builds and loads a filter for the given group on every local architecture.
+ *
+ * Failing to set up a context or to add the group aborts with the error. A load failure with -EPERM or -EACCES
+ * is returned as well; any other load failure is only logged at debug level and the architecture is skipped.
+ * Returns 0 otherwise. */
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing) {
+        uint32_t arch;
+
+        assert(set);
+
+        /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for
+         * each local arch. */
+
+        SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+                _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+                int r;
+
+                log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+                r = seccomp_init_for_arch(&seccomp, arch, default_action);
+                if (r < 0)
+                        return r;
+
+                r = seccomp_add_syscall_filter_set(seccomp, set, action, NULL, log_missing);
+                if (r < 0)
+                        return log_debug_errno(r, "Failed to add filter set: %m");
+
+                r = seccomp_load(seccomp);
+                if (r == -EPERM || r == -EACCES)
+                        return r;
+                if (r < 0)
+                        log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+        }
+
+        return 0;
+}
+
+/* Builds and loads one seccomp filter per local architecture from a hashmap of system calls.
+ *
+ * Each hashmap key is decoded as "system call number + 1" and each value as an integer error number. When
+ * 'action' is not SCMP_ACT_ALLOW and the stored error number is >= 0, the rule uses SCMP_ACT_ERRNO() with
+ * that number instead of 'action'.
+ *
+ * Returns 0 on success (including the no-op case of an empty map with an SCMP_ACT_ALLOW default), or a
+ * negative error from seccomp_init_for_arch(), from a non-ignorable seccomp_rule_add_exact() failure, or
+ * -EPERM/-EACCES from seccomp_load(). */
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing) {
+ uint32_t arch;
+ int r;
+
+ /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Hashmap* of syscalls, instead of a
+ * SyscallFilterSet* table. */
+
+ /* Nothing to filter and everything allowed by default: no filter needs to be installed. */
+ if (hashmap_isempty(set) && default_action == SCMP_ACT_ALLOW)
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ Iterator i;
+ void *syscall_id, *val;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, default_action);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(val, syscall_id, set, i) {
+ uint32_t a = action;
+ /* Keys are stored with an offset of one, undo that here. */
+ int id = PTR_TO_INT(syscall_id) - 1;
+ int error = PTR_TO_INT(val);
+
+ /* A non-negative per-syscall error number overrides the generic action, unless we are allowing. */
+ if (action != SCMP_ACT_ALLOW && error >= 0)
+ a = SCMP_ACT_ERRNO(error);
+
+ r = seccomp_rule_add_exact(seccomp, a, id, 0);
+ if (r < 0) {
+ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+ _cleanup_free_ char *n = NULL;
+ bool ignore;
+
+ /* NOTE(review): the name is resolved for SCMP_ARCH_NATIVE rather than for 'arch'; it is only
+ * used for the log message below. */
+ n = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, id);
+ ignore = r == -EDOM;
+ if (!ignore || log_missing)
+ log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+ strna(n), id, ignore ? ", ignoring" : "");
+ if (!ignore)
+ return r;
+ }
+ }
+
+ /* Permission problems are fatal; any other load failure only skips this architecture. */
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+/* Parses one system call name or "@group" name and updates the 'filter' hashmap accordingly.
+ *
+ * Groups are expanded recursively. For a single system call, the entry "number + 1" -> errno_num is either
+ * inserted/updated or removed, depending on the SECCOMP_PARSE_INVERT and SECCOMP_PARSE_WHITELIST flags.
+ * Unknown names yield -EINVAL unless SECCOMP_PARSE_PERMISSIVE is set, in which case they are logged (at
+ * LOG_WARNING with SECCOMP_PARSE_LOG, at LOG_DEBUG otherwise) and ignored. 'unit', 'filename' and 'line' are
+ * only passed on to log_syntax().
+ *
+ * Returns 0 on success or when the name was ignored, a negative error otherwise. */
+int seccomp_parse_syscall_filter_full(
+ const char *name,
+ int errno_num,
+ Hashmap *filter,
+ SeccompParseFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line) {
+
+ int r;
+
+ assert(name);
+ assert(filter);
+
+ if (name[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
+
+ set = syscall_filter_set_find(name);
+ if (!set) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Unknown system call group, ignoring: %s", name);
+ return 0;
+ }
+
+ NULSTR_FOREACH(i, set->value) {
+ /* Call ourselves again, for the group to parse. Note that we downgrade logging here (i.e. take
+ * away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem,
+ * not a problem in user configuration data and we shouldn't pretend otherwise by complaining
+ * about them. */
+ r = seccomp_parse_syscall_filter_full(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(name);
+ if (id == __NR_SCMP_ERROR) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Failed to parse system call, ignoring: %s", name);
+ return 0;
+ }
+
+ /* If we previously wanted to forbid a syscall and now
+ * we want to allow it, then remove it from the list. */
+ /* The entry is added when exactly one of "not inverted" and "whitelist" fails to hold, i.e. for
+ * (not inverted, whitelist) or (inverted, not whitelist); in the two other combinations it is removed. */
+ if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_WHITELIST)) {
+ /* Keys are stored as "number + 1". */
+ r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num));
+ if (r < 0)
+ switch (r) {
+ case -ENOMEM:
+ return flags & SECCOMP_PARSE_LOG ? log_oom() : -ENOMEM;
+ case -EEXIST:
+ /* Key already present: overwrite the stored error number with the new one. */
+ assert_se(hashmap_update(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)) == 0);
+ break;
+ default:
+ return r;
+ }
+ } else
+ (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
+ }
+
+ return 0;
+}
+
+/* Installs, per local architecture, seccomp rules that make unshare(), clone() and setns() fail with EPERM
+ * for every namespace type from namespace_flag_map[] whose flag is not fully contained in 'retain'.
+ *
+ * If 'retain' covers all of NAMESPACE_FLAGS_ALL nothing is installed. Returns 0 on success, a negative error
+ * from seccomp_init_for_arch(), or -EPERM/-EACCES from seccomp_load(). A failure to add a rule, or any other
+ * load failure, is logged at debug level and only causes the affected architecture to be skipped. */
+int seccomp_restrict_namespaces(unsigned long retain) {
+ uint32_t arch;
+ int r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *s = NULL;
+
+ (void) namespace_flags_to_string(retain, &s);
+ log_debug("Restricting namespace to: %s.", strna(s));
+ }
+
+ /* NOOP? */
+ if ((retain & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL)
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ unsigned i;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if ((retain & NAMESPACE_FLAGS_ALL) == 0)
+ /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
+ * altogether. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 0);
+ else
+ /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the
+ * special invocation with a zero flags argument, right here. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* The table is terminated by an entry with a NULL name. */
+ for (i = 0; namespace_flag_map[i].name; i++) {
+ unsigned long f;
+
+ f = namespace_flag_map[i].flag;
+ if ((retain & f) == f) {
+ log_debug("Permitting %s.", namespace_flag_map[i].name);
+ continue;
+ }
+
+ log_debug("Blocking %s.", namespace_flag_map[i].name);
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(unshare),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ /* On s390/s390x the first two parameters to clone are switched */
+ if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ else
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ /* When everything is prohibited setns() was already blocked unconditionally above, hence
+ * per-flag setns() rules are only needed if something is retained. */
+ if ((retain & NAMESPACE_FLAGS_ALL) != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+ }
+ }
+ /* A 'break' above left a negative r behind: skip loading for this architecture. */
+ if (r < 0)
+ continue;
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+/* Installs, per local architecture, a filter that makes the _sysctl() system call fail with EPERM.
+ * x32 and aarch64 are skipped since they have no _sysctl syscall.
+ *
+ * Returns 0 on success, a negative error from seccomp_init_for_arch(), or -EPERM/-EACCES from
+ * seccomp_load(). Other failures are logged at debug level and the architecture is skipped. */
+int seccomp_protect_sysctl(void) {
+        uint32_t arch;
+
+        SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+                _cleanup_(seccomp_releasep) scmp_filter_ctx ctx = NULL;
+                int k;
+
+                log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+                /* No _sysctl syscall */
+                if (arch == SCMP_ARCH_X32 || arch == SCMP_ARCH_AARCH64)
+                        continue;
+
+                k = seccomp_init_for_arch(&ctx, arch, SCMP_ACT_ALLOW);
+                if (k < 0)
+                        return k;
+
+                k = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(_sysctl), 0);
+                if (k < 0) {
+                        log_debug_errno(k, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+                        continue;
+                }
+
+                k = seccomp_load(ctx);
+                if (k == -EPERM || k == -EACCES)
+                        return k;
+                if (k < 0)
+                        log_debug_errno(k, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+        }
+
+        return 0;
+}
+
+/* Installs, per supported local architecture, rules that make socket() fail with EAFNOSUPPORT depending on
+ * its first argument (the address family).
+ *
+ * With 'whitelist' set, every family that is not contained in 'address_families' is blocked; otherwise
+ * exactly the families contained in the set are blocked. Architectures not listed as supported in the switch
+ * below are skipped without installing anything.
+ *
+ * Returns 0 on success, a negative error from seccomp_init_for_arch(), or -EPERM/-EACCES from
+ * seccomp_load(). Rule insertion failures and other load failures only skip the affected architecture. */
+int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ bool supported;
+ Iterator i;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_ARM:
+ case SCMP_ARCH_AARCH64:
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ case SCMP_ARCH_MIPSEL64N32:
+ case SCMP_ARCH_MIPS64N32:
+ case SCMP_ARCH_MIPSEL64:
+ case SCMP_ARCH_MIPS64:
+ /* These we know we support (i.e. are the ones that do not use socketcall()) */
+ supported = true;
+ break;
+
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_MIPSEL:
+ case SCMP_ARCH_MIPS:
+ default:
+ /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
+ * don't know */
+ supported = false;
+ break;
+ }
+
+ if (!supported)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if (whitelist) {
+ int af, first = 0, last = 0;
+ void *afp;
+
+ /* If this is a whitelist, we first block the address families that are out of range and then
+ * everything that is not in the set. First, we find the lowest and highest address family in
+ * the set. */
+
+ SET_FOREACH(afp, address_families, i) {
+ af = PTR_TO_INT(afp);
+
+ /* Entries outside of the range 1 … af_max()-1 do not take part in the range computation. */
+ if (af <= 0 || af >= af_max())
+ continue;
+
+ if (first == 0 || af < first)
+ first = af;
+
+ if (last == 0 || af > last)
+ last = af;
+ }
+
+ /* Either both bounds were found or neither. */
+ assert((first == 0) == (last == 0));
+
+ if (first == 0) {
+
+ /* No entries in the valid range, block everything */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ } else {
+
+ /* Block everything below the first entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_LT, first));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything above the last entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_GT, last));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything between the first and last entry */
+ /* NOTE(review): the loop actually walks all of 1 … af_max()-1 rather than only first … last, so
+ * families outside of [first, last] get an equality rule in addition to the range rules above. */
+ for (af = 1; af < af_max(); af++) {
+
+ if (set_contains(address_families, INT_TO_PTR(af)))
+ continue;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, af));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ } else {
+ void *af;
+
+ /* If this is a blacklist, then generate one rule for
+ * each address family that are then combined in OR
+ * checks. */
+
+ SET_FOREACH(af, address_families, i) {
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
+ if (r < 0)
+ break;
+ }
+ /* If the set is empty r still holds the non-negative result of seccomp_init_for_arch(). */
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+/* Installs, per local architecture, rules that make sched_setscheduler() fail with EPERM whenever its second
+ * argument (the policy) is not one of the permitted policies listed below.
+ *
+ * Returns 0 on success, a negative error from seccomp_init_for_arch(), or -EPERM/-EACCES from
+ * seccomp_load(). If a rule cannot be added, or loading fails for another reason, this is logged at debug
+ * level and the architecture is skipped. */
+int seccomp_restrict_realtime(void) {
+        static const int permitted_policies[] = {
+                SCHED_OTHER,
+                SCHED_BATCH,
+                SCHED_IDLE,
+        };
+
+        int r, max_policy = 0;
+        uint32_t arch;
+        unsigned i;
+
+        /* Determine the highest policy constant we want to allow */
+        for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+                if (permitted_policies[i] > max_policy)
+                        max_policy = permitted_policies[i];
+
+        SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+                _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+                int p;
+
+                log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+                r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+                if (r < 0)
+                        return r;
+
+                /* Go through all policies with lower values than that, and block them -- unless they appear in the
+                 * whitelist. */
+                for (p = 0; p < max_policy; p++) {
+                        bool good = false;
+
+                        /* Check if this is in the whitelist. */
+                        for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+                                if (permitted_policies[i] == p) {
+                                        good = true;
+                                        break;
+                                }
+
+                        if (good)
+                                continue;
+
+                        /* Deny this policy */
+                        r = seccomp_rule_add_exact(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPERM),
+                                        SCMP_SYS(sched_setscheduler),
+                                        1,
+                                        SCMP_A1(SCMP_CMP_EQ, p));
+                        if (r < 0)
+                                /* Leave the policy loop; the check right below skips the whole architecture. A
+                                 * plain 'continue' here would merely advance to the next policy and then load an
+                                 * incomplete filter. */
+                                break;
+                }
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+                        continue;
+                }
+
+                /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are
+                 * unsigned here, hence no need to check for < 0 values. */
+                r = seccomp_rule_add_exact(
+                                seccomp,
+                                SCMP_ACT_ERRNO(EPERM),
+                                SCMP_SYS(sched_setscheduler),
+                                1,
+                                SCMP_A1(SCMP_CMP_GT, max_policy));
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+                        continue;
+                }
+
+                r = seccomp_load(seccomp);
+                if (IN_SET(r, -EPERM, -EACCES))
+                        return r;
+                if (r < 0)
+                        log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+        }
+
+        return 0;
+}
+
+/* Adds an EPERM rule for system call 'nr' with 'arg_cnt' argument comparisons ('arg' is forwarded as the
+ * comparison) to the context. On failure a debug message naming the system call and the architecture is
+ * logged. Returns the result of seccomp_rule_add_exact() unchanged. */
+static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
+                                      uint32_t arch,
+                                      int nr,
+                                      unsigned arg_cnt,
+                                      const struct scmp_arg_cmp arg) {
+        _cleanup_free_ char *syscall_name = NULL;
+        int k;
+
+        k = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
+        if (k >= 0)
+                return k;
+
+        /* Resolve the number back into a name, for the log message only. */
+        syscall_name = seccomp_syscall_resolve_num_arch(arch, nr);
+        log_debug_errno(k, "Failed to add %s() rule for architecture %s, skipping: %m",
+                        strna(syscall_name),
+                        seccomp_arch_to_string(arch));
+
+        return k;
+}
+
+/* For known architectures, check that syscalls are indeed defined or not. */
+/* These are compile-time assertions on the build architecture: SCMP_SYS(shmget/shmat/shmdt) must be positive
+ * on x86-64, arm and aarch64, and negative on i386 and powerpc64. Other architectures are not checked. */
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+assert_cc(SCMP_SYS(shmget) > 0);
+assert_cc(SCMP_SYS(shmat) > 0);
+assert_cc(SCMP_SYS(shmdt) > 0);
+#elif defined(__i386__) || defined(__powerpc64__)
+assert_cc(SCMP_SYS(shmget) < 0);
+assert_cc(SCMP_SYS(shmat) < 0);
+assert_cc(SCMP_SYS(shmdt) < 0);
+#endif
+
+/* Installs, per local architecture, rules that reject with EPERM:
+ * - the architecture's mmap variant when both PROT_EXEC and PROT_WRITE are set in its third argument,
+ * - on x86 additionally every call to the other mmap variant,
+ * - mprotect() (and pkey_mprotect(), if __NR_pkey_mprotect is defined at build time) with PROT_EXEC set,
+ * - shmat() with SHM_EXEC set, on the architectures where a shmat syscall is recorded below.
+ *
+ * Architectures without an entry in the switch are skipped. Returns 0 on success, a negative error from
+ * seccomp_init_for_arch(), or -EPERM/-EACCES from seccomp_load(). Rule insertion failures and other load
+ * failures only skip the affected architecture. */
+int seccomp_memory_deny_write_execute(void) {
+
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ /* 0 means "not applicable on this architecture" for all three. */
+ int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ case SCMP_ARCH_X86:
+ filter_syscall = SCMP_SYS(mmap2);
+ block_syscall = SCMP_SYS(mmap);
+ break;
+
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ filter_syscall = SCMP_SYS(mmap);
+
+ /* Note that shmat() isn't available, and the call is multiplexed through ipc().
+ * We ignore that here, which means there's still a way to get writable/executable
+ * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
+
+ break;
+
+ case SCMP_ARCH_ARM:
+ filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_AARCH64:
+ filter_syscall = SCMP_SYS(mmap); /* amd64, x32, and arm64 have only mmap */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__)
+#warning "Consider adding the right mmap() syscall definitions here!"
+#endif
+ }
+
+ /* Can't filter mmap() on this arch, then skip it */
+ if (filter_syscall == 0)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ /* add_seccomp_syscall_filter() already logs failures, hence we only skip the architecture here. */
+ r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0)
+ continue;
+
+ if (block_syscall != 0) {
+ /* No argument comparison: the call is rejected unconditionally. */
+ r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
+ if (r < 0)
+ continue;
+ }
+
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+
+#ifdef __NR_pkey_mprotect
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(pkey_mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+#endif
+
+ if (shmat_syscall != 0) {
+ /* NOTE(review): shmat_syscall is only used as a flag here; the rule itself is added for
+ * SCMP_SYS(shmat), which is the value assigned to it above. */
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+ if (r < 0)
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (IN_SET(r, -EPERM, -EACCES))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+/* Installs a rule-less, allow-everything filter whose only effect is to restrict the permitted system call
+ * architectures to those in 'archs' (entries are stored as "architecture constant + 1").
+ *
+ * Returns 0 on success, -ENOMEM if no context could be allocated, a negative error from seccomp_arch_add()
+ * (other than -EEXIST) or seccomp_attr_set(), or -EPERM/-EACCES from seccomp_load(). Any other load failure
+ * is logged at debug level and ignored. */
+int seccomp_restrict_archs(Set *archs) {
+        _cleanup_(seccomp_releasep) scmp_filter_ctx ctx = NULL;
+        bool need_x86_64;
+        Iterator it;
+        void *entry;
+        int k;
+
+        /* The main use of this is to stop processes from bypassing system call restrictions by going through
+         * a broader (multiplexing) syscall that only exists in a non-native architecture. There are some
+         * qualifications, but no known holes in that use case so far. */
+
+        /* libseccomp puts our "native" (current) architecture into the filter by default and we leave it
+         * there: callers expect to be able to execve() afterwards, to run a program with the restrictions
+         * applied. */
+        ctx = seccomp_init(SCMP_ACT_ALLOW);
+        if (!ctx)
+                return -ENOMEM;
+
+        SET_FOREACH(entry, archs, it) {
+                k = seccomp_arch_add(ctx, PTR_TO_UINT32(entry) - 1);
+                if (k < 0 && k != -EEXIST)
+                        return k;
+        }
+
+        /* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32 syscalls
+         * should basically match x86-64 for everything except the pointer type. The important thing is that
+         * you can block the old 32-bit x86 syscalls.
+         * https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */
+        need_x86_64 =
+                seccomp_arch_native() == SCMP_ARCH_X32 ||
+                set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1));
+        if (need_x86_64) {
+                k = seccomp_arch_add(ctx, SCMP_ARCH_X86_64);
+                if (k < 0 && k != -EEXIST)
+                        return k;
+        }
+
+        k = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
+        if (k < 0)
+                return k;
+
+        k = seccomp_load(ctx);
+        if (k == -EPERM || k == -EACCES)
+                return k;
+        if (k < 0)
+                log_debug_errno(k, "Failed to restrict system call architectures, skipping: %m");
+
+        return 0;
+}
+
+/* Converts a string list of architecture names into a newly allocated set.
+ *
+ * Each architecture is stored as "architecture constant + 1" cast to a pointer, the encoding that
+ * seccomp_restrict_archs() expects. On success the set is returned in *archs and 0 is returned. Returns
+ * -EINVAL if a name cannot be parsed, -ENOMEM if inserting into the set fails, or the error of
+ * set_ensure_allocated(); *archs is not touched on failure. */
+int parse_syscall_archs(char **l, Set **archs) {
+        /* Must start out as NULL: its address is handed to set_ensure_allocated() below, and the cleanup
+         * handler frees it on every return path. */
+        _cleanup_set_free_ Set *_archs = NULL;
+        char **s;
+        int r;
+
+        assert(l);
+        assert(archs);
+
+        r = set_ensure_allocated(&_archs, NULL);
+        if (r < 0)
+                return r;
+
+        STRV_FOREACH(s, l) {
+                uint32_t a;
+
+                r = seccomp_arch_from_string(*s, &a);
+                if (r < 0)
+                        return -EINVAL;
+
+                r = set_put(_archs, UINT32_TO_PTR(a + 1));
+                if (r < 0)
+                        return -ENOMEM;
+        }
+
+        /* Hand ownership over to the caller, so that the cleanup handler has nothing left to free. */
+        *archs = TAKE_PTR(_archs);
+
+        return 0;
+}
+
+/* Adds all system calls of 'set' to the 'filter' hashmap (add == true) or removes them from it (add ==
+ * false). Nested "@group" entries are expanded recursively. Entries are keyed by "number + 1" and inserted
+ * with the value -1. System call names that cannot be resolved are logged at debug level and skipped.
+ *
+ * Returns 0 on success, -ENXIO if a nested group is unknown, or the negative error of hashmap_put(). */
+int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) {
+        const char *entry;
+
+        assert(set);
+
+        NULSTR_FOREACH(entry, set->value) {
+                int nr, k;
+
+                if (entry[0] == '@') {
+                        const SyscallFilterSet *nested = syscall_filter_set_find(entry);
+
+                        if (!nested)
+                                return -ENXIO;
+
+                        k = seccomp_filter_set_add(filter, add, nested);
+                        if (k < 0)
+                                return k;
+
+                        continue;
+                }
+
+                nr = seccomp_syscall_resolve_name(entry);
+                if (nr == __NR_SCMP_ERROR) {
+                        log_debug("Couldn't resolve system call, ignoring: %s", entry);
+                        continue;
+                }
+
+                if (!add) {
+                        (void) hashmap_remove(filter, INT_TO_PTR(nr + 1));
+                        continue;
+                }
+
+                k = hashmap_put(filter, INT_TO_PTR(nr + 1), INT_TO_PTR(-1));
+                if (k < 0)
+                        return k;
+        }
+
+        return 0;
+}
+
+/* Installs, per local architecture, a rule that makes personality() fail with EPERM whenever its first
+ * argument differs from 'personality'.
+ *
+ * Returns -EINVAL if 'personality' is >= PERSONALITY_INVALID, a negative error from seccomp_init_for_arch(),
+ * or -EPERM/-EACCES from seccomp_load(); 0 otherwise. If the rule cannot be added, or loading fails for
+ * another reason, this is logged at debug level and the architecture is skipped. */
+int seccomp_lock_personality(unsigned long personality) {
+        uint32_t arch;
+        int r;
+
+        if (personality >= PERSONALITY_INVALID)
+                return -EINVAL;
+
+        SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+                _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+                r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+                if (r < 0)
+                        return r;
+
+                r = seccomp_rule_add_exact(
+                                seccomp,
+                                SCMP_ACT_ERRNO(EPERM),
+                                SCMP_SYS(personality),
+                                1,
+                                SCMP_A0(SCMP_CMP_NE, personality));
+                if (r < 0) {
+                        /* This is a personality() rule, not a scheduler one: name it correctly in the log. */
+                        log_debug_errno(r, "Failed to add personality() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+                        continue;
+                }
+
+                r = seccomp_load(seccomp);
+                if (IN_SET(r, -EPERM, -EACCES))
+                        return r;
+                if (r < 0)
+                        log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+        }
+
+        return 0;
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
new file mode 100644
index 0000000..d8a36c4
--- /dev/null
+++ b/src/shared/seccomp-util.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <seccomp.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "set.h"
+
+const char* seccomp_arch_to_string(uint32_t c);
+int seccomp_arch_from_string(const char *n, uint32_t *ret);
+
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action);
+
+bool is_seccomp_available(void);
+
+/* One named group of system calls, as stored in syscall_filter_sets[]. */
+typedef struct SyscallFilterSet {
+ const char *name; /* group name; syscall_filter_set_find() only accepts names starting with '@' */
+ const char *help; /* presumably a human-readable description -- TODO confirm against the table */
+ const char *value; /* NUL-separated list (walked with NULSTR_FOREACH) of system call names and "@group" references */
+} SyscallFilterSet;
+
+/* Identifiers of the predefined system call groups. _SYSCALL_FILTER_SET_MAX is the number of groups and is
+ * the loop bound when searching syscall_filter_sets[] by name; presumably the other constants are the
+ * indices of the respective entries in that array -- confirm against the table definition. */
+enum {
+ /* Please leave DEFAULT first, but sort the rest alphabetically */
+ SYSCALL_FILTER_SET_DEFAULT,
+ SYSCALL_FILTER_SET_AIO,
+ SYSCALL_FILTER_SET_BASIC_IO,
+ SYSCALL_FILTER_SET_CHOWN,
+ SYSCALL_FILTER_SET_CLOCK,
+ SYSCALL_FILTER_SET_CPU_EMULATION,
+ SYSCALL_FILTER_SET_DEBUG,
+ SYSCALL_FILTER_SET_FILE_SYSTEM,
+ SYSCALL_FILTER_SET_IO_EVENT,
+ SYSCALL_FILTER_SET_IPC,
+ SYSCALL_FILTER_SET_KEYRING,
+ SYSCALL_FILTER_SET_MEMLOCK,
+ SYSCALL_FILTER_SET_MODULE,
+ SYSCALL_FILTER_SET_MOUNT,
+ SYSCALL_FILTER_SET_NETWORK_IO,
+ SYSCALL_FILTER_SET_OBSOLETE,
+ SYSCALL_FILTER_SET_PRIVILEGED,
+ SYSCALL_FILTER_SET_PROCESS,
+ SYSCALL_FILTER_SET_RAW_IO,
+ SYSCALL_FILTER_SET_REBOOT,
+ SYSCALL_FILTER_SET_RESOURCES,
+ SYSCALL_FILTER_SET_SETUID,
+ SYSCALL_FILTER_SET_SIGNAL,
+ SYSCALL_FILTER_SET_SWAP,
+ SYSCALL_FILTER_SET_SYNC,
+ SYSCALL_FILTER_SET_SYSTEM_SERVICE,
+ SYSCALL_FILTER_SET_TIMER,
+ _SYSCALL_FILTER_SET_MAX
+};
+
+extern const SyscallFilterSet syscall_filter_sets[];
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name);
+
+int seccomp_filter_set_add(Hashmap *s, bool b, const SyscallFilterSet *set);
+
+int seccomp_add_syscall_filter_item(scmp_filter_ctx *ctx, const char *name, uint32_t action, char **exclude, bool log_missing);
+
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing);
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing);
+
+/* Flags for seccomp_parse_syscall_filter_full(). */
+typedef enum SeccompParseFlags {
+ /* INVERT and WHITELIST together select the operation: a system call is added to the filter map when
+ * exactly one of the two is set, and removed from it when both or neither are set. */
+ SECCOMP_PARSE_INVERT = 1 << 0,
+ SECCOMP_PARSE_WHITELIST = 1 << 1,
+ /* Log ignored names at LOG_WARNING instead of LOG_DEBUG, and report OOM through log_oom(). */
+ SECCOMP_PARSE_LOG = 1 << 2,
+ /* Ignore unknown system calls and groups (after logging) instead of failing with -EINVAL. */
+ SECCOMP_PARSE_PERMISSIVE = 1 << 3,
+} SeccompParseFlags;
+
+int seccomp_parse_syscall_filter_full(
+ const char *name, int errno_num, Hashmap *filter, SeccompParseFlags flags,
+ const char *unit, const char *filename, unsigned line);
+
+/* Convenience wrapper around seccomp_parse_syscall_filter_full() for callers without configuration file
+ * context: passes NULL for unit and filename, and 0 for the line number. */
+static inline int seccomp_parse_syscall_filter(const char *name, int errno_num, Hashmap *filter, SeccompParseFlags flags) {
+ return seccomp_parse_syscall_filter_full(name, errno_num, filter, flags, NULL, NULL, 0);
+}
+
+int seccomp_restrict_archs(Set *archs);
+int seccomp_restrict_namespaces(unsigned long retain);
+int seccomp_protect_sysctl(void);
+int seccomp_restrict_address_families(Set *address_families, bool whitelist);
+int seccomp_restrict_realtime(void);
+int seccomp_memory_deny_write_execute(void);
+int seccomp_lock_personality(unsigned long personality);
+
+extern const uint32_t seccomp_local_archs[];
+
+/* Loop header that assigns each element of seccomp_local_archs[] to 'arch' in turn, stopping at the
+ * (uint32_t) -1 terminator. 'arch' must be an assignable uint32_t lvalue declared by the caller; the index
+ * variable _i is declared by the macro itself. The first element is stored in 'arch' by the statement
+ * expression in the initializer, before the terminator check runs. */
+#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
+ for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \
+ seccomp_local_archs[_i] != (uint32_t) -1; \
+ (arch) = seccomp_local_archs[++_i])
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
+
+int parse_syscall_archs(char **l, Set **archs);
diff --git a/src/shared/securebits-util.c b/src/shared/securebits-util.c
new file mode 100644
index 0000000..6d31dfe
--- /dev/null
+++ b/src/shared/securebits-util.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "securebits-util.h"
+#include "string-util.h"
+
+/* Formats the secure bits set in 'i' as a space-separated list of names in a newly allocated string, returned
+ * in *s. If none of the known bits is set, the result is the empty string. Returns 0 on success and -ENOMEM
+ * if asprintf() fails. */
+int secure_bits_to_string_alloc(int i, char **s) {
+        _cleanup_free_ char *joined = NULL;
+        size_t n;
+
+        assert(s);
+
+        /* Every name carries a trailing space, so that the pieces can simply be concatenated. */
+        if (asprintf(&joined, "%s%s%s%s%s%s",
+                     (i & (1 << SECURE_KEEP_CAPS)) ? "keep-caps " : "",
+                     (i & (1 << SECURE_KEEP_CAPS_LOCKED)) ? "keep-caps-locked " : "",
+                     (i & (1 << SECURE_NO_SETUID_FIXUP)) ? "no-setuid-fixup " : "",
+                     (i & (1 << SECURE_NO_SETUID_FIXUP_LOCKED)) ? "no-setuid-fixup-locked " : "",
+                     (i & (1 << SECURE_NOROOT)) ? "noroot " : "",
+                     (i & (1 << SECURE_NOROOT_LOCKED)) ? "noroot-locked " : "") < 0)
+                return -ENOMEM;
+
+        /* Chop off the space that follows the last name. */
+        n = strlen(joined);
+        if (n > 0)
+                joined[n - 1] = '\0';
+
+        *s = TAKE_PTR(joined);
+        return 0;
+}
+
+/* Parses a list of secure bit names, as split by extract_first_word() with EXTRACT_QUOTES, into a bit mask.
+ * Words that match none of the known names are ignored. Returns the mask, or -ENOMEM if word extraction runs
+ * out of memory; any other extraction result <= 0 ends parsing and the bits collected so far are returned. */
+int secure_bits_from_string(const char *s) {
+        static const struct {
+                const char *name;
+                int bit;
+        } known[] = {
+                { "keep-caps",              SECURE_KEEP_CAPS              },
+                { "keep-caps-locked",       SECURE_KEEP_CAPS_LOCKED       },
+                { "no-setuid-fixup",        SECURE_NO_SETUID_FIXUP        },
+                { "no-setuid-fixup-locked", SECURE_NO_SETUID_FIXUP_LOCKED },
+                { "noroot",                 SECURE_NOROOT                 },
+                { "noroot-locked",          SECURE_NOROOT_LOCKED          },
+        };
+
+        const char *cursor = s;
+        int mask = 0;
+
+        for (;;) {
+                _cleanup_free_ char *word = NULL;
+                size_t j;
+                int k;
+
+                k = extract_first_word(&cursor, &word, NULL, EXTRACT_QUOTES);
+                if (k == -ENOMEM)
+                        return k;
+                if (k <= 0)
+                        break;
+
+                for (j = 0; j < sizeof(known) / sizeof(known[0]); j++)
+                        if (streq(word, known[j].name)) {
+                                mask |= 1 << known[j].bit;
+                                break;
+                        }
+        }
+
+        return mask;
+}
diff --git a/src/shared/securebits-util.h b/src/shared/securebits-util.h
new file mode 100644
index 0000000..b5ec6ee
--- /dev/null
+++ b/src/shared/securebits-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "missing_securebits.h"
+
+int secure_bits_to_string_alloc(int i, char **s);
+int secure_bits_from_string(const char *s);
+
+/* Returns true if 'i' has no bits set outside of SECURE_ALL_BITS | SECURE_ALL_LOCKS. */
+static inline bool secure_bits_is_valid(int i) {
+        return (i & ~(SECURE_ALL_BITS | SECURE_ALL_LOCKS)) == 0;
+}
+
+/* Like secure_bits_to_string_alloc(), but first rejects masks that fail secure_bits_is_valid() with
+ * -EINVAL. */
+static inline int secure_bits_to_string_alloc_with_check(int n, char **s) {
+        return secure_bits_is_valid(n) ? secure_bits_to_string_alloc(n, s) : -EINVAL;
+}
diff --git a/src/shared/serialize.c b/src/shared/serialize.c
new file mode 100644
index 0000000..0333f87
--- /dev/null
+++ b/src/shared/serialize.c
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "missing.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+/* Writes one "key=value" line to 'f'. Returns 0 without writing anything if 'value' is NULL, -EINVAL (after
+ * logging a warning) if the resulting line would exceed LONG_LINE_MAX, and 1 once the line was written. */
+int serialize_item(FILE *f, const char *key, const char *value) {
+        size_t line_size;
+
+        assert(f);
+        assert(key);
+
+        if (!value)
+                return 0;
+
+        /* Whatever we write here must be readable again with read_line() and its LONG_LINE_MAX limit. This is
+         * merely a safety net, callers are expected to filter out oversized data earlier. */
+        line_size = strlen(key) + 1 + strlen(value) + 1; /* key, '=', value, '\n' */
+        if (line_size > LONG_LINE_MAX) {
+                log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+                return -EINVAL;
+        }
+
+        fputs(key, f);
+        fputc('=', f);
+        fputs(value, f);
+        fputc('\n', f);
+
+        return 1;
+}
+
+/* Like serialize_item(), but passes 'value' through cescape() first. Returns 0 if 'value' is NULL, the
+ * result of log_oom() if escaping fails, and otherwise whatever serialize_item() returns. */
+int serialize_item_escaped(FILE *f, const char *key, const char *value) {
+        _cleanup_free_ char *escaped = NULL;
+
+        assert(f);
+        assert(key);
+
+        if (!value)
+                return 0;
+
+        escaped = cescape(value);
+        if (!escaped)
+                return log_oom();
+
+        return serialize_item(f, key, escaped);
+}
+
+/* Writes one "key=<formatted value>" line to 'f', with the value produced printf-style from 'format'.
+ * Returns 1 once the line was written, or -EINVAL (after logging a warning) if formatting fails, the
+ * formatted value does not fit into the LONG_LINE_MAX sized buffer, or the whole line would exceed
+ * LONG_LINE_MAX. */
+int serialize_item_format(FILE *f, const char *key, const char *format, ...) {
+        char buf[LONG_LINE_MAX];
+        bool too_long;
+        va_list ap;
+        int k;
+
+        assert(f);
+        assert(key);
+        assert(format);
+
+        va_start(ap, format);
+        k = vsnprintf(buf, sizeof(buf), format, ap);
+        va_end(ap);
+
+        too_long =
+                k < 0 ||
+                (size_t) k >= sizeof(buf) ||
+                strlen(key) + 1 + k + 1 > LONG_LINE_MAX;
+        if (too_long) {
+                log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+                return -EINVAL;
+        }
+
+        fputs(key, f);
+        fputc('=', f);
+        fputs(buf, f);
+        fputc('\n', f);
+
+        return 1;
+}
+
+/* Duplicates 'fd' into the set 'fds' via fdset_put_dup() and serializes the number of the duplicate under
+ * 'key'. Returns 0 without doing anything if 'fd' is negative, the logged error if duplication fails, and
+ * otherwise the result of serialize_item_format(). */
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd) {
+        int dup_fd;
+
+        assert(f);
+        assert(key);
+
+        if (fd < 0)
+                return 0;
+
+        dup_fd = fdset_put_dup(fds, fd);
+        if (dup_fd < 0)
+                return log_error_errno(dup_fd, "Failed to add file descriptor to serialization set: %m");
+
+        return serialize_item_format(f, key, "%i", dup_fd);
+}
+
+/* Serializes 'usec' under 'key' using USEC_FMT. USEC_INFINITY is not written at all, in which case 0 is
+ * returned; otherwise the result of serialize_item_format() is returned. */
+int serialize_usec(FILE *f, const char *key, usec_t usec) {
+        assert(f);
+        assert(key);
+
+        return usec == USEC_INFINITY ? 0 : serialize_item_format(f, key, USEC_FMT, usec);
+}
+
+/* Serializes a dual timestamp under 'name' as its realtime and monotonic values separated by a space.
+ * Returns 0 without writing anything if dual_timestamp_is_set() is false for 't', otherwise the result of
+ * serialize_item_format(). */
+int serialize_dual_timestamp(FILE *f, const char *name, const dual_timestamp *t) {
+        assert(f);
+        assert(name);
+        assert(t);
+
+        if (dual_timestamp_is_set(t))
+                return serialize_item_format(f, name, USEC_FMT " " USEC_FMT, t->realtime, t->monotonic);
+
+        return 0;
+}
+
+/* Serializes every string of 'l' as its own escaped item under the same 'key'. All entries are attempted even
+ * after a failure. Returns the first error, or positive if anything was serialized, 0 otherwise. */
+int serialize_strv(FILE *f, const char *key, char **l) {
+        int result = 0;
+        char **entry;
+
+        STRV_FOREACH(entry, l) {
+                int k;
+
+                k = serialize_item_escaped(f, key, *entry);
+                if (k < 0) {
+                        /* Remember only the first error. */
+                        if (result >= 0)
+                                result = k;
+                } else if (k > 0 && result == 0)
+                        /* First successful write, and no error recorded so far. */
+                        result = k;
+        }
+
+        return result;
+}
+
+int deserialize_usec(const char *value, usec_t *ret) {
+ int r;
+
+ assert(value);
+
+ r = safe_atou64(value, ret);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse usec value \"%s\": %m", value);
+
+ return 0;
+}
+
+/* Parses two unsigned decimal numbers from 'value' and stores them in t->realtime and t->monotonic, in that
+ * order. Returns 0 on success and -EINVAL if either number is preceded by '-', if fewer than two numbers can
+ * be parsed, or if anything follows the second number. 't' is only written on success. */
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t) {
+ uint64_t a, b;
+ int r, pos;
+
+ assert(value);
+ assert(t);
+
+ /* Pre-scan: skip leading whitespace, the first run of digits and the whitespace after it, refusing a
+ * minus sign in front of either number -- presumably because the unsigned sscanf() conversions below
+ * would otherwise accept it. */
+ pos = strspn(value, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+ pos += strspn(value + pos, DIGITS);
+ pos += strspn(value + pos, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+
+ /* %n overwrites pos with the number of characters consumed by the two conversions. */
+ r = sscanf(value, "%" PRIu64 "%" PRIu64 "%n", &a, &b, &pos);
+ if (r != 2)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse dual timestamp value \"%s\".",
+ value);
+
+ if (value[pos] != '\0')
+ /* trailing garbage */
+ return -EINVAL;
+
+ t->realtime = a;
+ t->monotonic = b;
+
+ return 0;
+}
+
+/* Unescapes 'value' with cunescape() and merges the result into the string list *list through
+ * strv_env_replace(). Returns 0 on success; failures are logged at error level and returned. */
+int deserialize_environment(const char *value, char ***list) {
+        _cleanup_free_ char *assignment = NULL;
+        int k;
+
+        assert(value);
+        assert(list);
+
+        /* Changes the *list strv inline. */
+
+        k = cunescape(value, 0, &assignment);
+        if (k < 0)
+                return log_error_errno(k, "Failed to unescape: %m");
+
+        k = strv_env_replace(list, assignment);
+        if (k < 0)
+                return log_error_errno(k, "Failed to append environment variable: %m");
+
+        /* The string is now part of 'list', hence keep the cleanup handler from freeing it. */
+        assignment = NULL;
+
+        return 0;
+}
+
+/* Opens a file descriptor to serialize into. A memfd named 'ident' is tried first; if that fails, an
+ * unlinkable temporary file is opened below /run/systemd when running as PID 1 and below /tmp otherwise.
+ * Returns the file descriptor, or the negative error of open_tmpfile_unlinkable(). */
+int open_serialization_fd(const char *ident) {
+        const char *dir;
+        int fd;
+
+        fd = memfd_create(ident, MFD_CLOEXEC);
+        if (fd >= 0) {
+                log_debug("Serializing %s to memfd.", ident);
+                return fd;
+        }
+
+        /* No memfd available, fall back to a temporary file. */
+        dir = getpid_cached() == 1 ? "/run/systemd" : "/tmp";
+
+        fd = open_tmpfile_unlinkable(dir, O_RDWR|O_CLOEXEC);
+        if (fd < 0)
+                return fd;
+
+        log_debug("Serializing %s to %s.", ident, dir);
+        return fd;
+}
diff --git a/src/shared/serialize.h b/src/shared/serialize.h
new file mode 100644
index 0000000..4cbd98b
--- /dev/null
+++ b/src/shared/serialize.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+#include "fdset.h"
+#include "macro.h"
+
+int serialize_item(FILE *f, const char *key, const char *value);
+int serialize_item_escaped(FILE *f, const char *key, const char *value);
+int serialize_item_format(FILE *f, const char *key, const char *value, ...) _printf_(3,4);
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd);
+int serialize_usec(FILE *f, const char *key, usec_t usec);
+int serialize_dual_timestamp(FILE *f, const char *key, const dual_timestamp *t);
+int serialize_strv(FILE *f, const char *key, char **l);
+
+ /* Serializes the boolean 'b' under 'key', using the string yes_no() yields for it as the value. */
+ static inline int serialize_bool(FILE *f, const char *key, bool b) {
+         const char *text = yes_no(b);
+
+         return serialize_item(f, key, text);
+ }
+
+int deserialize_usec(const char *value, usec_t *timestamp);
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t);
+int deserialize_environment(const char *value, char ***environment);
+
+int open_serialization_fd(const char *ident);
diff --git a/src/shared/sleep-config.c b/src/shared/sleep-config.c
new file mode 100644
index 0000000..2e22bd0
--- /dev/null
+++ b/src/shared/sleep-config.c
@@ -0,0 +1,436 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/***
+ Copyright © 2018 Dell Inc.
+***/
+
+#include <errno.h>
+#include <linux/fs.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "sleep-config.h"
+#include "string-util.h"
+#include "strv.h"
+
+ /* Parses the [Sleep] section of sleep.conf and its drop-in directories and returns the settings
+  * that apply to 'verb', which must be one of "suspend", "hibernate", "hybrid-sleep" or
+  * "suspend-then-hibernate" (anything else hits assert_not_reached()).
+  *
+  * All ret_* parameters are optional and may be NULL:
+  *   ret_allow  - whether the operation is permitted by the Allow* settings
+  *   ret_modes  - configured or default sleep modes (NULL for suspend-then-hibernate)
+  *   ret_states - configured or default sleep states (NULL for suspend-then-hibernate)
+  *   ret_delay  - HibernateDelaySec=, 180 minutes unless configured
+  *
+  * Parse errors of the configuration files are ignored. Returns 0 on success, or the result of
+  * log_oom() if a required list is missing after the defaults have been filled in. */
+ int parse_sleep_config(const char *verb, bool *ret_allow, char ***ret_modes, char ***ret_states, usec_t *ret_delay) {
+         int allow_suspend = -1, allow_hibernate = -1,
+             allow_s2h = -1, allow_hybrid_sleep = -1;
+         bool allow, need_modes, need_states;
+         _cleanup_strv_free_ char
+                 **suspend_mode = NULL, **suspend_state = NULL,
+                 **hibernate_mode = NULL, **hibernate_state = NULL,
+                 **hybrid_mode = NULL, **hybrid_state = NULL;
+         _cleanup_strv_free_ char **modes, **states; /* always initialized below */
+         usec_t delay = 180 * USEC_PER_MINUTE;
+
+         const ConfigTableItem items[] = {
+                 { "Sleep", "AllowSuspend",              config_parse_tristate, 0, &allow_suspend      },
+                 { "Sleep", "AllowHibernation",          config_parse_tristate, 0, &allow_hibernate    },
+                 { "Sleep", "AllowSuspendThenHibernate", config_parse_tristate, 0, &allow_s2h          },
+                 { "Sleep", "AllowHybridSleep",          config_parse_tristate, 0, &allow_hybrid_sleep },
+
+                 { "Sleep", "SuspendMode",               config_parse_strv,     0, &suspend_mode       },
+                 { "Sleep", "SuspendState",              config_parse_strv,     0, &suspend_state      },
+                 { "Sleep", "HibernateMode",             config_parse_strv,     0, &hibernate_mode     },
+                 { "Sleep", "HibernateState",            config_parse_strv,     0, &hibernate_state    },
+                 { "Sleep", "HybridSleepMode",           config_parse_strv,     0, &hybrid_mode        },
+                 { "Sleep", "HybridSleepState",          config_parse_strv,     0, &hybrid_state       },
+
+                 { "Sleep", "HibernateDelaySec",         config_parse_sec,      0, &delay              },
+                 {}
+         };
+
+         (void) config_parse_many_nulstr(PKGSYSCONFDIR "/sleep.conf",
+                                         CONF_PATHS_NULSTR("systemd/sleep.conf.d"),
+                                         "Sleep\0", config_item_table_lookup, items,
+                                         CONFIG_PARSE_WARN, NULL);
+
+         if (streq(verb, "suspend")) {
+                 allow = allow_suspend != 0;
+
+                 /* The mode list has no default for suspend, it may remain empty */
+                 modes = TAKE_PTR(suspend_mode);
+                 states = suspend_state ? TAKE_PTR(suspend_state) : strv_new("mem", "standby", "freeze");
+
+         } else if (streq(verb, "hibernate")) {
+                 allow = allow_hibernate != 0;
+
+                 modes = hibernate_mode ? TAKE_PTR(hibernate_mode) : strv_new("platform", "shutdown");
+                 states = hibernate_state ? TAKE_PTR(hibernate_state) : strv_new("disk");
+
+         } else if (streq(verb, "hybrid-sleep")) {
+                 /* Permitted if explicitly enabled, or if neither suspend nor hibernation is disabled */
+                 allow = allow_hybrid_sleep > 0 ||
+                         (allow_suspend != 0 && allow_hibernate != 0);
+
+                 modes = hybrid_mode ? TAKE_PTR(hybrid_mode) : strv_new("suspend", "platform", "shutdown");
+                 states = hybrid_state ? TAKE_PTR(hybrid_state) : strv_new("disk");
+
+         } else if (streq(verb, "suspend-then-hibernate")) {
+                 allow = allow_s2h > 0 ||
+                         (allow_suspend != 0 && allow_hibernate != 0);
+
+                 /* No lists of its own for this verb */
+                 modes = states = NULL;
+         } else
+                 assert_not_reached("what verb");
+
+         /* A missing list that should have received a default means strv_new() failed */
+         need_modes = STR_IN_SET(verb, "hibernate", "hybrid-sleep");
+         need_states = !streq(verb, "suspend-then-hibernate");
+         if ((need_modes && !modes) || (need_states && !states))
+                 return log_oom();
+
+         if (ret_allow)
+                 *ret_allow = allow;
+         if (ret_modes)
+                 *ret_modes = TAKE_PTR(modes);
+         if (ret_states)
+                 *ret_states = TAKE_PTR(states);
+         if (ret_delay)
+                 *ret_delay = delay;
+
+         return 0;
+ }
+
+ /* Checks whether at least one entry of 'types' appears as a word in /sys/power/state. An empty
+  * list counts as supported. Returns false if the file is not writable, cannot be read, or lists
+  * none of the requested entries. */
+ int can_sleep_state(char **types) {
+         _cleanup_free_ char *supported = NULL;
+         char **type;
+
+         if (strv_isempty(types))
+                 return true;
+
+         /* If /sys is read-only we cannot sleep */
+         if (access("/sys/power/state", W_OK) < 0)
+                 return false;
+
+         if (read_one_line_file("/sys/power/state", &supported) < 0)
+                 return false;
+
+         STRV_FOREACH(type, types) {
+                 const char *word, *state;
+                 size_t word_len, wanted_len = strlen(*type);
+
+                 FOREACH_WORD_SEPARATOR(word, word_len, supported, WHITESPACE, state)
+                         if (word_len == wanted_len && memcmp(word, *type, word_len) == 0)
+                                 return true;
+         }
+
+         return false;
+ }
+
+ /* Checks whether at least one entry of 'types' appears as a word in /sys/power/disk, either
+  * plain or enclosed in square brackets. An empty list counts as supported. Returns false (after
+  * logging at debug level) if the file is not writable or cannot be read, and false if none of
+  * the requested entries is listed. */
+ int can_sleep_disk(char **types) {
+         _cleanup_free_ char *supported = NULL;
+         char **type;
+         int k;
+
+         if (strv_isempty(types))
+                 return true;
+
+         /* If /sys is read-only we cannot sleep */
+         if (access("/sys/power/disk", W_OK) < 0) {
+                 log_debug_errno(errno, "/sys/power/disk is not writable: %m");
+                 return false;
+         }
+
+         k = read_one_line_file("/sys/power/disk", &supported);
+         if (k < 0) {
+                 log_debug_errno(k, "Couldn't read /sys/power/disk: %m");
+                 return false;
+         }
+
+         STRV_FOREACH(type, types) {
+                 const char *word, *state;
+                 size_t word_len, wanted_len = strlen(*type);
+
+                 FOREACH_WORD_SEPARATOR(word, word_len, supported, WHITESPACE, state) {
+                         /* Plain match */
+                         if (word_len == wanted_len && memcmp(word, *type, word_len) == 0)
+                                 return true;
+
+                         /* Match of the form "[type]" */
+                         if (word_len == wanted_len + 2 &&
+                             word[0] == '[' &&
+                             memcmp(word + 1, *type, word_len - 2) == 0 &&
+                             word[word_len-1] == ']')
+                                 return true;
+                 }
+         }
+
+         return false;
+ }
+
+#define HIBERNATION_SWAP_THRESHOLD 0.98
+
+ /* Scans /proc/swaps and reports the first usable swap entry. Swap files whose name ends in
+  * "\040(deleted)" and partitions named /dev/zram* are skipped, lines that cannot be parsed are
+  * logged and skipped.
+  *
+  * All output parameters are optional (may be NULL) and are only written on success:
+  *   device - newly allocated copy of the first column (device or file), caller frees
+  *   type   - newly allocated copy of the second column (type of swap), caller frees
+  *   size   - third column (swap size)
+  *   used   - fourth column (used)
+  *
+  * Returns 0 on success, -errno if /proc/swaps cannot be opened, and the logged synthetic ENOSYS
+  * error if no suitable entry was found. */
+ int find_hibernate_location(char **device, char **type, size_t *size, size_t *used) {
+         _cleanup_fclose_ FILE *f = NULL;
+         unsigned i;
+
+         f = fopen("/proc/swaps", "re");
+         if (!f) {
+                 log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
+                          "Failed to open /proc/swaps: %m");
+                 assert(errno > 0);
+                 return -errno;
+         }
+
+         /* Skip the header line */
+         (void) fscanf(f, "%*s %*s %*s %*s %*s\n");
+
+         for (i = 1;; i++) {
+                 _cleanup_free_ char *dev_field = NULL, *type_field = NULL;
+                 size_t size_field, used_field;
+                 int k;
+
+                 k = fscanf(f,
+                            "%ms "   /* device/file */
+                            "%ms "   /* type of swap */
+                            "%zu "   /* swap size */
+                            "%zu "   /* used */
+                            "%*i\n", /* priority */
+                            &dev_field, &type_field, &size_field, &used_field);
+                 if (k == EOF)
+                         break;
+                 if (k != 4) {
+                         log_warning("Failed to parse /proc/swaps:%u", i);
+                         continue;
+                 }
+
+                 if (streq(type_field, "file")) {
+
+                         if (endswith(dev_field, "\\040(deleted)")) {
+                                 log_warning("Ignoring deleted swap file '%s'.", dev_field);
+                                 continue;
+                         }
+
+                 } else if (streq(type_field, "partition")) {
+                         const char *fn;
+
+                         fn = path_startswith(dev_field, "/dev/");
+                         if (fn && startswith(fn, "zram")) {
+                                 log_debug("Ignoring compressed RAM swap device '%s'.", dev_field);
+                                 continue;
+                         }
+                 }
+
+                 /* First acceptable entry wins; pass ownership of the strings to the caller */
+                 if (device)
+                         *device = TAKE_PTR(dev_field);
+                 if (type)
+                         *type = TAKE_PTR(type_field);
+                 if (size)
+                         *size = size_field;
+                 if (used)
+                         *used = used_field;
+                 return 0;
+         }
+
+         return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS),
+                                "No swap partitions were found.");
+ }
+
+ /* Decides whether the swap entry reported by find_hibernate_location() has enough free space
+  * for hibernation: the Active(anon) value from /proc/meminfo must not exceed
+  * HIBERNATION_SWAP_THRESHOLD times the unused part of the swap. The check is bypassed (returns
+  * true) when $SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK is set to a true value. Any failure to
+  * obtain the numbers results in false. */
+ static bool enough_swap_for_hibernation(void) {
+         _cleanup_free_ char *field = NULL;
+         unsigned long long anon = 0;
+         size_t swap_size = 0, swap_used = 0;
+         bool enough;
+         int k;
+
+         if (getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0)
+                 return true;
+
+         if (find_hibernate_location(NULL, NULL, &swap_size, &swap_used) < 0)
+                 return false;
+
+         k = get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE, &field);
+         if (k < 0) {
+                 log_debug_errno(k, "Failed to retrieve Active(anon) from /proc/meminfo: %m");
+                 return false;
+         }
+
+         k = safe_atollu(field, &anon);
+         if (k < 0) {
+                 log_debug_errno(k, "Failed to parse Active(anon) from /proc/meminfo: %s: %m", field);
+                 return false;
+         }
+
+         enough = anon <= (swap_size - swap_used) * HIBERNATION_SWAP_THRESHOLD;
+         log_debug("%s swap for hibernation, Active(anon)=%llu kB, size=%zu kB, used=%zu kB, threshold=%.2g%%",
+                   enough ? "Enough" : "Not enough", anon, swap_size, swap_used, 100*HIBERNATION_SWAP_THRESHOLD);
+
+         return enough;
+ }
+
+ /* Collects the extent map of the regular file behind 'fd' using the FS_IOC_FIEMAP ioctl.
+  *
+  * On success a newly allocated struct fiemap is stored in '*ret' (caller frees): its header is a
+  * copy of the header of the last query and fm_mapped_extents is set to the total number of
+  * extents copied into fm_extents[]. Returns 0 on success, -ENOTTY if 'fd' is not a regular
+  * file, -ENOMEM on allocation failure, or the logged error of a failed fstat()/ioctl(). */
+ int read_fiemap(int fd, struct fiemap **ret) {
+         _cleanup_free_ struct fiemap *fiemap = NULL, *result = NULL;
+         /* Number of struct fiemap_extent sized slots needed to hold the struct fiemap header */
+         const size_t n_header = DIV_ROUND_UP(sizeof(struct fiemap), sizeof(struct fiemap_extent));
+         size_t fiemap_allocated = n_header, result_allocated = n_header;
+         uint64_t offset = 0, file_size;
+         uint32_t n_collected = 0;
+         struct stat st;
+
+         if (fstat(fd, &st) < 0)
+                 return log_debug_errno(errno, "Cannot determine file size: %m");
+         if (!S_ISREG(st.st_mode))
+                 return -ENOTTY;
+         file_size = st.st_size;
+
+         /* Zero this out in case we run on a file with no extents */
+         fiemap = calloc(n_header, sizeof(struct fiemap_extent));
+         if (!fiemap)
+                 return -ENOMEM;
+
+         result = malloc_multiply(n_header, sizeof(struct fiemap_extent));
+         if (!result)
+                 return -ENOMEM;
+
+         /* XFS filesystem has incorrect implementation of fiemap ioctl and
+          * returns extents for only one block-group at a time, so we need
+          * to handle it manually, starting the next fiemap call from the end
+          * of the last extent
+          */
+         while (offset < file_size) {
+                 uint32_t n_mapped;
+
+                 *fiemap = (struct fiemap) {
+                         .fm_start = offset,
+                         .fm_length = file_size,
+                         .fm_flags = FIEMAP_FLAG_SYNC,
+                 };
+
+                 /* First query: find out how many extents there are */
+                 if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+                         return log_debug_errno(errno, "Failed to read extents: %m");
+
+                 /* Nothing to process */
+                 if (fiemap->fm_mapped_extents == 0)
+                         break;
+
+                 /* Grow the query buffer so that it can take the reported number of extents in
+                  * addition to the header. */
+                 if (!greedy_realloc0((void**) &fiemap, &fiemap_allocated,
+                                      n_header + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
+                         return -ENOMEM;
+
+                 fiemap->fm_extent_count = fiemap->fm_mapped_extents;
+                 fiemap->fm_mapped_extents = 0;
+
+                 /* Second query: actually read the extents */
+                 if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+                         return log_debug_errno(errno, "Failed to read extents: %m");
+
+                 n_mapped = fiemap->fm_mapped_extents;
+
+                 /* The result buffer has to hold the extents of all rounds */
+                 if (!greedy_realloc((void**) &result, &result_allocated,
+                                     n_header + n_collected + n_mapped, sizeof(struct fiemap_extent)))
+                         return -ENOMEM;
+
+                 memcpy(result->fm_extents + n_collected,
+                        fiemap->fm_extents,
+                        sizeof(struct fiemap_extent) * n_mapped);
+
+                 n_collected += n_mapped;
+
+                 /* Highly unlikely that it is zero */
+                 if (_likely_(n_mapped > 0)) {
+                         const struct fiemap_extent *last = &fiemap->fm_extents[n_mapped - 1];
+
+                         /* Continue right behind the last extent we have seen */
+                         offset = last->fe_logical + last->fe_length;
+
+                         if (last->fe_flags & FIEMAP_EXTENT_LAST)
+                                 break;
+                 }
+         }
+
+         /* Take the header from the last query, but report the accumulated extent count */
+         memcpy(result, fiemap, sizeof(struct fiemap));
+         result->fm_mapped_extents = n_collected;
+         *ret = TAKE_PTR(result);
+         return 0;
+ }
+
+static int can_sleep_internal(const char *verb, bool check_allowed);
+
+ /* Checks whether suspend-then-hibernate can work: /sys/class/rtc/rtc0/wakealarm must be
+  * writable and both "suspend" and "hibernate" must be possible according to
+  * can_sleep_internal() (called without the "allowed by configuration" check). Returns false if
+  * any of these checks fails or cannot be carried out. */
+ static bool can_s2h(void) {
+         const char *p;
+         int r;
+
+         r = access("/sys/class/rtc/rtc0/wakealarm", W_OK);
+         if (r < 0) {
+                 log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
+                          "/sys/class/rtc/rtc0/wakealarm is not writable %m");
+                 return false;
+         }
+
+         FOREACH_STRING(p, "suspend", "hibernate") {
+                 r = can_sleep_internal(p, false);
+                 if (IN_SET(r, 0, -ENOSPC, -EADV)) {
+                         log_debug("Unable to %s system.", p);
+                         return false;
+                 }
+                 if (r < 0) {
+                         /* This function returns bool: a negative errno returned directly would
+                          * be converted to 'true'. Log the error and report "not possible". */
+                         log_debug_errno(r, "Failed to check if %s is possible: %m", p);
+                         return false;
+                 }
+         }
+
+         return true;
+ }
+
+ /* Determines whether the sleep operation 'verb' can be carried out. If 'check_allowed' is true
+  * an operation disabled by configuration is reported as not possible.
+  *
+  * Returns true (1) if possible, false (0) if not (including when the configuration cannot be
+  * parsed), and -ENOSPC if the states and modes are supported but
+  * enough_swap_for_hibernation() says no. */
+ static int can_sleep_internal(const char *verb, bool check_allowed) {
+         _cleanup_strv_free_ char **modes = NULL, **states = NULL;
+         bool allow;
+
+         assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"));
+
+         if (parse_sleep_config(verb, &allow, &modes, &states, NULL) < 0)
+                 return false;
+
+         if (check_allowed && !allow) {
+                 log_debug("Sleep mode \"%s\" is disabled by configuration.", verb);
+                 return false;
+         }
+
+         /* This verb is checked in terms of its two component operations */
+         if (streq(verb, "suspend-then-hibernate"))
+                 return can_s2h();
+
+         if (!can_sleep_state(states) || !can_sleep_disk(modes))
+                 return false;
+
+         /* Plain suspend does not need any swap space */
+         if (streq(verb, "suspend"))
+                 return true;
+
+         return enough_swap_for_hibernation() ? true : -ENOSPC;
+ }
+
+ /* Public wrapper around can_sleep_internal() that always takes the Allow* configuration
+  * settings into account. */
+ int can_sleep(const char *verb) {
+         const bool check_allowed = true;
+
+         return can_sleep_internal(verb, check_allowed);
+ }
diff --git a/src/shared/sleep-config.h b/src/shared/sleep-config.h
new file mode 100644
index 0000000..c584f44
--- /dev/null
+++ b/src/shared/sleep-config.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <linux/fiemap.h>
+#include "time-util.h"
+
+int read_fiemap(int fd, struct fiemap **ret);
+int parse_sleep_config(const char *verb, bool *ret_allow, char ***ret_modes, char ***ret_states, usec_t *ret_delay);
+int find_hibernate_location(char **device, char **type, size_t *size, size_t *used);
+
+int can_sleep(const char *verb);
+int can_sleep_disk(char **types);
+int can_sleep_state(char **types);
diff --git a/src/shared/spawn-ask-password-agent.c b/src/shared/spawn-ask-password-agent.c
new file mode 100644
index 0000000..309071c
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "process-util.h"
+#include "spawn-ask-password-agent.h"
+#include "util.h"
+
+static pid_t agent_pid = 0;
+
+ /* Forks off the TTY ask-password agent ("--watch") unless one was already started by this
+  * process or stdin is not a TTY. Returns 1 if an agent was forked, 0 if nothing had to be done,
+  * -EPERM when called from a thread other than the main one, or the logged fork_agent() error. */
+ int ask_password_agent_open(void) {
+         int k;
+
+         /* Already running? Otherwise we check STDIN here, not STDOUT, since this is about
+          * input, not output */
+         if (agent_pid > 0 || !isatty(STDIN_FILENO))
+                 return 0;
+
+         if (!is_main_thread())
+                 return -EPERM;
+
+         k = fork_agent("(sd-askpwagent)",
+                        NULL, 0,
+                        &agent_pid,
+                        SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH,
+                        SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, "--watch", NULL);
+         if (k < 0)
+                 return log_error_errno(k, "Failed to fork TTY ask password agent: %m");
+
+         return 1;
+ }
+
+ /* Terminates the agent started by ask_password_agent_open(), if any: sends it SIGTERM via
+  * kill_and_sigcont(), waits for it to exit and forgets its PID. Errors are ignored. */
+ void ask_password_agent_close(void) {
+
+         if (agent_pid > 0) {
+                 /* Inform agent that we are done */
+                 (void) kill_and_sigcont(agent_pid, SIGTERM);
+                 (void) wait_for_terminate(agent_pid, NULL);
+                 agent_pid = 0;
+         }
+ }
diff --git a/src/shared/spawn-ask-password-agent.h b/src/shared/spawn-ask-password-agent.h
new file mode 100644
index 0000000..97e73bd
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+int ask_password_agent_open(void);
+void ask_password_agent_close(void);
diff --git a/src/shared/spawn-polkit-agent.c b/src/shared/spawn-polkit-agent.c
new file mode 100644
index 0000000..180cb79
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "spawn-polkit-agent.h"
+#include "stdio-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_POLKIT
+static pid_t agent_pid = 0;
+
+ /* Forks off the polkit agent ("--fallback") and waits until it signals readiness by closing
+  * the notification pipe passed to it via "--notify-fd".
+  *
+  * Nothing is done, and 0 is returned, if an agent was already started by this process, if we
+  * run with effective UID 0, or if stdin is not a TTY. Returns -EPERM when called from a thread
+  * other than the main one, -errno if the pipe cannot be created, and otherwise the result of
+  * fork_agent() (negative and logged on failure). */
+ int polkit_agent_open(void) {
+         char notify_fd[DECIMAL_STR_MAX(int) + 1];
+         int pipe_fd[2], r;
+
+         if (agent_pid > 0)
+                 return 0;
+
+         /* Clients that run as root don't need to activate/query polkit */
+         if (geteuid() == 0)
+                 return 0;
+
+         /* We check STDIN here, not STDOUT, since this is about input, not output */
+         if (!isatty(STDIN_FILENO))
+                 return 0;
+
+         if (!is_main_thread())
+                 return -EPERM;
+
+         if (pipe2(pipe_fd, 0) < 0)
+                 return -errno;
+
+         xsprintf(notify_fd, "%i", pipe_fd[1]);
+
+         r = fork_agent("(polkit-agent)",
+                        &pipe_fd[1], 1,
+                        &agent_pid,
+                        POLKIT_AGENT_BINARY_PATH,
+                        POLKIT_AGENT_BINARY_PATH, "--notify-fd", notify_fd, "--fallback", NULL);
+
+         /* Close the writing side, because that's the one for the agent */
+         safe_close(pipe_fd[1]);
+
+         if (r < 0)
+                 log_error_errno(r, "Failed to fork polkit agent: %m");
+         else
+                 /* Wait until the agent closes the fd; this is best-effort, hence the result is
+                  * deliberately ignored */
+                 (void) fd_wait_for_event(pipe_fd[0], POLLHUP, USEC_INFINITY);
+
+         safe_close(pipe_fd[0]);
+
+         return r;
+ }
+
+ /* Terminates the agent started by polkit_agent_open(), if any: sends it SIGTERM via
+  * kill_and_sigcont(), waits for it to exit and forgets its PID. Errors are ignored. */
+ void polkit_agent_close(void) {
+
+         if (agent_pid > 0) {
+                 /* Inform agent that we are done */
+                 (void) kill_and_sigcont(agent_pid, SIGTERM);
+                 (void) wait_for_terminate(agent_pid, NULL);
+                 agent_pid = 0;
+         }
+ }
+
+#else
+
+ /* Variant used when ENABLE_POLKIT is off: no agent is ever started, 0 is returned. */
+ int polkit_agent_open(void) {
+         return 0;
+ }
+
+ /* Variant used when ENABLE_POLKIT is off: there is never an agent to stop. */
+ void polkit_agent_close(void) {
+ }
+
+#endif
diff --git a/src/shared/spawn-polkit-agent.h b/src/shared/spawn-polkit-agent.h
new file mode 100644
index 0000000..190b970
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "bus-util.h"
+
+int polkit_agent_open(void);
+void polkit_agent_close(void);
+
+ /* Opens the polkit agent as a child process if necessary, i.e. only for the local bus transport
+  * and only if asking for passwords is permitted. Returns 0 if nothing was done, otherwise the
+  * result of polkit_agent_open(). */
+ static inline int polkit_agent_open_if_enabled(
+                 BusTransport transport,
+                 bool ask_password) {
+
+         if (transport != BUS_TRANSPORT_LOCAL || !ask_password)
+                 return 0;
+
+         return polkit_agent_open();
+ }
diff --git a/src/shared/specifier.c b/src/shared/specifier.c
new file mode 100644
index 0000000..b8f7537
--- /dev/null
+++ b/src/shared/specifier.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/utsname.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/*
+ * Generic infrastructure for replacing %x style specifiers in
+ * strings. Will call a callback for each replacement.
+ */
+
+/* Any ASCII character or digit: our pool of potential specifiers,
+ * and "%" used for escaping. */
+#define POSSIBLE_SPECIFIERS ALPHANUMERICAL "%"
+
+ /* Expands %x style specifiers in 'text' and stores the newly allocated result in '*_ret'.
+  *
+  * 'table' is searched for an entry whose 'specifier' equals the character following the '%';
+  * the search stops at the first entry whose 'specifier' is 0. If the entry found (or that
+  * terminating entry) has a lookup callback, it is called with 'userdata' and its output is
+  * inserted. "%%" yields a single '%'. A '%' followed by an unknown character from
+  * POSSIBLE_SPECIFIERS fails with -EBADSLT, followed by any other character both are copied
+  * verbatim. A stray '%' at the very end is kept.
+  *
+  * Returns 0 on success, -ENOMEM on allocation failure, or the negative error a callback
+  * returned. */
+ int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **_ret) {
+         size_t left, allocated = 0;
+         _cleanup_free_ char *ret = NULL;
+         char *t;
+         const char *f;
+         bool percent = false;
+         int r;
+
+         assert(text);
+         assert(table);
+
+         /* 'left' counts the input characters not yet consumed, including the current one */
+         left = strlen(text);
+         if (!GREEDY_REALLOC(ret, allocated, left + 1))
+                 return -ENOMEM;
+         t = ret;
+
+         for (f = text; *f; f++, left--) {
+                 const Specifier *i;
+
+                 if (!percent) {
+                         /* Ordinary text, or the start of a specifier */
+                         if (*f == '%')
+                                 percent = true;
+                         else
+                                 *(t++) = *f;
+
+                         continue;
+                 }
+
+                 /* The previous character was an unescaped '%' */
+                 percent = false;
+
+                 if (*f == '%') {
+                         *(t++) = '%';
+                         continue;
+                 }
+
+                 for (i = table; i->specifier; i++)
+                         if (i->specifier == *f)
+                                 break;
+
+                 if (i->lookup) {
+                         _cleanup_free_ char *w = NULL;
+                         size_t k, j;
+
+                         r = i->lookup(i->specifier, i->data, userdata, &w);
+                         if (r < 0)
+                                 return r;
+
+                         /* The buffer may move when grown, hence remember the write offset */
+                         j = t - ret;
+                         k = strlen(w);
+
+                         if (!GREEDY_REALLOC(ret, allocated, j + k + left + 1))
+                                 return -ENOMEM;
+                         memcpy(ret + j, w, k);
+                         t = ret + j + k;
+                 } else if (strchr(POSSIBLE_SPECIFIERS, *f))
+                         /* Oops, an unknown specifier. */
+                         return -EBADSLT;
+                 else {
+                         *(t++) = '%';
+                         *(t++) = *f;
+                 }
+         }
+
+         /* If string ended with a stray %, also end with % */
+         if (percent)
+                 *(t++) = '%';
+         *(t++) = 0;
+
+         /* Try to deallocate unused bytes, but don't sweat it too much */
+         if ((size_t)(t - ret) < allocated) {
+                 t = realloc(ret, t - ret);
+                 if (t)
+                         ret = t;
+         }
+
+         *_ret = TAKE_PTR(ret);
+         return 0;
+ }
+
+/* Generic handler for simple string replacements */
+
+ /* Specifier callback that returns a copy of the string passed as 'data', with strempty()
+  * applied to it first. Returns 0 on success, -ENOMEM if the copy cannot be allocated. */
+ int specifier_string(char specifier, const void *data, const void *userdata, char **ret) {
+         char *copy = strdup(strempty(data));
+
+         if (!copy)
+                 return -ENOMEM;
+
+         *ret = copy;
+         return 0;
+ }
+
+ /* Specifier callback that returns the machine ID, formatted by sd_id128_to_string() into a
+  * newly allocated 33 byte buffer. Returns 0 on success, the error of sd_id128_get_machine(), or
+  * -ENOMEM. */
+ int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret) {
+         sd_id128_t machine_id;
+         char *buf;
+         int k;
+
+         k = sd_id128_get_machine(&machine_id);
+         if (k < 0)
+                 return k;
+
+         buf = new(char, 33);
+         if (!buf)
+                 return -ENOMEM;
+
+         *ret = sd_id128_to_string(machine_id, buf);
+         return 0;
+ }
+
+ /* Specifier callback that returns the boot ID, formatted by sd_id128_to_string() into a newly
+  * allocated 33 byte buffer. Returns 0 on success, the error of sd_id128_get_boot(), or
+  * -ENOMEM. */
+ int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret) {
+         sd_id128_t boot_id;
+         char *buf;
+         int k;
+
+         k = sd_id128_get_boot(&boot_id);
+         if (k < 0)
+                 return k;
+
+         buf = new(char, 33);
+         if (!buf)
+                 return -ENOMEM;
+
+         *ret = sd_id128_to_string(boot_id, buf);
+         return 0;
+ }
+
+ /* Specifier callback that returns the string obtained from gethostname_malloc(). Returns 0 on
+  * success, -ENOMEM if that function returns NULL. */
+ int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+         char *hostname = gethostname_malloc();
+
+         if (!hostname)
+                 return -ENOMEM;
+
+         *ret = hostname;
+         return 0;
+ }
+
+ /* Specifier callback that returns a copy of the 'release' field reported by uname(). Returns 0
+  * on success, -errno if uname() fails, -ENOMEM if the copy cannot be allocated. */
+ int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret) {
+         struct utsname u;
+         char *release;
+
+         if (uname(&u) < 0)
+                 return -errno;
+
+         release = strdup(u.release);
+         if (!release)
+                 return -ENOMEM;
+
+         *ret = release;
+         return 0;
+ }
+
+ /* Specifier callback that returns the result of gid_to_name() for the GID of the calling
+  * process. Returns 0 on success, -ENOMEM if gid_to_name() returns NULL. */
+ int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret) {
+         char *name = gid_to_name(getgid());
+
+         if (!name)
+                 return -ENOMEM;
+
+         *ret = name;
+         return 0;
+ }
+
+ /* Specifier callback that returns the GID of the calling process formatted as a string.
+  * Returns 0 on success, -ENOMEM if the string cannot be allocated. */
+ int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret) {
+         /* getgid() yields a gid_t, hence format it with GID_FMT rather than UID_FMT */
+         if (asprintf(ret, GID_FMT, getgid()) < 0)
+                 return -ENOMEM;
+
+         return 0;
+ }
+
+ /* Specifier callback that returns the result of uid_to_name() for the UID of the calling
+  * process. Returns 0 on success, -ENOMEM if uid_to_name() returns NULL. */
+ int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret) {
+         char *name;
+
+         /* If we are UID 0 (root), this will not result in NSS, otherwise it might. This is good, as we want to
+          * be able to run this in PID 1, where our user ID is 0, but where NSS lookups are not allowed.
+          *
+          * We don't use getusername_malloc() here, because we don't want to look at $USER, to remain
+          * consistent with specifier_user_id() below. */
+
+         name = uid_to_name(getuid());
+         if (!name)
+                 return -ENOMEM;
+
+         *ret = name;
+         return 0;
+ }
+
+ /* Specifier callback that returns the UID of the calling process formatted as a string.
+  * Returns 0 on success, -ENOMEM if the string cannot be allocated. */
+ int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret) {
+         int n;
+
+         n = asprintf(ret, UID_FMT, getuid());
+         if (n < 0)
+                 return -ENOMEM;
+
+         return 0;
+ }
+
+ /* Specifier callback that stores the result of get_home_dir() in '*ret' and passes on its
+  * return value. */
+ int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret) {
+         int k;
+
+         /* On PID 1 (which runs as root) this will not result in NSS,
+          * which is good. See specifier_user_name(). */
+         k = get_home_dir(ret);
+
+         return k;
+ }
+
+ /* Specifier callback that stores the result of get_shell() in '*ret' and passes on its return
+  * value. */
+ int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret) {
+         int k;
+
+         /* On PID 1 (which runs as root) this will not result in NSS,
+          * which is good. See specifier_user_name(). */
+         k = get_shell(ret);
+
+         return k;
+ }
+
+ /* Specifier callback that returns a newly allocated copy of the path reported by tmp_dir().
+  * Returns 0 on success, the error of tmp_dir(), or -ENOMEM. */
+ int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+         const char *dir;
+         char *dup;
+         int k;
+
+         k = tmp_dir(&dir);
+         if (k < 0)
+                 return k;
+
+         dup = strdup(dir);
+         if (!dup)
+                 return -ENOMEM;
+
+         *ret = dup;
+         return 0;
+ }
+
+ /* Specifier callback that returns a newly allocated copy of the path reported by
+  * var_tmp_dir(). Returns 0 on success, the error of var_tmp_dir(), or -ENOMEM. */
+ int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+         const char *dir;
+         char *dup;
+         int k;
+
+         k = var_tmp_dir(&dir);
+         if (k < 0)
+                 return k;
+
+         dup = strdup(dir);
+         if (!dup)
+                 return -ENOMEM;
+
+         *ret = dup;
+         return 0;
+ }
+
+ /* Creates a copy of the string vector 'l' in which every entry went through
+  * specifier_escape(), and stores it in '*ret'. An empty or NULL input yields '*ret' = NULL.
+  * Returns 0 on success, -ENOMEM on allocation failure (in which case '*ret' is not touched). */
+ int specifier_escape_strv(char **l, char ***ret) {
+         char **escaped;
+         size_t n;
+
+         assert(ret);
+
+         if (strv_isempty(l)) {
+                 *ret = NULL;
+                 return 0;
+         }
+
+         escaped = new(char*, strv_length(l)+1);
+         if (!escaped)
+                 return -ENOMEM;
+
+         for (n = 0; l[n]; n++) {
+
+                 escaped[n] = specifier_escape(l[n]);
+                 if (!escaped[n]) {
+                         /* The NULL just stored terminates the partial vector for strv_free() */
+                         strv_free(escaped);
+                         return -ENOMEM;
+                 }
+         }
+
+         escaped[n] = NULL;
+         *ret = escaped;
+
+         return 0;
+ }
diff --git a/src/shared/specifier.h b/src/shared/specifier.h
new file mode 100644
index 0000000..d0221ef
--- /dev/null
+++ b/src/shared/specifier.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "string-util.h"
+
+typedef int (*SpecifierCallback)(char specifier, const void *data, const void *userdata, char **ret);
+
+ typedef struct Specifier { /* One entry of the table passed to specifier_printf() */
+ const char specifier; /* Character matched against the one following '%'; 0 ends the table */
+ const SpecifierCallback lookup; /* Called to produce the replacement string for this specifier */
+ const void *data; /* Passed to 'lookup' as its 'data' argument */
+ } Specifier;
+
+int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **ret);
+
+int specifier_string(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+
+ /* Returns the result of replacing every "%" in 'string' by "%%" with strreplace(). */
+ static inline char* specifier_escape(const char *string) {
+         char *escaped = strreplace(string, "%", "%%");
+
+         return escaped;
+ }
+
+int specifier_escape_strv(char **l, char ***ret);
diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c
new file mode 100644
index 0000000..dbb4622
--- /dev/null
+++ b/src/shared/switch-root.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "missing.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "switch-root.h"
+#include "user-util.h"
+#include "util.h"
+
+ /* Makes 'new_root' the root directory of the calling process.
+  *
+  *   new_root         - directory to switch to; nothing is done if it is "/"
+  *   old_root_after   - path below the new root, where to place the old root after the transition
+  *   unmount_old_root - if pivot_root() succeeded, lazily unmount the old root tree afterwards
+  *   mount_flags      - MS_MOVE or MS_BIND, used to bring /sys, /dev, /run and /proc over
+  *
+  * When in_initrd() is true the contents of the old root directory are removed at the end.
+  * Returns 0 on success or a negative errno-style error, which is logged. */
+ int switch_root(const char *new_root,
+                 const char *old_root_after,
+                 bool unmount_old_root,
+                 unsigned long mount_flags) {
+
+         _cleanup_free_ char *resolved_old_root_after = NULL;
+         _cleanup_close_ int old_root_fd = -1;
+         bool old_root_remove;
+         const char *fs;
+         int r;
+
+         assert(new_root);
+         assert(old_root_after);
+
+         if (path_equal(new_root, "/"))
+                 return 0;
+
+         /* Check if we shall remove the contents of the old root. If so, keep an fd to it around,
+          * so that we can still reach it after the transition. */
+         old_root_remove = in_initrd();
+         if (old_root_remove) {
+                 old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+                 if (old_root_fd < 0)
+                         return log_error_errno(errno, "Failed to open root directory: %m");
+         }
+
+         /* Determine where we shall place the old root after the transition */
+         r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after);
+         if (r < 0)
+                 return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
+         if (r == 0) /* Doesn't exist yet. Let's create it */
+                 (void) mkdir_p_label(resolved_old_root_after, 0755);
+
+         /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence
+          * remount them MS_PRIVATE here as a work-around.
+          *
+          * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
+         if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
+                 return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");
+
+         FOREACH_STRING(fs, "/sys", "/dev", "/run", "/proc") {
+                 _cleanup_free_ char *chased = NULL;
+
+                 r = chase_symlinks(fs, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased);
+                 if (r < 0)
+                         return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, fs);
+                 if (r == 0)
+                         /* Doesn't exist yet? */
+                         (void) mkdir_p_label(chased, 0755);
+                 else {
+                         /* Already exists. Let's see if it is a mount point already. */
+                         r = path_is_mount_point(chased, NULL, 0);
+                         if (r < 0)
+                                 return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
+                         if (r > 0) /* If it is already mounted, then do nothing */
+                                 continue;
+                 }
+
+                 if (mount(fs, chased, NULL, mount_flags, NULL) < 0)
+                         return log_error_errno(errno, "Failed to mount %s to %s: %m", fs, chased);
+         }
+
+         /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
+          * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
+          * and switch_root() nevertheless. */
+         (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID);
+
+         if (chdir(new_root) < 0)
+                 return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);
+
+         /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /),
+          * that's not possible however, and hence we simply overmount root */
+         if (pivot_root(new_root, resolved_old_root_after) < 0) {
+
+                 if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0)
+                         return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
+
+         } else if (unmount_old_root) {
+                 /* Immediately get rid of the old root, if unmount_old_root is set.
+                  * Since we are running off it we need to do this lazily. */
+                 r = umount_recursive(old_root_after, MNT_DETACH);
+                 if (r < 0)
+                         log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m");
+         }
+
+         if (chroot(".") < 0)
+                 return log_error_errno(errno, "Failed to change root: %m");
+
+         if (chdir("/") < 0)
+                 return log_error_errno(errno, "Failed to change directory: %m");
+
+         if (old_root_fd >= 0) {
+                 struct stat rb;
+
+                 if (fstat(old_root_fd, &rb) < 0)
+                         log_warning_errno(errno, "Failed to stat old root directory, leaving: %m");
+                 else {
+                         (void) rm_rf_children(old_root_fd, 0, &rb);
+                         /* Keep the automatic cleanup from touching the fd again */
+                         old_root_fd = -1;
+                 }
+         }
+
+         return 0;
+ }
diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h
new file mode 100644
index 0000000..f4d48cb
--- /dev/null
+++ b/src/shared/switch-root.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags);
diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c
new file mode 100644
index 0000000..480e6c3
--- /dev/null
+++ b/src/shared/sysctl-util.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+
+ /* Normalizes the sysctl name 's' in place and returns 's'. If the string contains no '.' or
+  * '/', or if the first such separator is a '/', it is left untouched. Otherwise every '.'
+  * becomes a '/' and every '/' becomes a '.'. */
+ char *sysctl_normalize(char *s) {
+         char *sep = strpbrk(s, "/.");
+
+         /* If the first separator is a slash, the path is
+          * assumed to be normalized and slashes remain slashes
+          * and dots remains dots. */
+         if (!sep || *sep == '/')
+                 return s;
+
+         /* Otherwise, dots become slashes and slashes become
+          * dots. Fun. */
+         for (; sep; sep = strpbrk(sep + 1, "/."))
+                 *sep = *sep == '.' ? '/' : '.';
+
+         return s;
+ }
+
+ /* Writes 'value' to the file /proc/sys/<property>, appending a newline if 'value' does not
+  * already end in one. Returns 0 on success, -errno if the file cannot be opened or written. */
+ int sysctl_write(const char *property, const char *value) {
+         _cleanup_close_ int fd = -1;
+         const char *path;
+
+         assert(property);
+         assert(value);
+
+         /* Only the first line of the value is included in the log message */
+         log_debug("Setting '%s' to '%.*s'.", property, (int) strcspn(value, NEWLINE), value);
+
+         path = strjoina("/proc/sys/", property);
+         fd = open(path, O_WRONLY|O_CLOEXEC);
+         if (fd < 0)
+                 return -errno;
+
+         if (!endswith(value, "\n"))
+                 value = strjoina(value, "\n");
+
+         if (write(fd, value, strlen(value)) < 0)
+                 return -errno;
+
+         return 0;
+ }
+
+ /* Reads the file /proc/sys/<property> with read_full_file() into '*content' and passes on
+  * that function's return value. */
+ int sysctl_read(const char *property, char **content) {
+         const char *path;
+
+         assert(property);
+         assert(content);
+
+         path = strjoina("/proc/sys/", property);
+
+         return read_full_file(path, content, NULL);
+ }
diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h
new file mode 100644
index 0000000..fd7c78b
--- /dev/null
+++ b/src/shared/sysctl-util.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+char *sysctl_normalize(char *s);
+int sysctl_read(const char *property, char **value);
+int sysctl_write(const char *property, const char *value);
+
diff --git a/src/shared/test-tables.h b/src/shared/test-tables.h
new file mode 100644
index 0000000..4eeda3b
--- /dev/null
+++ b/src/shared/test-tables.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef const char* (*lookup_t)(int);
+typedef int (*reverse_t)(const char*);
+
+/* Exercises a to-string/from-string function pair for every index from -1 up to and including 'size'.
+ *
+ * For an index inside [0, size) the name returned by lookup() must map back to the same index through
+ * reverse(); with 'sparse' set, a reverse result of -1 is accepted as well. For an index outside that
+ * range lookup() must return NULL and reverse() of a bogus name must return -1. Any violation trips
+ * assert_se(). Each probe is printed, with runs of nameless in-range indexes collapsed to "...". */
+static inline void _test_table(const char *name,
+ lookup_t lookup,
+ reverse_t reverse,
+ int size,
+ bool sparse) {
+ int i, boring = 0;
+
+ for (i = -1; i < size + 1; i++) {
+ const char* val = lookup(i);
+ int rev;
+
+ if (val) {
+ rev = reverse(val);
+ boring = 0;
+ } else {
+ /* No name for this index: check that an unknown name is rejected instead. */
+ rev = reverse("--no-such--value----");
+ /* Count consecutive in-range indexes that have no name. */
+ boring += i >= 0;
+ }
+
+ /* Print named entries and the final probe; print "..." once per run of nameless entries. */
+ if (boring < 1 || i == size)
+ printf("%s: %d → %s → %d\n", name, i, val, rev);
+ else if (boring == 1)
+ printf("%*s ...\n", (int) strlen(name), "");
+
+ /* The expression inside !(...) describes the failure condition for this index. */
+ assert_se(!(i >= 0 && i < size ?
+ sparse ? rev != i && rev != -1 : val == NULL || rev != i :
+ val != NULL || rev != -1));
+ }
+}
+
+/* Runs _test_table() on <lower>_to_string()/<lower>_from_string() with _<upper>_MAX as the table
+ * size, requiring every in-range index to have a name. */
+#define test_table(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, false)
+
+/* Same as test_table(), but passes sparse=true so that in-range indexes without a name are tolerated. */
+#define test_table_sparse(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, true)
diff --git a/src/shared/tests.c b/src/shared/tests.c
new file mode 100644
index 0000000..11ea12e
--- /dev/null
+++ b/src/shared/tests.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <util.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "path-util.h"
+#include "strv.h"
+#include "tests.h"
+
+/* Creates a fresh temporary directory below /tmp, exports it as $XDG_RUNTIME_DIR
+ * (overriding any previous value) and returns a newly allocated copy of its path.
+ * Any failure trips assert_se(). */
+char* setup_fake_runtime_dir(void) {
+        char template[] = "/tmp/fake-xdg-runtime-XXXXXX";
+        char *copy;
+
+        assert_se(mkdtemp(template));
+        assert_se(setenv("XDG_RUNTIME_DIR", template, 1) >= 0);
+
+        copy = strdup(template);
+        assert_se(copy);
+
+        return copy;
+}
+
+/* Loads "systemd-runtest.env" from the directory of the running executable (once per process)
+ * and exports every key/value pair found there, without overriding variables that are
+ * already set. A missing or unreadable file is silently ignored. */
+static void load_testdata_env(void) {
+        static bool called = false;
+        _cleanup_free_ char *s = NULL;
+        _cleanup_free_ char *envpath = NULL;
+        _cleanup_strv_free_ char **pairs = NULL;
+        char **k, **v;
+
+        if (called)
+                return;
+        called = true;
+
+        assert_se(readlink_and_make_absolute("/proc/self/exe", &s) >= 0);
+
+        /* dirname() may either modify its argument or return a pointer to separate storage, hence
+         * always use its return value. Also refuse to continue if we ran out of memory while
+         * building the path, instead of passing NULL on. */
+        envpath = path_join(dirname(s), "systemd-runtest.env");
+        assert_se(envpath);
+
+        if (load_env_file_pairs(NULL, envpath, &pairs) < 0)
+                return;
+
+        /* The last argument is 0: variables already in the environment win. */
+        STRV_FOREACH_PAIR(k, v, pairs)
+                setenv(*k, *v, 0);
+}
+
+/* Returns the test data directory: $SYSTEMD_TEST_DATA if set (possibly via the
+ * runtest env file), the compiled-in SYSTEMD_TEST_DATA otherwise. Terminates the
+ * process with EXIT_FAILURE if that path does not exist. */
+const char* get_testdata_dir(void) {
+        const char *dir;
+
+        load_testdata_env();
+
+        /* The environment takes precedence over the build-time default. */
+        dir = getenv("SYSTEMD_TEST_DATA");
+        if (!dir)
+                dir = SYSTEMD_TEST_DATA;
+
+        if (access(dir, F_OK) >= 0)
+                return dir;
+
+        fprintf(stderr, "ERROR: $SYSTEMD_TEST_DATA directory [%s] does not exist\n", dir);
+        exit(EXIT_FAILURE);
+}
+
+/* Returns the catalog directory: $SYSTEMD_CATALOG_DIR if set (possibly via the
+ * runtest env file), the compiled-in SYSTEMD_CATALOG_DIR otherwise. Terminates the
+ * process with EXIT_FAILURE if that path does not exist. */
+const char* get_catalog_dir(void) {
+        const char *dir;
+
+        load_testdata_env();
+
+        /* The environment takes precedence over the build-time default. */
+        dir = getenv("SYSTEMD_CATALOG_DIR");
+        if (!dir)
+                dir = SYSTEMD_CATALOG_DIR;
+
+        if (access(dir, F_OK) >= 0)
+                return dir;
+
+        fprintf(stderr, "ERROR: $SYSTEMD_CATALOG_DIR directory [%s] does not exist\n", dir);
+        exit(EXIT_FAILURE);
+}
+
+/* Tells whether slow tests shall run: the boolean value of $SYSTEMD_SLOW_TESTS if it
+ * parses, SYSTEMD_SLOW_TESTS_DEFAULT otherwise. A parse failure other than -ENXIO
+ * is logged as a warning. */
+bool slow_tests_enabled(void) {
+        int r = getenv_bool("SYSTEMD_SLOW_TESTS");
+
+        if (r < 0) {
+                if (r != -ENXIO)
+                        log_warning_errno(r, "Cannot parse $SYSTEMD_SLOW_TESTS, ignoring.");
+
+                return SYSTEMD_SLOW_TESTS_DEFAULT;
+        }
+
+        return r;
+}
+
+/* Sets the maximum log level to 'level', then parses the logging settings from the environment
+ * and opens the log. The environment is parsed after the level is set. */
+void test_setup_logging(int level) {
+ log_set_max_level(level);
+ log_parse_environment();
+ log_open();
+}
+
+/* Logs at notice level that the tests of the current program are skipped because of 'message',
+ * and returns EXIT_TEST_SKIP. */
+int log_tests_skipped(const char *message) {
+ log_notice("%s: %s, skipping tests.",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+/* Like log_tests_skipped(), but additionally appends the error text for 'r' (via %m) to the
+ * notice. Returns EXIT_TEST_SKIP. */
+int log_tests_skipped_errno(int r, const char *message) {
+ log_notice_errno(r, "%s: %s, skipping tests: %m",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+/* Probes whether mount namespaces are usable. The probe runs in a forked child, so
+ * that our own process is left unaffected: the child unshares its mount namespace
+ * and remounts "/" as recursive slave, and reports the outcome via its exit code. */
+bool have_namespaces(void) {
+        siginfo_t si = {};
+        pid_t pid;
+
+        pid = fork();
+        assert_se(pid >= 0);
+
+        if (pid == 0) {
+                /* child: either step failing means "not available" */
+                if (unshare(CLONE_NEWNS) < 0 ||
+                    mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+                        _exit(EXIT_FAILURE);
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* parent: collect the verdict; the child must have exited regularly */
+        assert_se(waitid(P_PID, pid, &si, WEXITED) >= 0);
+        assert_se(si.si_code == CLD_EXITED);
+
+        switch (si.si_status) {
+
+        case EXIT_SUCCESS:
+                return true;
+
+        case EXIT_FAILURE:
+                return false;
+
+        default:
+                assert_not_reached("unexpected exit code");
+        }
+}
diff --git a/src/shared/tests.h b/src/shared/tests.h
new file mode 100644
index 0000000..718196f
--- /dev/null
+++ b/src/shared/tests.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+char* setup_fake_runtime_dir(void);
+const char* get_testdata_dir(void);
+const char* get_catalog_dir(void);
+bool slow_tests_enabled(void);
+void test_setup_logging(int level);
+int log_tests_skipped(const char *message);
+int log_tests_skipped_errno(int r, const char *message);
+
+bool have_namespaces(void);
diff --git a/src/shared/tmpfile-util-label.c b/src/shared/tmpfile-util-label.c
new file mode 100644
index 0000000..c12d7c1
--- /dev/null
+++ b/src/shared/tmpfile-util-label.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <sys/stat.h>
+
+#include "selinux-util.h"
+#include "tmpfile-util-label.h"
+#include "tmpfile-util.h"
+
+/* Wraps fopen_temporary(path, f, temp_path) between mac_selinux_create_file_prepare() for
+ * 'target' (as a regular file) and mac_selinux_create_file_clear().
+ *
+ * Returns the error of the prepare step if that fails (nothing is opened then), otherwise the
+ * result of fopen_temporary(). */
+int fopen_temporary_label(
+ const char *target,
+ const char *path,
+ FILE **f,
+ char **temp_path) {
+
+ int r;
+
+ r = mac_selinux_create_file_prepare(target, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = fopen_temporary(path, f, temp_path);
+
+ /* Cleared regardless of whether opening the temporary file worked. */
+ mac_selinux_create_file_clear();
+
+ return r;
+}
diff --git a/src/shared/tmpfile-util-label.h b/src/shared/tmpfile-util-label.h
new file mode 100644
index 0000000..97a8751
--- /dev/null
+++ b/src/shared/tmpfile-util-label.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of tmpfile-util.h (and not for example just flags to the functions they wrap) in order
+ * to optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but
+ * not for all */
+
+int fopen_temporary_label(const char *target, const char *path, FILE **f, char **temp_path);
diff --git a/src/shared/tomoyo-util.c b/src/shared/tomoyo-util.c
new file mode 100644
index 0000000..75c24d8
--- /dev/null
+++ b/src/shared/tomoyo-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <unistd.h>
+
+#include "tomoyo-util.h"
+
+/* Reports whether /sys/kernel/security/tomoyo/version exists. The file system is
+ * only checked on the first call; later calls return the remembered answer. */
+bool mac_tomoyo_use(void) {
+        static int cached = -1;
+
+        if (cached >= 0)
+                return cached;
+
+        cached = access("/sys/kernel/security/tomoyo/version", F_OK) == 0;
+
+        return cached;
+}
diff --git a/src/shared/tomoyo-util.h b/src/shared/tomoyo-util.h
new file mode 100644
index 0000000..06e8227
--- /dev/null
+++ b/src/shared/tomoyo-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+bool mac_tomoyo_use(void);
diff --git a/src/shared/udev-util.c b/src/shared/udev-util.c
new file mode 100644
index 0000000..4200032
--- /dev/null
+++ b/src/shared/udev-util.c
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "log.h"
+#include "parse-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+#include "udev.h"
+
+/* Names of the ResolveNameTiming values, indexed by enum value. These are the strings matched
+ * against the "resolve_names" setting in udev_parse_config_full(). */
+static const char* const resolve_name_timing_table[_RESOLVE_NAME_TIMING_MAX] = {
+ [RESOLVE_NAME_NEVER] = "never",
+ [RESOLVE_NAME_LATE] = "late",
+ [RESOLVE_NAME_EARLY] = "early",
+};
+
+/* Generates the lookup functions for the table above (resolve_name_timing_from_string() is used
+ * below; both directions are declared in udev-util.h). */
+DEFINE_STRING_TABLE_LOOKUP(resolve_name_timing, ResolveNameTiming);
+
+/* Parses /etc/udev/udev.conf.
+ *
+ * The "udev_log" setting is applied right away to the udev log realm. The remaining settings
+ * ("children_max", "exec_delay", "event_timeout", "resolve_names") are stored in the respective
+ * return parameter, but only if that parameter is non-NULL and the setting is present and valid;
+ * otherwise the parameter is left untouched. Invalid values are logged and ignored.
+ *
+ * Returns 0 on success (including when the file does not exist), a negative errno if the file
+ * could not be read or parsed. */
+int udev_parse_config_full(
+                unsigned *ret_children_max,
+                usec_t *ret_exec_delay_usec,
+                usec_t *ret_event_timeout_usec,
+                ResolveNameTiming *ret_resolve_name_timing) {
+
+        _cleanup_free_ char *log_val = NULL, *children_max = NULL, *exec_delay = NULL, *event_timeout = NULL, *resolve_names = NULL;
+        int r;
+
+        r = parse_env_file(NULL, "/etc/udev/udev.conf",
+                           "udev_log", &log_val,
+                           "children_max", &children_max,
+                           "exec_delay", &exec_delay,
+                           "event_timeout", &event_timeout,
+                           "resolve_names", &resolve_names);
+        if (r == -ENOENT)
+                return 0;
+        if (r < 0)
+                return r;
+
+        if (log_val) {
+                const char *log;
+                size_t n;
+
+                /* unquote: strip one pair of matching single or double quotes */
+                n = strlen(log_val);
+                if (n >= 2 &&
+                    ((log_val[0] == '"' && log_val[n-1] == '"') ||
+                     (log_val[0] == '\'' && log_val[n-1] == '\''))) {
+                        log_val[n - 1] = '\0';
+                        log = log_val + 1;
+                } else
+                        log = log_val;
+
+                /* we set the udev log level here explicitly, this is supposed
+                 * to regulate the code in libudev/ and udev/. */
+                r = log_set_max_level_from_string_realm(LOG_REALM_UDEV, log);
+                if (r < 0)
+                        log_debug_errno(r, "/etc/udev/udev.conf: failed to set udev log level '%s', ignoring: %m", log);
+        }
+
+        if (ret_children_max && children_max) {
+                r = safe_atou(children_max, ret_children_max);
+                if (r < 0)
+                        log_notice_errno(r, "/etc/udev/udev.conf: failed to parse children_max=%s, ignoring: %m", children_max);
+        }
+
+        if (ret_exec_delay_usec && exec_delay) {
+                r = parse_sec(exec_delay, ret_exec_delay_usec);
+                if (r < 0)
+                        log_notice_errno(r, "/etc/udev/udev.conf: failed to parse exec_delay=%s, ignoring: %m", exec_delay);
+        }
+
+        if (ret_event_timeout_usec && event_timeout) {
+                r = parse_sec(event_timeout, ret_event_timeout_usec);
+                if (r < 0)
+                        log_notice_errno(r, "/etc/udev/udev.conf: failed to parse event_timeout=%s, ignoring: %m", event_timeout);
+        }
+
+        if (ret_resolve_name_timing && resolve_names) {
+                ResolveNameTiming t;
+
+                t = resolve_name_timing_from_string(resolve_names);
+                if (t < 0)
+                        log_notice("/etc/udev/udev.conf: failed to parse resolve_names=%s, ignoring.", resolve_names);
+                else
+                        *ret_resolve_name_timing = t;
+        }
+
+        return 0;
+}
+
+/* State shared between device_wait_for_initialization() and device_monitor_handler(). */
+struct DeviceMonitorData {
+ const char *sysname; /* sysname of the device being waited for */
+ sd_device *device; /* set by the handler to a new reference on the matching device */
+};
+
+/* Device monitor callback: once an event for the device named in the
+ * DeviceMonitorData passed as userdata arrives, a reference to that device is
+ * stored there and the monitor's event loop is asked to exit with code 0.
+ * Events for other devices are ignored. */
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+        struct DeviceMonitorData *d = userdata;
+        const char *name;
+
+        assert(device);
+        assert(d);
+        assert(d->sysname);
+        assert(!d->device);
+
+        if (sd_device_get_sysname(device, &name) < 0)
+                return 0;
+
+        if (!streq(name, d->sysname))
+                return 0;
+
+        d->device = sd_device_ref(device);
+
+        return sd_event_exit(sd_device_monitor_get_event(monitor), 0);
+}
+
+/* Blocks until 'device' is reported as initialized.
+ *
+ * If the device is initialized already, this returns immediately. Otherwise a device monitor
+ * filtered on 'subsystem' is attached to the default event loop, and the loop is run until an
+ * event for a device with the same sysname arrives.
+ *
+ * On success 0 is returned and, if 'ret' is non-NULL, '*ret' receives a new reference to the
+ * device (either 'device' itself, or the device object delivered by the monitor). On failure a
+ * negative errno is returned and the failure is logged. */
+int device_wait_for_initialization(sd_device *device, const char *subsystem, sd_device **ret) {
+        _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+        struct DeviceMonitorData data = {};
+        int r;
+
+        assert(device);
+        assert(subsystem);
+
+        if (sd_device_get_is_initialized(device) > 0) {
+                if (ret)
+                        *ret = sd_device_ref(device);
+                return 0;
+        }
+
+        assert_se(sd_device_get_sysname(device, &data.sysname) >= 0);
+
+        /* Wait until the device is initialized, so that we can get access to the ID_PATH property */
+
+        r = sd_event_default(&event);
+        if (r < 0)
+                return log_error_errno(r, "Failed to get default event: %m");
+
+        r = sd_device_monitor_new(&monitor);
+        if (r < 0)
+                return log_error_errno(r, "Failed to acquire monitor: %m");
+
+        r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, NULL);
+        if (r < 0)
+                return log_error_errno(r, "Failed to add %s subsystem match to monitor: %m", subsystem);
+
+        r = sd_device_monitor_attach_event(monitor, event);
+        if (r < 0)
+                return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+        r = sd_device_monitor_start(monitor, device_monitor_handler, &data);
+        if (r < 0)
+                return log_error_errno(r, "Failed to start device monitor: %m");
+
+        /* Check again, maybe things changed. Udev will re-read the db if the device wasn't initialized
+         * yet. */
+        if (sd_device_get_is_initialized(device) > 0) {
+                if (ret)
+                        *ret = sd_device_ref(device);
+                return 0;
+        }
+
+        r = sd_event_loop(event);
+        if (r < 0) {
+                /* Don't leak a reference the handler might have taken before the loop failed. */
+                sd_device_unref(data.device);
+                return log_error_errno(r, "Event loop failed: %m");
+        }
+
+        /* The handler always takes a reference on the matching device. Hand it to the caller if
+         * requested, and drop it otherwise so that it is not leaked. */
+        if (ret)
+                *ret = TAKE_PTR(data.device);
+        else
+                sd_device_unref(data.device);
+
+        return 0;
+}
diff --git a/src/shared/udev-util.h b/src/shared/udev-util.h
new file mode 100644
index 0000000..932c4a9
--- /dev/null
+++ b/src/shared/udev-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "sd-device.h"
+
+#include "time-util.h"
+
+/* Values of the "resolve_names" udev.conf setting, see udev_parse_config_full(). */
+typedef enum ResolveNameTiming {
+ RESOLVE_NAME_NEVER,
+ RESOLVE_NAME_LATE,
+ RESOLVE_NAME_EARLY,
+ _RESOLVE_NAME_TIMING_MAX, /* number of valid values; used as the string table size */
+ _RESOLVE_NAME_TIMING_INVALID = -1,
+} ResolveNameTiming;
+
+ResolveNameTiming resolve_name_timing_from_string(const char *s) _pure_;
+const char *resolve_name_timing_to_string(ResolveNameTiming i) _const_;
+
+int udev_parse_config_full(
+ unsigned *ret_children_max,
+ usec_t *ret_exec_delay_usec,
+ usec_t *ret_event_timeout_usec,
+ ResolveNameTiming *ret_resolve_name_timing);
+
+/* Shortcut for udev_parse_config_full() with all return parameters set to NULL. */
+static inline int udev_parse_config(void) {
+ return udev_parse_config_full(NULL, NULL, NULL, NULL);
+}
+
+int device_wait_for_initialization(sd_device *device, const char *subsystem, sd_device **ret);
diff --git a/src/shared/uid-range.c b/src/shared/uid-range.c
new file mode 100644
index 0000000..5fa7bd2
--- /dev/null
+++ b/src/shared/uid-range.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "uid-range.h"
+#include "user-util.h"
+#include "util.h"
+
+/* Returns true if 'range' overlaps with, or directly borders on, the range of
+ * 'nr' UIDs beginning at 'start'. Both comparisons are inclusive, hence two
+ * ranges that merely touch count as intersecting, too. */
+static bool uid_range_intersect(UidRange *range, uid_t start, uid_t nr) {
+        assert(range);
+
+        /* 'range' begins after the other one ends? */
+        if (range->start > start + nr)
+                return false;
+
+        /* ... otherwise it must not end before the other one begins */
+        return range->start + range->nr >= start;
+}
+
+/* Merges all entries of the array '*p' (with '*n' entries) that intersect according to
+ * uid_range_intersect(), in place. '*n' is reduced by one for every entry merged away; the array
+ * is not reallocated. */
+static void uid_range_coalesce(UidRange **p, unsigned *n) {
+ unsigned i, j;
+
+ assert(p);
+ assert(n);
+
+ for (i = 0; i < *n; i++) {
+ for (j = i + 1; j < *n; j++) {
+ UidRange *x = (*p)+i, *y = (*p)+j;
+
+ if (uid_range_intersect(x, y->start, y->nr)) {
+ uid_t begin, end;
+
+ /* Extend x so that it covers both x and y ... */
+ begin = MIN(x->start, y->start);
+ end = MAX(x->start + x->nr, y->start + y->nr);
+
+ x->start = begin;
+ x->nr = end - begin;
+
+ /* ... then remove y by shifting the entries behind it down by one. */
+ if (*n > j+1)
+ memmove(y, y+1, sizeof(UidRange) * (*n - j -1));
+
+ (*n)--;
+ /* Index j now holds the next entry: step back so that the loop increment visits it. */
+ j--;
+ }
+ }
+ }
+}
+
+/* Sort order for UidRange entries: primarily by start UID, and by length
+ * among entries with the same start. */
+static int uid_range_compare(const UidRange *a, const UidRange *b) {
+        int r = CMP(a->start, b->start);
+
+        return r != 0 ? r : CMP(a->nr, b->nr);
+}
+
+/* Adds the range of 'nr' UIDs beginning at 'start' to the array '*p' with '*n'
+ * entries. The range is merged into the first existing entry it intersects with,
+ * or appended as a new entry otherwise (growing the array). Afterwards the array
+ * is sorted and intersecting entries are coalesced.
+ *
+ * Returns the new number of entries, 0 without doing anything if 'nr' is zero,
+ * or -ENOMEM if the array could not be grown. */
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr) {
+        UidRange *hit = NULL;
+        unsigned i;
+
+        assert(p);
+        assert(n);
+
+        if (nr <= 0)
+                return 0;
+
+        /* Look for the first entry the new range can be merged into. */
+        for (i = 0; i < *n; i++)
+                if (uid_range_intersect(*p + i, start, nr)) {
+                        hit = *p + i;
+                        break;
+                }
+
+        if (hit) {
+                uid_t begin = MIN(hit->start, start),
+                      end = MAX(hit->start + hit->nr, start + nr);
+
+                hit->start = begin;
+                hit->nr = end - begin;
+        } else {
+                UidRange *grown;
+
+                grown = reallocarray(*p, *n + 1, sizeof(UidRange));
+                if (!grown)
+                        return -ENOMEM;
+
+                *p = grown;
+                grown[*n] = (UidRange) {
+                        .start = start,
+                        .nr = nr,
+                };
+                (*n)++;
+        }
+
+        typesafe_qsort(*p, *n, uid_range_compare);
+        uid_range_coalesce(p, n);
+
+        return *n;
+}
+
+/* Parses 's' as either a single UID ("N") or an inclusive UID range ("A-B") and
+ * adds it to the array via uid_range_add(), whose result is returned.
+ *
+ * Returns the parse_uid() error if a number does not parse, and -EINVAL if the
+ * end of a range is lower than its start. */
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s) {
+        uid_t start, nr;
+        const char *dash;
+        int r;
+
+        assert(p);
+        assert(n);
+        assert(s);
+
+        dash = strchr(s, '-');
+        if (!dash) {
+                /* a single UID */
+                r = parse_uid(s, &start);
+                if (r < 0)
+                        return r;
+
+                nr = 1;
+        } else {
+                char *first;
+                uid_t end;
+
+                /* "A-B": the part before the dash is the start, the rest the end */
+                first = strndupa(s, dash - s);
+
+                r = parse_uid(first, &start);
+                if (r < 0)
+                        return r;
+
+                r = parse_uid(dash + 1, &end);
+                if (r < 0)
+                        return r;
+
+                if (end < start)
+                        return -EINVAL;
+
+                nr = end - start + 1;
+        }
+
+        return uid_range_add(p, n, start, nr);
+}
+
+/* Finds a UID below '*uid' that is covered by one of the 'n' ranges in 'p'.
+ *
+ * If '*uid - 1' lies inside one of the ranges it is stored in '*uid'. Otherwise '*uid' is set to
+ * the last UID of a range that ends below it. Returns 1 when '*uid' was updated, -EBUSY if no
+ * range ends below the candidate. */
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid) {
+ uid_t closest = UID_INVALID, candidate;
+ unsigned i;
+
+ assert(p);
+ assert(uid);
+
+ /* NOTE(review): if *uid is 0 this wraps around in unsigned arithmetic — confirm callers never pass 0. */
+ candidate = *uid - 1;
+
+ for (i = 0; i < n; i++) {
+ uid_t begin, end;
+
+ /* First and last UID covered by this range. */
+ begin = p[i].start;
+ end = p[i].start + p[i].nr - 1;
+
+ if (candidate >= begin && candidate <= end) {
+ *uid = candidate;
+ return 1;
+ }
+
+ /* Every matching range overwrites the previous one, so the last match in array order wins;
+ * presumably the array is sorted by start (as uid_range_add() leaves it), making that the
+ * nearest one — verify against callers. */
+ if (end < candidate)
+ closest = end;
+ }
+
+ if (closest == UID_INVALID)
+ return -EBUSY;
+
+ *uid = closest;
+ return 1;
+}
+
+/* Returns true if 'uid' is covered by any of the 'n' ranges in 'p'.
+ *
+ * UID 0 (root) is a perfectly valid UID to ask about, hence 'uid' is not asserted on. An empty
+ * array may be passed as NULL with n == 0, in which case nothing is contained. */
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid) {
+        unsigned i;
+
+        assert(p || n == 0);
+
+        for (i = 0; i < n; i++)
+                /* Compare the offset into the range rather than computing start + nr, which could
+                 * wrap around for ranges reaching up to the maximum UID. */
+                if (uid >= p[i].start && uid - p[i].start < p[i].nr)
+                        return true;
+
+        return false;
+}
diff --git a/src/shared/uid-range.h b/src/shared/uid-range.h
new file mode 100644
index 0000000..49ba382
--- /dev/null
+++ b/src/shared/uid-range.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+/* A range of 'nr' consecutive UIDs beginning at 'start'. */
+typedef struct UidRange {
+ uid_t start, nr;
+} UidRange;
+
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr);
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s);
+
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid);
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid);
diff --git a/src/shared/utmp-wtmp.c b/src/shared/utmp-wtmp.c
new file mode 100644
index 0000000..ef9427f
--- /dev/null
+++ b/src/shared/utmp-wtmp.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <utmpx.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "user-util.h"
+#include "util.h"
+#include "utmp-wtmp.h"
+
+/* Determines the current and, if 'previous' is non-NULL, the previous runlevel.
+ *
+ * A non-empty $RUNLEVEL takes precedence (with $PREVLEVEL supplying the previous runlevel, 0 if
+ * unset or empty). Otherwise the RUN_LVL record of the utmp database is consulted.
+ *
+ * Returns 0 on success, a negative errno if the utmp file could not be selected or no record
+ * was found. */
+int utmp_get_runlevel(int *runlevel, int *previous) {
+ struct utmpx *found, lookup = { .ut_type = RUN_LVL };
+ int r;
+ const char *e;
+
+ assert(runlevel);
+
+ /* If these values are set in the environment this takes
+ * precedence. Presumably, sysvinit does this to work around a
+ * race condition that would otherwise exist where we'd always
+ * go to disk and hence might read runlevel data that might be
+ * very new and does not apply to the current script being
+ * executed. */
+
+ e = getenv("RUNLEVEL");
+ if (e && e[0] > 0) {
+ /* The runlevel is the first character of the variable. */
+ *runlevel = e[0];
+
+ if (previous) {
+ /* $PREVLEVEL seems to be an Upstart thing */
+
+ e = getenv("PREVLEVEL");
+ if (e && e[0] > 0)
+ *previous = e[0];
+ else
+ *previous = 0;
+ }
+
+ return 0;
+ }
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return -errno;
+
+ setutxent();
+
+ found = getutxid(&lookup);
+ if (!found)
+ r = -errno;
+ else {
+ int a, b;
+
+ /* The record keeps the current runlevel in the low byte of ut_pid and the previous one
+ * in the next byte, matching what utmp_put_runlevel() writes. */
+ a = found->ut_pid & 0xFF;
+ b = (found->ut_pid >> 8) & 0xFF;
+
+ *runlevel = a;
+ if (previous)
+ *previous = b;
+
+ r = 0;
+ }
+
+ endutxent();
+
+ return r;
+}
+
+/* Stores the timestamp 't' (in µs) in the ut_tv field of 'store', split into
+ * seconds and microseconds. If 't' is zero the current wall clock time is used. */
+static void init_timestamp(struct utmpx *store, usec_t t) {
+        usec_t ts;
+
+        assert(store);
+
+        ts = t <= 0 ? now(CLOCK_REALTIME) : t;
+
+        store->ut_tv.tv_sec = ts / USEC_PER_SEC;
+        store->ut_tv.tv_usec = ts % USEC_PER_SEC;
+}
+
+/* Fills in the fields shared by our system-level records: the timestamp (see
+ * init_timestamp()), "~" as line, "~~" as id and, if uname() works, the kernel
+ * release as host. The strings are copied with strncpy() bounded by the size of
+ * the destination field. */
+static void init_entry(struct utmpx *store, usec_t t) {
+        struct utsname uts = {};
+
+        assert(store);
+
+        init_timestamp(store, t);
+
+        strncpy(store->ut_id, "~~", sizeof(store->ut_id));
+        strncpy(store->ut_line, "~", sizeof(store->ut_line)); /* or ~~ ? */
+
+        if (uname(&uts) < 0)
+                return;
+
+        strncpy(store->ut_host, uts.release, sizeof(store->ut_host));
+}
+
+/* Writes 'store' to the utmp database. utmp is similar to wtmp, but holds only
+ * one entry per entry type resp. user, i.e. it is basically a key/value table.
+ * Returns 0 on success, a negative errno otherwise. */
+static int write_entry_utmp(const struct utmpx *store) {
+        int r = 0;
+
+        assert(store);
+
+        if (utmpxname(_PATH_UTMPX) < 0)
+                return -errno;
+
+        setutxent();
+
+        if (!pututxline(store))
+                r = -errno;
+
+        endutxent();
+
+        return r;
+}
+
+/* Appends 'store' to the wtmp file. Returns 0 on success, or the negated errno left behind by
+ * updwtmpx(). */
+static int write_entry_wtmp(const struct utmpx *store) {
+ assert(store);
+
+ /* wtmp is a simple append-only file where each entry is
+ simply appended to the end; i.e. basically a log. */
+
+ /* updwtmpx() returns nothing, hence clear errno first so that a failure can be detected afterwards. */
+ errno = 0;
+ updwtmpx(_PATH_WTMPX, store);
+ return -errno;
+}
+
+/* Writes 'store_utmp' to utmp and 'store_wtmp' to wtmp. Both writes are always
+ * attempted; a utmp failure takes precedence over the wtmp result. -ENOENT is
+ * reported as success, since it means utmp/wtmp have been disabled, which is a
+ * good thing. */
+static int write_utmp_wtmp(const struct utmpx *store_utmp, const struct utmpx *store_wtmp) {
+        int r_utmp = write_entry_utmp(store_utmp);
+        int r_wtmp = write_entry_wtmp(store_wtmp);
+        int r = r_utmp >= 0 ? r_wtmp : r_utmp;
+
+        return r == -ENOENT ? 0 : r;
+}
+
+/* Writes the same record to both utmp and wtmp. */
+static int write_entry_both(const struct utmpx *store) {
+ return write_utmp_wtmp(store, store);
+}
+
+/* Records a RUN_LVL entry for the user "shutdown", stamped with the current
+ * time, in utmp and wtmp. */
+int utmp_put_shutdown(void) {
+        struct utmpx store = {
+                .ut_type = RUN_LVL,
+        };
+
+        init_entry(&store, 0);
+        strncpy(store.ut_user, "shutdown", sizeof(store.ut_user));
+
+        return write_entry_both(&store);
+}
+
+/* Records a BOOT_TIME entry for the user "reboot" in utmp and wtmp, stamped
+ * with 't' (or with the current time if 't' is zero). */
+int utmp_put_reboot(usec_t t) {
+        struct utmpx store = {
+                .ut_type = BOOT_TIME,
+        };
+
+        init_entry(&store, t);
+        strncpy(store.ut_user, "reboot", sizeof(store.ut_user));
+
+        return write_entry_both(&store);
+}
+
+/* Returns the part of 'id' that fits into the ut_id field: the string itself
+ * if it is short enough, otherwise a pointer to its trailing sizeof(ut_id)
+ * characters. */
+_pure_ static const char *sanitize_id(const char *id) {
+        size_t len, max = sizeof(((struct utmpx*) NULL)->ut_id);
+
+        assert(id);
+
+        len = strlen(id);
+
+        return len > max ? id + len - max : id;
+}
+
+/* Records the start of a process in utmp and wtmp.
+ *
+ * An INIT_PROCESS record is always written. If 'ut_type' is LOGIN_PROCESS or USER_PROCESS a
+ * LOGIN_PROCESS record follows, and for USER_PROCESS finally a USER_PROCESS record carrying 'user'.
+ * All records share the id (truncated via sanitize_id()), pid, session id, the basename of 'line'
+ * (if given) and the current time.
+ *
+ * Returns 0 on success, or the error of the first write that failed. */
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ struct utmpx store = {
+ .ut_type = INIT_PROCESS,
+ .ut_pid = pid,
+ .ut_session = sid,
+ };
+ int r;
+
+ assert(id);
+
+ init_timestamp(&store, 0);
+
+ /* ut_id needs only be nul-terminated if it is shorter than sizeof(ut_id) */
+ strncpy(store.ut_id, sanitize_id(id), sizeof(store.ut_id));
+
+ if (line)
+ strncpy(store.ut_line, basename(line), sizeof(store.ut_line));
+
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(ut_type, LOGIN_PROCESS, USER_PROCESS)) {
+ store.ut_type = LOGIN_PROCESS;
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ if (ut_type == USER_PROCESS) {
+ store.ut_type = USER_PROCESS;
+ /* NOTE(review): 'user' is not checked for NULL here — confirm that callers always pass it
+ * together with USER_PROCESS. */
+ strncpy(store.ut_user, user, sizeof(store.ut_user)-1);
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+/* Marks the utmp record with the given id as DEAD_PROCESS and appends a matching record to wtmp.
+ *
+ * Nothing is written (and 0 is returned) if no record with that id exists, or if the record found
+ * belongs to a different PID. 'code' and 'status' are stored as termination resp. exit status.
+ * The user, host and timestamp fields are cleared; the wtmp copy carries the current time.
+ *
+ * Returns 0 on success, a negative errno if writing fails. */
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+        struct utmpx lookup = {
+                .ut_type = INIT_PROCESS /* looks for DEAD_PROCESS, LOGIN_PROCESS, USER_PROCESS, too */
+        }, store, store_wtmp, *found;
+
+        assert(id);
+
+        setutxent();
+
+        /* ut_id needs only be nul-terminated if it is shorter than sizeof(ut_id) */
+        strncpy(lookup.ut_id, sanitize_id(id), sizeof(lookup.ut_id));
+
+        found = getutxid(&lookup);
+        if (!found || found->ut_pid != pid) {
+                /* Close the database again on the way out, instead of leaving it open. */
+                endutxent();
+                return 0;
+        }
+
+        /* Take a private copy of the record before the database is closed. */
+        memcpy(&store, found, sizeof(store));
+        endutxent();
+
+        store.ut_type = DEAD_PROCESS;
+        store.ut_exit.e_termination = code;
+        store.ut_exit.e_exit = status;
+
+        zero(store.ut_user);
+        zero(store.ut_host);
+        zero(store.ut_tv);
+
+        memcpy(&store_wtmp, &store, sizeof(store_wtmp));
+        /* wtmp wants the current time */
+        init_timestamp(&store_wtmp, 0);
+
+        return write_utmp_wtmp(&store, &store_wtmp);
+}
+
+/* Records a runlevel change in utmp and wtmp.
+ *
+ * If 'previous' is not positive the previous runlevel is looked up via utmp_get_runlevel(); when
+ * that reports -ESRCH it is taken to be 0. Nothing is written if the runlevel did not change.
+ *
+ * Returns 0 on success or if there was nothing to do, a negative errno otherwise. */
+int utmp_put_runlevel(int runlevel, int previous) {
+ struct utmpx store = {};
+ int r;
+
+ assert(runlevel > 0);
+
+ if (previous <= 0) {
+ /* Find the old runlevel automatically */
+
+ r = utmp_get_runlevel(&previous, NULL);
+ if (r < 0) {
+ if (r != -ESRCH)
+ return r;
+
+ previous = 0;
+ }
+ }
+
+ if (previous == runlevel)
+ return 0;
+
+ init_entry(&store, 0);
+
+ store.ut_type = RUN_LVL;
+ /* Current runlevel in the low byte, previous one in the next byte; utmp_get_runlevel() decodes this. */
+ store.ut_pid = (runlevel & 0xFF) | ((previous & 0xFF) << 8);
+ strncpy(store.ut_user, "runlevel", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+#define TIMEOUT_MSEC 50
+
+/* Writes 'message' to the terminal device 'tty', without blocking for longer than TIMEOUT_MSEC
+ * in total.
+ *
+ * Returns 0 once the whole message was written, -ENOTTY if 'tty' is not a terminal, -ETIME if the
+ * time budget ran out, or a negative errno if opening, polling or writing failed. */
+static int write_to_terminal(const char *tty, const char *message) {
+        _cleanup_close_ int fd = -1;
+        const char *p;
+        size_t left;
+        usec_t end;
+
+        assert(tty);
+        assert(message);
+
+        fd = open(tty, O_WRONLY|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        /* isatty() is not guaranteed to set errno when it returns 0, hence don't rely on errno
+         * here, as that could make us report success (or a stale error) for a non-terminal. */
+        if (!isatty(fd))
+                return -ENOTTY;
+
+        p = message;
+        left = strlen(message);
+
+        end = now(CLOCK_MONOTONIC) + TIMEOUT_MSEC*USEC_PER_MSEC;
+
+        while (left > 0) {
+                ssize_t n;
+                struct pollfd pollfd = {
+                        .fd = fd,
+                        .events = POLLOUT,
+                };
+                usec_t t;
+                int k;
+
+                /* The deadline is checked on every iteration, including after EINTR/EAGAIN. */
+                t = now(CLOCK_MONOTONIC);
+
+                if (t >= end)
+                        return -ETIME;
+
+                k = poll(&pollfd, 1, (end - t) / USEC_PER_MSEC);
+                if (k < 0) {
+                        /* Interrupted by a signal: retry with the remaining time budget. */
+                        if (errno == EINTR)
+                                continue;
+
+                        return -errno;
+                }
+
+                if (k == 0)
+                        return -ETIME;
+
+                n = write(fd, p, left);
+                if (n < 0) {
+                        if (errno == EAGAIN)
+                                continue;
+
+                        return -errno;
+                }
+
+                assert((size_t) n <= left);
+
+                p += n;
+                left -= n;
+        }
+
+        return 0;
+}
+
+/* Sends 'message' as a broadcast to the terminals of all logged-in users found in utmp.
+ *
+ * 'username' names the sender (if NULL the current login name is used), 'origin_tty' the terminal
+ * the message originates from (if NULL the terminal of stdin is used, if it can be determined).
+ * If 'match_tty' is non-NULL a terminal is only written to when match_tty(path, userdata) returns
+ * true.
+ *
+ * Failing to write to an individual terminal does not stop the broadcast. Returns 0 on success,
+ * -ENOMEM on allocation failure, or else the error of the last terminal that could not be
+ * written to. */
+int utmp_wall(
+        const char *message,
+        const char *username,
+        const char *origin_tty,
+        bool (*match_tty)(const char *tty, void *userdata),
+        void *userdata) {
+
+        _cleanup_free_ char *text = NULL, *hn = NULL, *un = NULL, *stdin_tty = NULL;
+        char date[FORMAT_TIMESTAMP_MAX];
+        struct utmpx *u;
+        int r;
+
+        hn = gethostname_malloc();
+        if (!hn)
+                return -ENOMEM;
+        if (!username) {
+                un = getlogname_malloc();
+                if (!un)
+                        return -ENOMEM;
+        }
+
+        if (!origin_tty) {
+                /* Best effort: if this fails stdin_tty stays NULL and no origin is shown. */
+                getttyname_harder(STDIN_FILENO, &stdin_tty);
+                origin_tty = stdin_tty;
+        }
+
+        if (asprintf(&text,
+                     "\a\r\n"
+                     "Broadcast message from %s@%s%s%s (%s):\r\n\r\n"
+                     "%s\r\n\r\n",
+                     un ?: username, hn,
+                     origin_tty ? " on " : "", strempty(origin_tty),
+                     format_timestamp(date, sizeof(date), now(CLOCK_REALTIME)),
+                     message) < 0)
+                return -ENOMEM;
+
+        setutxent();
+
+        r = 0;
+
+        while ((u = getutxent())) {
+                _cleanup_free_ char *buf = NULL;
+                const char *path;
+                int q;
+
+                if (u->ut_type != USER_PROCESS || u->ut_user[0] == 0)
+                        continue;
+
+                /* this access is fine, because STRLEN("/dev/") << 32 (UT_LINESIZE) */
+                if (path_startswith(u->ut_line, "/dev/"))
+                        path = u->ut_line;
+                else {
+                        if (asprintf(&buf, "/dev/%.*s", (int) sizeof(u->ut_line), u->ut_line) < 0) {
+                                /* Leave via the common exit, so that the database is closed. */
+                                r = -ENOMEM;
+                                break;
+                        }
+
+                        path = buf;
+                }
+
+                if (!match_tty || match_tty(path, userdata)) {
+                        q = write_to_terminal(path, text);
+                        if (q < 0)
+                                r = q;
+                }
+        }
+
+        /* Pairs with setutxent() above: close the utmp database again. */
+        endutxent();
+
+        return r;
+}
diff --git a/src/shared/utmp-wtmp.h b/src/shared/utmp-wtmp.h
new file mode 100644
index 0000000..9e433cf
--- /dev/null
+++ b/src/shared/utmp-wtmp.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_UTMP
+int utmp_get_runlevel(int *runlevel, int *previous);
+
+int utmp_put_shutdown(void);
+int utmp_put_reboot(usec_t timestamp);
+int utmp_put_runlevel(int runlevel, int previous);
+
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status);
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user);
+
+int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata);
+
+#else /* ENABLE_UTMP */
+
+/* Stubs used when ENABLE_UTMP is off: looking up the runlevel reports -ESRCH, everything that
+ * would write records or send messages succeeds without doing anything. */
+static inline int utmp_get_runlevel(int *runlevel, int *previous) {
+ return -ESRCH;
+}
+static inline int utmp_put_shutdown(void) {
+ return 0;
+}
+static inline int utmp_put_reboot(usec_t timestamp) {
+ return 0;
+}
+static inline int utmp_put_runlevel(int runlevel, int previous) {
+ return 0;
+}
+static inline int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+ return 0;
+}
+static inline int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ return 0;
+}
+static inline int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata) {
+ return 0;
+}
+
+#endif /* ENABLE_UTMP */
diff --git a/src/shared/verbs.c b/src/shared/verbs.c
new file mode 100644
index 0000000..7c5dcb0
--- /dev/null
+++ b/src/shared/verbs.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "env-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "verbs.h"
+#include "virt.h"
+
+/* Wrapper around running_in_chroot(), which is used in various places, that additionally honours
+ * the $SYSTEMD_OFFLINE environment variable, so that external processes can reliably force the
+ * "offline" answer on or off.
+ */
+bool running_in_chroot_or_offline(void) {
+        int r;
+
+        /* $SYSTEMD_OFFLINE was added to support use cases like rpm-ostree, where from %post scripts
+         * we only want to execute "preset", but not "start"/"restart" for example. If it is set to
+         * a valid boolean it decides the matter.
+         *
+         * See docs/ENVIRONMENT.md for docs.
+         */
+        r = getenv_bool("SYSTEMD_OFFLINE");
+        if (r >= 0)
+                return r > 0;
+        if (r != -ENXIO)
+                log_debug_errno(r, "Failed to parse $SYSTEMD_OFFLINE: %m");
+
+        /* Otherwise fall back to the check we've had for a long time, which basically covers the
+         * legacy chroot case like Fedora's "mock", which is used for package builds. We don't want
+         * to try to start systemd services there, since without --new-chroot we don't even have
+         * systemd running, and even if we did, adding a concept of background daemons to builds
+         * would be an enormous change, requiring considering things like how the journal output is
+         * handled, etc. And there's really not a use case today for a build talking to a service.
+         *
+         * Note this call itself also looks for a different variable SYSTEMD_IGNORE_CHROOT=1.
+         */
+        r = running_in_chroot();
+        if (r < 0)
+                log_debug_errno(r, "running_in_chroot(): %m");
+
+        return r > 0;
+}
+
+/* Picks the verb named by the first non-option argument (argv[optind]) from the table 'verbs' and
+ * invokes its dispatch function. The table is terminated by an entry with a NULL dispatch function.
+ *
+ * If no verb was given on the command line the entry flagged VERB_DEFAULT is used and invoked with
+ * just its own name as argument vector. Before dispatching, the argument count is checked against
+ * the verb's min_args/max_args (VERB_ANY disables a check), and VERB_ONLINE_ONLY and
+ * VERB_MUST_BE_ROOT are honoured.
+ *
+ * Returns the result of the dispatch function; -EINVAL for an unknown or missing verb or a wrong
+ * number of arguments; 0 if an online-only verb was skipped; or the error of must_be_root(). */
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata) {
+ const Verb *verb;
+ const char *name;
+ unsigned i;
+ int left, r;
+
+ assert(verbs);
+ assert(verbs[0].dispatch);
+ assert(argc >= 0);
+ assert(argv);
+ assert(argc >= optind);
+
+ /* Skip what option parsing consumed; 'left' counts the verb itself plus its arguments. */
+ left = argc - optind;
+ argv += optind;
+ /* NOTE(review): presumably reset so that a verb can run its own option parsing — confirm. */
+ optind = 0;
+ name = argv[0];
+
+ for (i = 0;; i++) {
+ bool found;
+
+ /* At the end of the list? */
+ if (!verbs[i].dispatch) {
+ if (name)
+ log_error("Unknown operation %s.", name);
+ else
+ log_error("Requires operation parameter.");
+ return -EINVAL;
+ }
+
+ if (name)
+ found = streq(name, verbs[i].verb);
+ else
+ found = verbs[i].flags & VERB_DEFAULT;
+
+ if (found) {
+ verb = &verbs[i];
+ break;
+ }
+ }
+
+ assert(verb);
+
+ /* The implied default verb counts as a single argument. */
+ if (!name)
+ left = 1;
+
+ if (verb->min_args != VERB_ANY &&
+ (unsigned) left < verb->min_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too few arguments.");
+
+ if (verb->max_args != VERB_ANY &&
+ (unsigned) left > verb->max_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if ((verb->flags & VERB_ONLINE_ONLY) && running_in_chroot_or_offline()) {
+ if (name)
+ log_info("Running in chroot, ignoring request: %s", name);
+ else
+ log_info("Running in chroot, ignoring request.");
+ return 0;
+ }
+
+ if (verb->flags & VERB_MUST_BE_ROOT) {
+ r = must_be_root();
+ if (r < 0)
+ return r;
+ }
+
+ if (name)
+ return verb->dispatch(left, argv, userdata);
+ else {
+ /* No verb on the command line: synthesize an argument vector holding just the verb name. */
+ char* fake[2] = {
+ (char*) verb->verb,
+ NULL
+ };
+
+ return verb->dispatch(1, fake, userdata);
+ }
+}
diff --git a/src/shared/verbs.h b/src/shared/verbs.h
new file mode 100644
index 0000000..010c0df
--- /dev/null
+++ b/src/shared/verbs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+/* Value for Verb.min_args/max_args that turns the respective argument count check off. */
+#define VERB_ANY ((unsigned) -1)
+
+typedef enum VerbFlags {
+ VERB_DEFAULT = 1 << 0, /* verb to run if none is specified on the command line */
+ VERB_ONLINE_ONLY = 1 << 1, /* skipped if running_in_chroot_or_offline() says so */
+ VERB_MUST_BE_ROOT = 1 << 2, /* must_be_root() is checked before dispatching */
+} VerbFlags;
+
+/* One entry of a verb table as consumed by dispatch_verb(). A table ends with an entry whose
+ * 'dispatch' is NULL. */
+typedef struct {
+ const char *verb; /* name matched against the command line */
+ unsigned min_args, max_args; /* bounds on the argument count, which includes the verb itself */
+ VerbFlags flags;
+ int (* const dispatch)(int argc, char *argv[], void *userdata);
+} Verb;
+
+bool running_in_chroot_or_offline(void);
+
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata);
diff --git a/src/shared/vlan-util.c b/src/shared/vlan-util.c
new file mode 100644
index 0000000..2f9df7d
--- /dev/null
+++ b/src/shared/vlan-util.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "conf-parser.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "vlan-util.h"
+
+ /* Parses the string p as a VLAN identifier.
+  *
+  * On success the value is stored in *ret and 0 is returned. If safe_atou16() fails its negative
+  * error is passed through; if the number is rejected by vlanid_is_valid() -ERANGE is returned.
+  * *ret is only written on success. */
+ int parse_vlanid(const char *p, uint16_t *ret) {
+         uint16_t parsed;
+         int k;
+
+         assert(p);
+         assert(ret);
+
+         k = safe_atou16(p, &parsed);
+         if (k < 0)
+                 return k;
+
+         if (vlanid_is_valid(parsed)) {
+                 *ret = parsed;
+                 return 0;
+         }
+
+         return -ERANGE;
+ }
+
+ /* Config parser for a default port VLAN id: the literal "none" stores 0 in the uint16_t that data
+  * points to, every other value is handed unchanged to config_parse_vlanid(). */
+ int config_parse_default_port_vlanid(
+                 const char *unit,
+                 const char *filename,
+                 unsigned line,
+                 const char *section,
+                 unsigned section_line,
+                 const char *lvalue,
+                 int ltype,
+                 const char *rvalue,
+                 void *data,
+                 void *userdata) {
+
+         uint16_t *vlanid = data;
+
+         assert(lvalue);
+         assert(rvalue);
+         assert(data);
+
+         if (!streq(rvalue, "none"))
+                 return config_parse_vlanid(unit, filename, line, section, section_line,
+                                            lvalue, ltype, rvalue, data, userdata);
+
+         *vlanid = 0;
+         return 0;
+ }
+
+ /* Config parser storing a VLAN id in the uint16_t that data points to.
+  *
+  * Invalid values are reported through log_syntax() and otherwise ignored: the function returns 0
+  * in every case, and the destination is only updated when parse_vlanid() succeeds. */
+ int config_parse_vlanid(
+                 const char *unit,
+                 const char *filename,
+                 unsigned line,
+                 const char *section,
+                 unsigned section_line,
+                 const char *lvalue,
+                 int ltype,
+                 const char *rvalue,
+                 void *data,
+                 void *userdata) {
+
+         uint16_t *vlanid = data;
+         int k;
+
+         assert(filename);
+         assert(lvalue);
+         assert(rvalue);
+         assert(data);
+
+         k = parse_vlanid(rvalue, vlanid);
+         if (k >= 0)
+                 return 0;
+
+         /* Out-of-range numbers get their own message, everything else is a plain parse failure */
+         if (k == -ERANGE) {
+                 log_syntax(unit, LOG_ERR, filename, line, k, "VLAN identifier outside of valid range 0…4094, ignoring: %s", rvalue);
+         } else {
+                 log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse VLAN identifier value, ignoring: %s", rvalue);
+         }
+
+         return 0;
+ }
diff --git a/src/shared/vlan-util.h b/src/shared/vlan-util.h
new file mode 100644
index 0000000..ebe4331
--- /dev/null
+++ b/src/shared/vlan-util.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include "conf-parser.h"
+
+ #define VLANID_MAX 4094
+ #define VLANID_INVALID UINT16_MAX
+
+ /* Tells whether id is an acceptable VLAN identifier, i.e. not larger than VLANID_MAX.
+  * Note that we permit VLAN Id 0 here, as that is apparently OK by the Linux kernel */
+ static inline bool vlanid_is_valid(uint16_t id) {
+         if (id > VLANID_MAX)
+                 return false;
+
+         return true;
+ }
+
+int parse_vlanid(const char *p, uint16_t *ret);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_default_port_vlanid);
+CONFIG_PARSER_PROTOTYPE(config_parse_vlanid);
diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c
new file mode 100644
index 0000000..4d75bc0
--- /dev/null
+++ b/src/shared/volatile-util.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "volatile-util.h"
+
+ /* Determines the volatile mode requested via "systemd.volatile" on the kernel command line.
+  *
+  * Returns a negative error if proc_cmdline_get_key() fails, or -EINVAL if the value is not a
+  * string known to volatile_mode_from_string(); *ret is not written in these cases. If
+  * proc_cmdline_get_key() returns 0, *ret is set to VOLATILE_NO and 0 is returned. Otherwise
+  * *ret receives the parsed mode (VOLATILE_YES if no value string was returned) and 1 is
+  * returned. */
+ int query_volatile_mode(VolatileMode *ret) {
+         _cleanup_free_ char *value = NULL;
+         VolatileMode parsed;
+         int k;
+
+         k = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &value);
+         if (k < 0)
+                 return k;
+         if (k == 0) {
+                 *ret = VOLATILE_NO;
+                 return 0;
+         }
+
+         if (!value)
+                 parsed = VOLATILE_YES;
+         else {
+                 parsed = volatile_mode_from_string(value);
+                 if (parsed < 0)
+                         return -EINVAL;
+         }
+
+         *ret = parsed;
+         return 1;
+ }
+
+ /* String names of the VolatileMode values, indexed by the enum value. */
+ static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = {
+ [VOLATILE_NO] = "no",
+ [VOLATILE_YES] = "yes",
+ [VOLATILE_STATE] = "state",
+ };
+
+ /* NOTE(review): presumably generates volatile_mode_to_string() and volatile_mode_from_string()
+  * (both declared in volatile-util.h) from the table above, with VOLATILE_YES used for boolean
+  * "true" input — confirm against string-table.h */
+ DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES);
diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h
new file mode 100644
index 0000000..8761c44
--- /dev/null
+++ b/src/shared/volatile-util.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+ /* Modes that can be selected with "systemd.volatile" (see query_volatile_mode()). */
+ typedef enum VolatileMode {
+ VOLATILE_NO,
+ VOLATILE_YES,
+ VOLATILE_STATE,
+ _VOLATILE_MODE_MAX, /* Number of real modes; used as the size of the name table */
+ _VOLATILE_MODE_INVALID = -1 /* Negative, so that "value < 0" detects an unknown mode */
+ } VolatileMode;
+
+VolatileMode volatile_mode_from_string(const char *s);
+const char* volatile_mode_to_string(VolatileMode m);
+
+int query_volatile_mode(VolatileMode *ret);
diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c
new file mode 100644
index 0000000..c423af6
--- /dev/null
+++ b/src/shared/watchdog.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <limits.h>
+ #include <sys/ioctl.h>
+ #include <syslog.h>
+ #include <unistd.h>
+ #include <linux/watchdog.h>
+
+#include "fd-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "watchdog.h"
+
+static int watchdog_fd = -1;
+static char *watchdog_device = NULL;
+static usec_t watchdog_timeout = USEC_INFINITY;
+
+ /* Applies watchdog_timeout to the watchdog device that is currently open.
+  *
+  * Nothing is done if the device is not open or no timeout was configured (USEC_INFINITY). A
+  * timeout of 0 disables the hardware watchdog. Any other value is rounded up to whole seconds and
+  * passed to WDIOC_SETTIMEOUT; whatever the ioctl leaves in the seconds variable is written back to
+  * watchdog_timeout. Afterwards the watchdog is enabled and pinged once.
+  *
+  * Returns 0 on success and a negative errno-style value on failure. */
+ static int update_timeout(void) {
+         int r;
+
+         if (watchdog_fd < 0)
+                 return 0;
+
+         if (watchdog_timeout == USEC_INFINITY)
+                 return 0;
+         else if (watchdog_timeout == 0) {
+                 int flags;
+
+                 flags = WDIOS_DISABLECARD;
+                 r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags);
+                 if (r < 0)
+                         return log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+         } else {
+                 char buf[FORMAT_TIMESPAN_MAX];
+                 int sec, flags;
+                 usec_t t;
+
+                 /* WDIOC_SETTIMEOUT takes an int. Saturate rather than letting the conversion
+                  * silently truncate a very large timeout into a small or negative one. */
+                 t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
+                 sec = t > (usec_t) INT_MAX ? INT_MAX : (int) t;
+
+                 r = ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec);
+                 if (r < 0)
+                         return log_warning_errno(errno, "Failed to set timeout to %is: %m", sec);
+
+                 watchdog_timeout = (usec_t) sec * USEC_PER_SEC;
+                 log_info("Set hardware watchdog to %s.", format_timespan(buf, sizeof(buf), watchdog_timeout, 0));
+
+                 flags = WDIOS_ENABLECARD;
+                 r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags);
+                 if (r < 0) {
+                         /* Capture errno first so the decision below cannot be affected by logging */
+                         int e = errno;
+
+                         /* ENOTTY means the watchdog is always enabled so we're fine */
+                         log_full(e == ENOTTY ? LOG_DEBUG : LOG_WARNING,
+                                  "Failed to enable hardware watchdog: %m");
+                         if (e != ENOTTY)
+                                 return -e;
+                 }
+
+                 r = ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0);
+                 if (r < 0)
+                         return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+         }
+
+         return 0;
+ }
+
+ /* Opens the watchdog device (watchdog_device if set, "/dev/watchdog" otherwise) unless it is open
+  * already, logs the identity the driver reports, and applies the configured timeout.
+  *
+  * Returns 0 if the device was open before, -errno if open() fails, else the result of
+  * update_timeout(). */
+ static int open_watchdog(void) {
+         struct watchdog_info ident;
+         const char *path;
+
+         if (watchdog_fd >= 0)
+                 return 0;
+
+         path = watchdog_device ? watchdog_device : "/dev/watchdog";
+
+         watchdog_fd = open(path, O_WRONLY|O_CLOEXEC);
+         if (watchdog_fd < 0)
+                 return -errno;
+
+         /* The identity is informational only: a failing query is not an error */
+         if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident) >= 0)
+                 log_info("Hardware watchdog '%s', version %x",
+                          ident.identity,
+                          ident.firmware_version);
+
+         return update_timeout();
+ }
+
+ /* Configures the device node open_watchdog() uses instead of "/dev/watchdog" and returns the
+  * result of free_and_strdup(). An already opened watchdog fd is not touched.
+  * NOTE(review): presumably a private copy of path is stored and NULL resets to the default —
+  * confirm against free_and_strdup(). */
+ int watchdog_set_device(char *path) {
+ return free_and_strdup(&watchdog_device, path);
+ }
+
+ /* Sets the watchdog timeout to *usec.
+  *
+  * If the device is open the new value is applied right away; if it is not open yet it gets opened,
+  * unless the requested timeout is USEC_INFINITY, in which case the value is only recorded and 0 is
+  * returned. In all other cases *usec is updated to the timeout that is in effect afterwards and the
+  * result of the open/update operation is returned. */
+ int watchdog_set_timeout(usec_t *usec) {
+         int r;
+
+         watchdog_timeout = *usec;
+
+         if (watchdog_fd >= 0)
+                 r = update_timeout();
+         else if (watchdog_timeout != USEC_INFINITY)
+                 r = open_watchdog();
+         else
+                 /* Not opened yet and no explicit timeout requested: nothing to do */
+                 return 0;
+
+         *usec = watchdog_timeout;
+
+         return r;
+ }
+
+ /* Sends a keep-alive to the hardware watchdog, opening the device first if necessary.
+  * Returns 0 on success, a negative errno-style value if opening or pinging fails. */
+ int watchdog_ping(void) {
+
+         if (watchdog_fd < 0) {
+                 int r;
+
+                 r = open_watchdog();
+                 if (r < 0)
+                         return r;
+         }
+
+         if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
+                 return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+
+         return 0;
+ }
+
+ /* Closes the watchdog device if it is open. With disarm set the watchdog is disabled first, both
+  * via WDIOS_DISABLECARD and by writing the magic character 'V' to the device; failures to do so
+  * are logged but do not prevent the close. */
+ void watchdog_close(bool disarm) {
+
+         if (watchdog_fd < 0)
+                 return;
+
+         if (disarm) {
+                 static const char magic = 'V';
+                 int flags = WDIOS_DISABLECARD;
+
+                 /* Explicitly disarm it */
+                 if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
+                         log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+
+                 /* To be sure, use magic close logic, too. Retry only if interrupted. */
+                 while (write(watchdog_fd, &magic, 1) <= 0) {
+                         if (errno != EINTR) {
+                                 log_error_errno(errno, "Failed to disarm watchdog timer: %m");
+                                 break;
+                         }
+                 }
+         }
+
+         watchdog_fd = safe_close(watchdog_fd);
+ }
diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h
new file mode 100644
index 0000000..a345e4b
--- /dev/null
+++ b/src/shared/watchdog.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+int watchdog_set_device(char *path);
+int watchdog_set_timeout(usec_t *usec);
+int watchdog_ping(void);
+void watchdog_close(bool disarm);
+
+ /* Drops the configured watchdog device path by calling watchdog_set_device(NULL); the return
+  * value is deliberately ignored. */
+ static inline void watchdog_free_device(void) {
+ (void) watchdog_set_device(NULL);
+ }
diff --git a/src/shared/web-util.c b/src/shared/web-util.c
new file mode 100644
index 0000000..edf650d
--- /dev/null
+++ b/src/shared/web-util.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <stdbool.h>
+
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "web-util.h"
+
+ /* Checks whether etag has the shape of an HTTP entity tag: a double-quoted string, optionally
+  * preceded by the weak validator marker "W/". The characters between the quotes are not
+  * inspected. Returns false for NULL or empty input. */
+ bool http_etag_is_valid(const char *etag) {
+         const char *p;
+
+         if (isempty(etag))
+                 return false;
+
+         /* Skip the opening quote, including the optional weakness marker */
+         p = STARTSWITH_SET(etag, "\"", "W/\"");
+         if (!p)
+                 return false;
+
+         /* Look for the closing quote only in what follows the opening one, so that a lone '"' or
+          * 'W/"' (where one character would serve as both quotes) is rejected */
+         return !!endswith(p, "\"");
+ }
+
+ /* Returns true if url starts with "http://" or "https://", has at least one character after that
+  * prefix, and the remainder passes ascii_is_valid(). */
+ bool http_url_is_valid(const char *url) {
+         const char *rest;
+
+         if (isempty(url))
+                 return false;
+
+         rest = STARTSWITH_SET(url, "http://", "https://");
+         if (!rest)
+                 return false;
+
+         /* A bare scheme without anything following is not a URL */
+         if (isempty(rest))
+                 return false;
+
+         return ascii_is_valid(rest);
+ }
+
+ /* Returns true if url is acceptable as a documentation reference: either a valid HTTP(S) URL
+  * according to http_url_is_valid(), or a "file:/", "info:" or "man:" URL with a non-empty
+  * remainder that passes ascii_is_valid(). */
+ bool documentation_url_is_valid(const char *url) {
+         const char *rest;
+
+         if (isempty(url))
+                 return false;
+
+         if (!http_url_is_valid(url)) {
+                 rest = STARTSWITH_SET(url, "file:/", "info:", "man:");
+                 if (isempty(rest))
+                         return false;
+
+                 return ascii_is_valid(rest);
+         }
+
+         return true;
+ }
diff --git a/src/shared/web-util.h b/src/shared/web-util.h
new file mode 100644
index 0000000..c9e67e5
--- /dev/null
+++ b/src/shared/web-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool http_url_is_valid(const char *url) _pure_;
+
+bool documentation_url_is_valid(const char *url) _pure_;
+
+bool http_etag_is_valid(const char *etag);
diff --git a/src/shared/wireguard-netlink.h b/src/shared/wireguard-netlink.h
new file mode 100644
index 0000000..eb17091
--- /dev/null
+++ b/src/shared/wireguard-netlink.h
@@ -0,0 +1,179 @@
+#pragma once
+
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR MIT)
+ *
+ * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Documentation
+ * =============
+ *
+ * The below enums and macros are for interfacing with WireGuard, using generic
+ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
+ * methods: get and set. Note that while they share many common attributes, these
+ * two functions actually accept a slightly different set of inputs and outputs.
+ *
+ * WG_CMD_GET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
+ * one but not both of:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ *
+ * The kernel will then return several messages (NLM_F_MULTI) containing the following
+ * tree of nested items:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN
+ * WGDEVICE_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16
+ * WGDEVICE_A_FWMARK: NLA_U32
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN
+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
+ * WGPEER_A_LAST_HANDSHAKE_TIME: struct timespec
+ * WGPEER_A_RX_BYTES: NLA_U64
+ * WGPEER_A_TX_BYTES: NLA_U64
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 1: NLA_NESTED
+ * ...
+ * 2: NLA_NESTED
+ * ...
+ * ...
+ * 1: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that all of the allowed IPs of a single peer will not
+ * fit within a single netlink message. In that case, the same peer will
+ * be written in the following message, except it will only contain
+ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
+ * times in a row for the same peer. It is then up to the receiver to
+ * coalesce adjacent peers. Likewise, it is possible that all peers will
+ * not fit within a single message. So, subsequent peers will be sent
+ * in following messages, except those will only contain WGDEVICE_A_IFNAME
+ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
+ * messages to form the complete list of peers.
+ *
+ * Since this is an NLM_F_DUMP command, the final message will always be
+ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
+ * contains an integer error code. It is either zero or a negative error
+ * code corresponding to the errno.
+ *
+ * WG_CMD_SET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST. The command should contain the following
+ * tree of nested items, containing one but not both of WGDEVICE_A_IFINDEX
+ * and WGDEVICE_A_IFNAME:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
+ * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
+ * peers should be removed prior to adding the list below.
+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
+ * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the specified peer
+ * should be removed rather than added/updated and/or
+ * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed IPs of
+ * this peer should be removed prior to adding the list below.
+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 1: NLA_NESTED
+ * ...
+ * 2: NLA_NESTED
+ * ...
+ * ...
+ * 1: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that the amount of configuration data exceeds that of
+ * the maximum message length accepted by the kernel. In that case,
+ * several messages should be sent one after another, with each
+ * successive one filling in information not contained in the prior. Note
+ * that if WGDEVICE_F_REPLACE_PEERS is specified in the first message, it
+ * probably should not be specified in fragments that come after, so that
+ * the list of peers is only cleared the first time but appended after.
+ * Likewise for peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the
+ * first message of a peer, it likely should not be specified in subsequent
+ * fragments.
+ *
+ * If an error occurs, NLMSG_ERROR will reply containing an errno.
+ */
+
+#define WG_GENL_NAME "wireguard"
+#define WG_GENL_VERSION 1
+
+#define WG_KEY_LEN 32
+
+ /* Commands of the WG_GENL_NAME generic netlink family: the "get" and "set" methods described in
+  * the comment at the top of this header. */
+ enum wg_cmd {
+ WG_CMD_GET_DEVICE,
+ WG_CMD_SET_DEVICE,
+ __WG_CMD_MAX /* Not a command; only used to derive WG_CMD_MAX */
+ };
+ #define WG_CMD_MAX (__WG_CMD_MAX - 1)
+
+ /* Bits for WGDEVICE_A_FLAGS. */
+ enum wgdevice_flag {
+ WGDEVICE_F_REPLACE_PEERS = 1U << 0 /* Remove all current peers before adding the listed ones */
+ };
+ /* Top-level (device) netlink attributes; payload types as given in the header comment. */
+ enum wgdevice_attribute {
+ WGDEVICE_A_UNSPEC,
+ WGDEVICE_A_IFINDEX, /* NLA_U32 */
+ WGDEVICE_A_IFNAME, /* NLA_NUL_STRING, maxlen IFNAMESIZ - 1 */
+ WGDEVICE_A_PRIVATE_KEY, /* len WG_KEY_LEN */
+ WGDEVICE_A_PUBLIC_KEY, /* len WG_KEY_LEN */
+ WGDEVICE_A_FLAGS, /* NLA_U32, see enum wgdevice_flag */
+ WGDEVICE_A_LISTEN_PORT, /* NLA_U16 */
+ WGDEVICE_A_FWMARK, /* NLA_U32 */
+ WGDEVICE_A_PEERS, /* NLA_NESTED, list of peers */
+ __WGDEVICE_A_LAST /* Not an attribute; only used to derive WGDEVICE_A_MAX */
+ };
+ #define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
+
+ /* Bits for WGPEER_A_FLAGS. */
+ enum wgpeer_flag {
+ WGPEER_F_REMOVE_ME = 1U << 0, /* Remove the specified peer rather than adding/updating it */
+ WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1 /* Remove all current allowed IPs of the peer before adding the listed ones */
+ };
+ /* Per-peer netlink attributes, nested below WGDEVICE_A_PEERS; payload types as given in the
+  * header comment. */
+ enum wgpeer_attribute {
+ WGPEER_A_UNSPEC,
+ WGPEER_A_PUBLIC_KEY, /* len WG_KEY_LEN */
+ WGPEER_A_PRESHARED_KEY, /* len WG_KEY_LEN */
+ WGPEER_A_FLAGS, /* NLA_U32, see enum wgpeer_flag */
+ WGPEER_A_ENDPOINT, /* struct sockaddr_in or struct sockaddr_in6 */
+ WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, /* NLA_U16 */
+ WGPEER_A_LAST_HANDSHAKE_TIME, /* struct timespec */
+ WGPEER_A_RX_BYTES, /* NLA_U64 */
+ WGPEER_A_TX_BYTES, /* NLA_U64 */
+ WGPEER_A_ALLOWEDIPS, /* NLA_NESTED, list of allowed IPs */
+ __WGPEER_A_LAST /* Not an attribute; only used to derive WGPEER_A_MAX */
+ };
+ #define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
+
+ /* Attributes of one allowed-IP entry, nested below WGPEER_A_ALLOWEDIPS; payload types as given
+  * in the header comment. */
+ enum wgallowedip_attribute {
+ WGALLOWEDIP_A_UNSPEC,
+ WGALLOWEDIP_A_FAMILY, /* NLA_U16 */
+ WGALLOWEDIP_A_IPADDR, /* struct in_addr or struct in6_addr */
+ WGALLOWEDIP_A_CIDR_MASK, /* NLA_U8 */
+ __WGALLOWEDIP_A_LAST /* Not an attribute; only used to derive WGALLOWEDIP_A_MAX */
+ };
+ #define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
diff --git a/src/shared/xml.c b/src/shared/xml.c
new file mode 100644
index 0000000..2709076
--- /dev/null
+++ b/src/shared/xml.c
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "xml.h"
+
+ /* Tokenizer states; xml_tokenize() keeps the current one in the caller-provided state pointer
+  * between calls. */
+ enum {
+ STATE_NULL, /* Initial state; replaced by STATE_TEXT on the first call */
+ STATE_TEXT, /* Outside of any tag */
+ STATE_TAG, /* Inside a tag: after the tag name or after an attribute value */
+ STATE_ATTRIBUTE, /* Right after an attribute name */
+ };
+
+ /* Adds the number of newline characters found in the first n bytes of s to *line.
+  * Does nothing if line is NULL. */
+ static void inc_lines(unsigned *line, const char *s, size_t n) {
+         const char *cursor, *nl;
+
+         if (!line)
+                 return;
+
+         cursor = s;
+
+         while ((nl = memchr(cursor, '\n', n))) {
+                 /* Everything up to and including the newline has been accounted for */
+                 size_t consumed = (size_t) (nl - cursor) + 1;
+
+                 ++*line;
+                 n -= consumed;
+                 cursor = nl + 1;
+         }
+ }
+
+/* We don't actually do real XML here. We only read a simplistic
+ * subset, that is a bit less strict than XML and lacks all the more
+ * complex features, like entities, or namespaces. However, we do
+ * support some HTML5-like simplifications */
+
+ /* Extracts the next token from the XML-like text at *p.
+  *
+  * p:     in/out read position. It is moved behind the token whenever a token other than XML_END
+  *        is returned; it is left alone for XML_END and on error.
+  * name:  receives a string allocated with strndup() holding the text, tag name, attribute name
+  *        or attribute value of the token. Set to NULL for XML_TAG_CLOSE_EMPTY, not written for
+  *        XML_END or on error. NOTE(review): presumably the caller has to free it — confirm.
+  * state: in/out tokenizer state, to be passed unchanged between calls. A value equal to
+  *        STATE_NULL starts a fresh tokenization.
+  * line:  optional line counter. Set to 1 when starting fresh, and incremented for newlines in
+  *        consumed text, comments, processing instructions, DTDs, whitespace inside tags and
+  *        quoted attribute values.
+  *
+  * Comments ("<!-- -->"), processing instructions ("<? ?>") and DTD-like "<! >" constructs are
+  * skipped without producing a token.
+  *
+  * Returns one of the XML_* token types, -ENOMEM if an allocation fails, or -EINVAL if the input
+  * is malformed (e.g. an unterminated comment, tag or quoted value). */
+ int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
+ const char *c, *e, *b;
+ char *ret;
+ int t;
+
+ assert(p);
+ assert(*p);
+ assert(name);
+ assert(state);
+
+ t = PTR_TO_INT(*state);
+ c = *p;
+
+ if (t == STATE_NULL) {
+ if (line)
+ *line = 1;
+ t = STATE_TEXT;
+ }
+
+ for (;;) {
+ /* End of input terminates tokenization regardless of the current state */
+ if (*c == 0)
+ return XML_END;
+
+ switch (t) {
+
+ case STATE_TEXT: {
+ int x;
+
+ e = strchrnul(c, '<');
+ if (e > c) {
+ /* More text... */
+ ret = strndup(c, e - c);
+ if (!ret)
+ return -ENOMEM;
+
+ inc_lines(line, c, e - c);
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TEXT;
+ }
+
+ /* No text in front of us, hence c points at '<' here; b is what follows it */
+ assert(*e == '<');
+ b = c + 1;
+
+ if (startswith(b, "!--")) {
+ /* A comment */
+ e = strstr(b + 3, "-->");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 3 - b);
+
+ c = e + 3;
+ continue;
+ }
+
+ if (*b == '?') {
+ /* Processing instruction */
+
+ e = strstr(b + 1, "?>");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 2 - b);
+
+ c = e + 2;
+ continue;
+ }
+
+ if (*b == '!') {
+ /* DTD */
+
+ e = strchr(b + 1, '>');
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 1 - b);
+
+ c = e + 1;
+ continue;
+ }
+
+ if (*b == '/') {
+ /* A closing tag */
+ x = XML_TAG_CLOSE;
+ b++;
+ } else
+ x = XML_TAG_OPEN;
+
+ /* The tag name extends up to the first whitespace, '/' or '>' */
+ e = strpbrk(b, WHITESPACE "/>");
+ if (!e)
+ return -EINVAL;
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return x;
+ }
+
+ case STATE_TAG:
+
+ /* Skip whitespace between the tag name/attributes; the tag must not end here */
+ b = c + strspn(c, WHITESPACE);
+ if (*b == 0)
+ return -EINVAL;
+
+ inc_lines(line, c, b - c);
+
+ e = b + strcspn(b, WHITESPACE "=/>");
+ if (e > b) {
+ /* An attribute */
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_ATTRIBUTE);
+
+ return XML_ATTRIBUTE_NAME;
+ }
+
+ if (startswith(b, "/>")) {
+ /* An empty tag */
+
+ *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
+ *p = b + 2;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TAG_CLOSE_EMPTY;
+ }
+
+ if (*b != '>')
+ return -EINVAL;
+
+ /* End of the tag: no token is produced for '>', just carry on with the text behind it */
+ c = b + 1;
+ t = STATE_TEXT;
+ continue;
+
+ case STATE_ATTRIBUTE:
+
+ if (*c == '=') {
+ c++;
+
+ if (IN_SET(*c, '\'', '"')) {
+ /* Tag with a quoted value */
+
+ /* The value ends at the next occurrence of the same quote character */
+ e = strchr(c+1, *c);
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, c, e - c);
+
+ ret = strndup(c+1, e - c - 1);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e + 1;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return XML_ATTRIBUTE_VALUE;
+
+ }
+
+ /* Tag with a value without quotes */
+
+ /* If no terminator is found the value is reported as an empty string */
+ b = strpbrk(c, WHITESPACE ">");
+ if (!b)
+ b = c;
+
+ ret = strndup(c, b - c);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = b;
+ *state = INT_TO_PTR(STATE_TAG);
+ return XML_ATTRIBUTE_VALUE;
+ }
+
+ /* Attribute without a value: go on parsing the rest of the tag */
+ t = STATE_TAG;
+ continue;
+ }
+
+ }
+
+ assert_not_reached("Bad state");
+ }
diff --git a/src/shared/xml.h b/src/shared/xml.h
new file mode 100644
index 0000000..8da2ff5
--- /dev/null
+++ b/src/shared/xml.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+ /* Token types returned by xml_tokenize(). */
+ enum {
+ XML_END, /* End of the input string reached */
+ XML_TEXT, /* Text outside of tags; name holds the text */
+ XML_TAG_OPEN, /* Start of an opening tag; name holds the tag name */
+ XML_TAG_CLOSE, /* Start of a closing tag ("</..."); name holds the tag name */
+ XML_TAG_CLOSE_EMPTY, /* "/>" ending an empty tag; name is NULL */
+ XML_ATTRIBUTE_NAME, /* name holds the attribute name */
+ XML_ATTRIBUTE_VALUE, /* name holds the attribute value, without surrounding quotes */
+ };
+
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line);