summaryrefslogtreecommitdiffstats
path: root/src/shared
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 13:00:47 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 13:00:47 +0000
commit2cb7e0aaedad73b076ea18c6900b0e86c5760d79 (patch)
treeda68ca54bb79f4080079bf0828acda937593a4e1 /src/shared
parentInitial commit. (diff)
downloadsystemd-2cb7e0aaedad73b076ea18c6900b0e86c5760d79.tar.xz
systemd-2cb7e0aaedad73b076ea18c6900b0e86c5760d79.zip
Adding upstream version 247.3.upstream/247.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/shared/acl-util.c437
-rw-r--r--src/shared/acl-util.h40
-rw-r--r--src/shared/acpi-fpdt.c147
-rw-r--r--src/shared/acpi-fpdt.h6
-rw-r--r--src/shared/apparmor-util.c22
-rw-r--r--src/shared/apparmor-util.h6
-rw-r--r--src/shared/ask-password-api.c1050
-rw-r--r--src/shared/ask-password-api.h21
-rw-r--r--src/shared/barrier.c398
-rw-r--r--src/shared/barrier.h74
-rw-r--r--src/shared/base-filesystem.c124
-rw-r--r--src/shared/base-filesystem.h6
-rw-r--r--src/shared/binfmt-util.c35
-rw-r--r--src/shared/binfmt-util.h4
-rw-r--r--src/shared/bitmap.c212
-rw-r--r--src/shared/bitmap.h37
-rw-r--r--src/shared/blkid-util.h10
-rw-r--r--src/shared/bond-util.c73
-rw-r--r--src/shared/bond-util.h106
-rw-r--r--src/shared/boot-timestamps.c46
-rw-r--r--src/shared/boot-timestamps.h6
-rw-r--r--src/shared/bootspec.c1432
-rw-r--r--src/shared/bootspec.h86
-rw-r--r--src/shared/bpf-program.c256
-rw-r--r--src/shared/bpf-program.h43
-rw-r--r--src/shared/bridge-util.c13
-rw-r--r--src/shared/bridge-util.h20
-rw-r--r--src/shared/bus-get-properties.c167
-rw-r--r--src/shared/bus-get-properties.h102
-rw-r--r--src/shared/bus-locator.c213
-rw-r--r--src/shared/bus-locator.h34
-rw-r--r--src/shared/bus-log-control-api.c115
-rw-r--r--src/shared/bus-log-control-api.h19
-rw-r--r--src/shared/bus-map-properties.c246
-rw-r--r--src/shared/bus-map-properties.h25
-rw-r--r--src/shared/bus-message-util.c182
-rw-r--r--src/shared/bus-message-util.h18
-rw-r--r--src/shared/bus-object.c177
-rw-r--r--src/shared/bus-object.h34
-rw-r--r--src/shared/bus-polkit.c415
-rw-r--r--src/shared/bus-polkit.h11
-rw-r--r--src/shared/bus-print-properties.c462
-rw-r--r--src/shared/bus-print-properties.h16
-rw-r--r--src/shared/bus-unit-procs.c407
-rw-r--r--src/shared/bus-unit-procs.h8
-rw-r--r--src/shared/bus-unit-util.c2432
-rw-r--r--src/shared/bus-unit-util.h32
-rw-r--r--src/shared/bus-util.c577
-rw-r--r--src/shared/bus-util.h77
-rw-r--r--src/shared/bus-wait-for-jobs.c331
-rw-r--r--src/shared/bus-wait-for-jobs.h16
-rw-r--r--src/shared/bus-wait-for-units.c429
-rw-r--r--src/shared/bus-wait-for-units.h35
-rw-r--r--src/shared/calendarspec.c1405
-rw-r--r--src/shared/calendarspec.h46
-rw-r--r--src/shared/cgroup-setup.c841
-rw-r--r--src/shared/cgroup-setup.h34
-rw-r--r--src/shared/cgroup-show.c400
-rw-r--r--src/shared/cgroup-show.h24
-rw-r--r--src/shared/chown-recursive.c178
-rw-r--r--src/shared/chown-recursive.h8
-rw-r--r--src/shared/clean-ipc.c454
-rw-r--r--src/shared/clean-ipc.h17
-rw-r--r--src/shared/clock-util.c167
-rw-r--r--src/shared/clock-util.h11
-rw-r--r--src/shared/condition.c973
-rw-r--r--src/shared/condition.h106
-rw-r--r--src/shared/conf-parser.c1247
-rw-r--r--src/shared/conf-parser.h303
-rw-r--r--src/shared/coredump-util.c74
-rw-r--r--src/shared/coredump-util.h29
-rw-r--r--src/shared/cpu-set-util.c295
-rw-r--r--src/shared/cpu-set-util.h52
-rw-r--r--src/shared/cryptsetup-util.c111
-rw-r--r--src/shared/cryptsetup-util.h44
-rw-r--r--src/shared/daemon-util.h22
-rw-r--r--src/shared/dev-setup.c120
-rw-r--r--src/shared/dev-setup.h8
-rw-r--r--src/shared/dissect-image.c2557
-rw-r--r--src/shared/dissect-image.h163
-rw-r--r--src/shared/dm-util.c45
-rw-r--r--src/shared/dm-util.h4
-rw-r--r--src/shared/dns-domain.c1414
-rw-r--r--src/shared/dns-domain.h115
-rw-r--r--src/shared/dropin.c279
-rw-r--r--src/shared/dropin.h26
-rw-r--r--src/shared/efi-loader.c806
-rw-r--r--src/shared/efi-loader.h97
-rw-r--r--src/shared/enable-mempool.c5
-rw-r--r--src/shared/env-file-label.c21
-rw-r--r--src/shared/env-file-label.h8
-rw-r--r--src/shared/ethtool-util.c1149
-rw-r--r--src/shared/ethtool-util.h129
-rw-r--r--src/shared/exec-util.c446
-rw-r--r--src/shared/exec-util.h47
-rw-r--r--src/shared/exit-status.c178
-rw-r--r--src/shared/exit-status.h111
-rw-r--r--src/shared/fdset.c252
-rw-r--r--src/shared/fdset.h44
-rw-r--r--src/shared/fileio-label.c37
-rw-r--r--src/shared/fileio-label.h15
-rw-r--r--src/shared/firewall-util.c350
-rw-r--r--src/shared/firewall-util.h65
-rw-r--r--src/shared/format-table.c2549
-rw-r--r--src/shared/format-table.h139
-rw-r--r--src/shared/fsck-util.h14
-rw-r--r--src/shared/fstab-util.c297
-rw-r--r--src/shared/fstab-util.h38
-rwxr-xr-xsrc/shared/generate-ip-protocol-list.sh6
-rwxr-xr-xsrc/shared/generate-syscall-list.py14
-rw-r--r--src/shared/generator.c631
-rw-r--r--src/shared/generator.h84
-rw-r--r--src/shared/geneve-util.c12
-rw-r--r--src/shared/geneve-util.h17
-rw-r--r--src/shared/gpt.c97
-rw-r--r--src/shared/gpt.h128
-rw-r--r--src/shared/group-record.c348
-rw-r--r--src/shared/group-record.h46
-rw-r--r--src/shared/id128-print.c76
-rw-r--r--src/shared/id128-print.h19
-rw-r--r--src/shared/idn-util.c91
-rw-r--r--src/shared/idn-util.h32
-rw-r--r--src/shared/ima-util.c15
-rw-r--r--src/shared/ima-util.h6
-rw-r--r--src/shared/import-util.c179
-rw-r--r--src/shared/import-util.h27
-rw-r--r--src/shared/initreq.h74
-rw-r--r--src/shared/install-printf.c127
-rw-r--r--src/shared/install-printf.h6
-rw-r--r--src/shared/install.c3479
-rw-r--r--src/shared/install.h213
-rw-r--r--src/shared/ip-protocol-list.c67
-rw-r--r--src/shared/ip-protocol-list.h6
-rw-r--r--src/shared/ip-protocol-to-name.awk9
-rw-r--r--src/shared/ipvlan-util.c22
-rw-r--r--src/shared/ipvlan-util.h29
-rw-r--r--src/shared/journal-importer.c483
-rw-r--r--src/shared/journal-importer.h60
-rw-r--r--src/shared/journal-util.c139
-rw-r--r--src/shared/journal-util.h10
-rw-r--r--src/shared/json-internal.h76
-rw-r--r--src/shared/json.c4410
-rw-r--r--src/shared/json.h356
-rw-r--r--src/shared/libcrypt-util.c212
-rw-r--r--src/shared/libcrypt-util.h13
-rw-r--r--src/shared/libmount-util.h47
-rw-r--r--src/shared/libshared.sym3
-rw-r--r--src/shared/linux/README8
-rw-r--r--src/shared/linux/auto_dev-ioctl.h220
-rw-r--r--src/shared/linux/bpf.h3057
-rw-r--r--src/shared/linux/bpf_common.h57
-rw-r--r--src/shared/linux/bpf_insn.h225
-rw-r--r--src/shared/linux/dm-ioctl.h363
-rw-r--r--src/shared/linux/ethtool.h2021
-rw-r--r--src/shared/linux/nl80211.h6554
-rw-r--r--src/shared/local-addresses.c315
-rw-r--r--src/shared/local-addresses.h17
-rw-r--r--src/shared/lockfile-util.c137
-rw-r--r--src/shared/lockfile-util.h14
-rw-r--r--src/shared/log-link.h39
-rw-r--r--src/shared/logs-show.c1672
-rw-r--r--src/shared/logs-show.h64
-rw-r--r--src/shared/loop-util.c722
-rw-r--r--src/shared/loop-util.h29
-rw-r--r--src/shared/machine-image.c1274
-rw-r--r--src/shared/machine-image.h112
-rw-r--r--src/shared/machine-pool.c45
-rw-r--r--src/shared/machine-pool.h8
-rw-r--r--src/shared/macvlan-util.c15
-rw-r--r--src/shared/macvlan-util.h17
-rw-r--r--src/shared/main-func.h40
-rw-r--r--src/shared/meson.build399
-rw-r--r--src/shared/mkfs-util.c135
-rw-r--r--src/shared/mkfs-util.h10
-rw-r--r--src/shared/module-util.c72
-rw-r--r--src/shared/module-util.h12
-rw-r--r--src/shared/mount-util.c744
-rw-r--r--src/shared/mount-util.h99
-rw-r--r--src/shared/netif-naming-scheme.c69
-rw-r--r--src/shared/netif-naming-scheme.h57
-rw-r--r--src/shared/nscd-flush.c151
-rw-r--r--src/shared/nscd-flush.h4
-rw-r--r--src/shared/nsflags.c71
-rw-r--r--src/shared/nsflags.h29
-rw-r--r--src/shared/numa-util.c192
-rw-r--r--src/shared/numa-util.h35
-rw-r--r--src/shared/offline-passwd.c164
-rw-r--r--src/shared/offline-passwd.h9
-rw-r--r--src/shared/openssl-util.h12
-rw-r--r--src/shared/os-util.c149
-rw-r--r--src/shared/os-util.h13
-rw-r--r--src/shared/output-mode.c42
-rw-r--r--src/shared/output-mode.h49
-rw-r--r--src/shared/pager.c331
-rw-r--r--src/shared/pager.h17
-rw-r--r--src/shared/pam-util.c83
-rw-r--r--src/shared/pam-util.h15
-rw-r--r--src/shared/pe-header.h61
-rw-r--r--src/shared/pkcs11-util.c932
-rw-r--r--src/shared/pkcs11-util.h47
-rw-r--r--src/shared/pretty-print.c325
-rw-r--r--src/shared/pretty-print.h19
-rw-r--r--src/shared/psi-util.c118
-rw-r--r--src/shared/psi-util.h30
-rw-r--r--src/shared/ptyfwd.c681
-rw-r--r--src/shared/ptyfwd.h42
-rw-r--r--src/shared/pwquality-util.c191
-rw-r--r--src/shared/pwquality-util.h41
-rw-r--r--src/shared/qrcode-util.c107
-rw-r--r--src/shared/qrcode-util.h13
-rw-r--r--src/shared/reboot-util.c109
-rw-r--r--src/shared/reboot-util.h15
-rw-r--r--src/shared/resize-fs.c121
-rw-r--r--src/shared/resize-fs.h15
-rw-r--r--src/shared/resolve-util.c52
-rw-r--r--src/shared/resolve-util.h92
-rw-r--r--src/shared/seccomp-util.c2140
-rw-r--r--src/shared/seccomp-util.h142
-rw-r--r--src/shared/securebits-util.c66
-rw-r--r--src/shared/securebits-util.h18
-rw-r--r--src/shared/serialize.c215
-rw-r--r--src/shared/serialize.h27
-rw-r--r--src/shared/service-util.c87
-rw-r--r--src/shared/service-util.h10
-rw-r--r--src/shared/sleep-config.c703
-rw-r--r--src/shared/sleep-config.h59
-rw-r--r--src/shared/socket-netlink.c493
-rw-r--r--src/shared/socket-netlink.h50
-rw-r--r--src/shared/spawn-ask-password-agent.c61
-rw-r--r--src/shared/spawn-ask-password-agent.h11
-rw-r--r--src/shared/spawn-polkit-agent.c98
-rw-r--r--src/shared/spawn-polkit-agent.h11
-rw-r--r--src/shared/specifier.c358
-rw-r--r--src/shared/specifier.h91
-rw-r--r--src/shared/switch-root.c127
-rw-r--r--src/shared/switch-root.h6
-rw-r--r--src/shared/syscall-names.text598
-rw-r--r--src/shared/sysctl-util.c129
-rw-r--r--src/shared/sysctl-util.h30
-rw-r--r--src/shared/test-tables.h44
-rw-r--r--src/shared/tests.c343
-rw-r--r--src/shared/tests.h45
-rw-r--r--src/shared/tmpfile-util-label.c26
-rw-r--r--src/shared/tmpfile-util-label.h10
-rw-r--r--src/shared/tomoyo-util.c15
-rw-r--r--src/shared/tomoyo-util.h6
-rw-r--r--src/shared/udev-util.c371
-rw-r--r--src/shared/udev-util.h36
-rw-r--r--src/shared/uid-range.c180
-rw-r--r--src/shared/uid-range.h15
-rw-r--r--src/shared/unit-file.c601
-rw-r--r--src/shared/unit-file.h61
-rw-r--r--src/shared/user-record-nss.c531
-rw-r--r--src/shared/user-record-nss.h24
-rw-r--r--src/shared/user-record-show.c584
-rw-r--r--src/shared/user-record-show.h10
-rw-r--r--src/shared/user-record.c2272
-rw-r--r--src/shared/user-record.h444
-rw-r--r--src/shared/userdb.c1249
-rw-r--r--src/shared/userdb.h41
-rw-r--r--src/shared/utmp-wtmp.c409
-rw-r--r--src/shared/utmp-wtmp.h67
-rw-r--r--src/shared/varlink.c2502
-rw-r--r--src/shared/varlink.h174
-rw-r--r--src/shared/verbs.c112
-rw-r--r--src/shared/verbs.h23
-rw-r--r--src/shared/vlan-util.c100
-rw-r--r--src/shared/vlan-util.h21
-rw-r--r--src/shared/volatile-util.c46
-rw-r--r--src/shared/volatile-util.h16
-rw-r--r--src/shared/watchdog.c191
-rw-r--r--src/shared/watchdog.h17
-rw-r--r--src/shared/web-util.c53
-rw-r--r--src/shared/web-util.h12
-rw-r--r--src/shared/wifi-util.c128
-rw-r--r--src/shared/wifi-util.h11
-rw-r--r--src/shared/xml.c237
-rw-r--r--src/shared/xml.h14
278 files changed, 80898 insertions, 0 deletions
diff --git a/src/shared/acl-util.c b/src/shared/acl-util.c
new file mode 100644
index 0000000..ef4b883
--- /dev/null
+++ b/src/shared/acl-util.c
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "acl-util.h"
+#include "alloc-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+#include "util.h"
+
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *ret_entry) {
+ acl_entry_t i;
+ int r;
+
+ assert(acl);
+ assert(uid_is_valid(uid));
+ assert(ret_entry);
+
+ for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ acl_tag_t tag;
+ uid_t *u;
+ bool b;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_USER)
+ continue;
+
+ u = acl_get_qualifier(i);
+ if (!u)
+ return -errno;
+
+ b = *u == uid;
+ acl_free(u);
+
+ if (b) {
+ *ret_entry = i;
+ return 1;
+ }
+ }
+ if (r < 0)
+ return -errno;
+
+ *ret_entry = NULL;
+ return 0;
+}
+
+int calc_acl_mask_if_needed(acl_t *acl_p) {
+ acl_entry_t i;
+ int r;
+ bool need = false;
+
+ assert(acl_p);
+
+ for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag == ACL_MASK)
+ return 0;
+
+ if (IN_SET(tag, ACL_USER, ACL_GROUP))
+ need = true;
+ }
+ if (r < 0)
+ return -errno;
+
+ if (need && acl_calc_mask(acl_p) < 0)
+ return -errno;
+
+ return need;
+}
+
+int add_base_acls_if_needed(acl_t *acl_p, const char *path) {
+ acl_entry_t i;
+ int r;
+ bool have_user_obj = false, have_group_obj = false, have_other = false;
+ struct stat st;
+ _cleanup_(acl_freep) acl_t basic = NULL;
+
+ assert(acl_p);
+
+ for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if (tag == ACL_USER_OBJ)
+ have_user_obj = true;
+ else if (tag == ACL_GROUP_OBJ)
+ have_group_obj = true;
+ else if (tag == ACL_OTHER)
+ have_other = true;
+ if (have_user_obj && have_group_obj && have_other)
+ return 0;
+ }
+ if (r < 0)
+ return -errno;
+
+ r = stat(path, &st);
+ if (r < 0)
+ return -errno;
+
+ basic = acl_from_mode(st.st_mode);
+ if (!basic)
+ return -errno;
+
+ for (r = acl_get_entry(basic, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(basic, ACL_NEXT_ENTRY, &i)) {
+ acl_tag_t tag;
+ acl_entry_t dst;
+
+ if (acl_get_tag_type(i, &tag) < 0)
+ return -errno;
+
+ if ((tag == ACL_USER_OBJ && have_user_obj) ||
+ (tag == ACL_GROUP_OBJ && have_group_obj) ||
+ (tag == ACL_OTHER && have_other))
+ continue;
+
+ r = acl_create_entry(acl_p, &dst);
+ if (r < 0)
+ return -errno;
+
+ r = acl_copy_entry(dst, i);
+ if (r < 0)
+ return -errno;
+ }
+ if (r < 0)
+ return -errno;
+ return 0;
+}
+
+int acl_search_groups(const char *path, char ***ret_groups) {
+ _cleanup_strv_free_ char **g = NULL;
+ _cleanup_(acl_freep) acl_t acl = NULL;
+ bool ret = false;
+ acl_entry_t entry;
+ int r;
+
+ assert(path);
+
+ acl = acl_get_file(path, ACL_TYPE_DEFAULT);
+ if (!acl)
+ return -errno;
+
+ r = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry);
+ for (;;) {
+ _cleanup_(acl_free_gid_tpp) gid_t *gid = NULL;
+ acl_tag_t tag;
+
+ if (r < 0)
+ return -errno;
+ if (r == 0)
+ break;
+
+ if (acl_get_tag_type(entry, &tag) < 0)
+ return -errno;
+
+ if (tag != ACL_GROUP)
+ goto next;
+
+ gid = acl_get_qualifier(entry);
+ if (!gid)
+ return -errno;
+
+ if (in_gid(*gid) > 0) {
+ if (!ret_groups)
+ return true;
+
+ ret = true;
+ }
+
+ if (ret_groups) {
+ char *name;
+
+ name = gid_to_name(*gid);
+ if (!name)
+ return -ENOMEM;
+
+ r = strv_consume(&g, name);
+ if (r < 0)
+ return r;
+ }
+
+ next:
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry);
+ }
+
+ if (ret_groups)
+ *ret_groups = TAKE_PTR(g);
+
+ return ret;
+}
+
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask) {
+ _cleanup_free_ char **a = NULL, **d = NULL; /* strings are not freed */
+ _cleanup_strv_free_ char **split;
+ char **entry;
+ int r = -EINVAL;
+ _cleanup_(acl_freep) acl_t a_acl = NULL, d_acl = NULL;
+
+ split = strv_split(text, ",");
+ if (!split)
+ return -ENOMEM;
+
+ STRV_FOREACH(entry, split) {
+ char *p;
+
+ p = STARTSWITH_SET(*entry, "default:", "d:");
+ if (p)
+ r = strv_push(&d, p);
+ else
+ r = strv_push(&a, *entry);
+ if (r < 0)
+ return r;
+ }
+
+ if (!strv_isempty(a)) {
+ _cleanup_free_ char *join;
+
+ join = strv_join(a, ",");
+ if (!join)
+ return -ENOMEM;
+
+ a_acl = acl_from_text(join);
+ if (!a_acl)
+ return -errno;
+
+ if (want_mask) {
+ r = calc_acl_mask_if_needed(&a_acl);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!strv_isempty(d)) {
+ _cleanup_free_ char *join;
+
+ join = strv_join(d, ",");
+ if (!join)
+ return -ENOMEM;
+
+ d_acl = acl_from_text(join);
+ if (!d_acl)
+ return -errno;
+
+ if (want_mask) {
+ r = calc_acl_mask_if_needed(&d_acl);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *acl_access = TAKE_PTR(a_acl);
+ *acl_default = TAKE_PTR(d_acl);
+
+ return 0;
+}
+
+static int acl_entry_equal(acl_entry_t a, acl_entry_t b) {
+ acl_tag_t tag_a, tag_b;
+
+ if (acl_get_tag_type(a, &tag_a) < 0)
+ return -errno;
+
+ if (acl_get_tag_type(b, &tag_b) < 0)
+ return -errno;
+
+ if (tag_a != tag_b)
+ return false;
+
+ switch (tag_a) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ /* can have only one of those */
+ return true;
+ case ACL_USER: {
+ _cleanup_(acl_free_uid_tpp) uid_t *uid_a = NULL, *uid_b = NULL;
+
+ uid_a = acl_get_qualifier(a);
+ if (!uid_a)
+ return -errno;
+
+ uid_b = acl_get_qualifier(b);
+ if (!uid_b)
+ return -errno;
+
+ return *uid_a == *uid_b;
+ }
+ case ACL_GROUP: {
+ _cleanup_(acl_free_gid_tpp) gid_t *gid_a = NULL, *gid_b = NULL;
+
+ gid_a = acl_get_qualifier(a);
+ if (!gid_a)
+ return -errno;
+
+ gid_b = acl_get_qualifier(b);
+ if (!gid_b)
+ return -errno;
+
+ return *gid_a == *gid_b;
+ }
+ default:
+ assert_not_reached("Unknown acl tag type");
+ }
+}
+
+static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) {
+ acl_entry_t i;
+ int r;
+
+ for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) {
+
+ r = acl_entry_equal(i, entry);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *out = i;
+ return 1;
+ }
+ }
+ if (r < 0)
+ return -errno;
+ return 0;
+}
+
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) {
+ _cleanup_(acl_freep) acl_t old;
+ acl_entry_t i;
+ int r;
+
+ old = acl_get_file(path, type);
+ if (!old)
+ return -errno;
+
+ for (r = acl_get_entry(new, ACL_FIRST_ENTRY, &i);
+ r > 0;
+ r = acl_get_entry(new, ACL_NEXT_ENTRY, &i)) {
+
+ acl_entry_t j;
+
+ r = find_acl_entry(old, i, &j);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ if (acl_create_entry(&old, &j) < 0)
+ return -errno;
+
+ if (acl_copy_entry(j, i) < 0)
+ return -errno;
+ }
+ if (r < 0)
+ return -errno;
+
+ *acl = TAKE_PTR(old);
+
+ return 0;
+}
+
+/* POSIX says that ACL_{READ,WRITE,EXECUTE} don't have to be bitmasks. But that is a natural thing to do and
+ * all extant implementations do it. Let's make sure that we fail verbosely in the (imho unlikely) scenario
+ * that we get a new implementation that does not satisfy this. */
+assert_cc(!(ACL_READ & ACL_WRITE));
+assert_cc(!(ACL_WRITE & ACL_EXECUTE));
+assert_cc(!(ACL_EXECUTE & ACL_READ));
+assert_cc((unsigned) ACL_READ == ACL_READ);
+assert_cc((unsigned) ACL_WRITE == ACL_WRITE);
+assert_cc((unsigned) ACL_EXECUTE == ACL_EXECUTE);
+
+int fd_add_uid_acl_permission(
+ int fd,
+ uid_t uid,
+ unsigned mask) {
+
+ _cleanup_(acl_freep) acl_t acl = NULL;
+ acl_permset_t permset;
+ acl_entry_t entry;
+ int r;
+
+ /* Adds an ACL entry for the specified file to allow the indicated access to the specified
+ * user. Operates purely incrementally. */
+
+ assert(fd >= 0);
+ assert(uid_is_valid(uid));
+
+ acl = acl_get_fd(fd);
+ if (!acl)
+ return -errno;
+
+ r = acl_find_uid(acl, uid, &entry);
+ if (r <= 0) {
+ if (acl_create_entry(&acl, &entry) < 0 ||
+ acl_set_tag_type(entry, ACL_USER) < 0 ||
+ acl_set_qualifier(entry, &uid) < 0)
+ return -errno;
+ }
+
+ if (acl_get_permset(entry, &permset) < 0)
+ return -errno;
+
+ if ((mask & ACL_READ) && acl_add_perm(permset, ACL_READ) < 0)
+ return -errno;
+ if ((mask & ACL_WRITE) && acl_add_perm(permset, ACL_WRITE) < 0)
+ return -errno;
+ if ((mask & ACL_EXECUTE) && acl_add_perm(permset, ACL_EXECUTE) < 0)
+ return -errno;
+
+ r = calc_acl_mask_if_needed(&acl);
+ if (r < 0)
+ return r;
+
+ if (acl_set_fd(fd, acl) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/acl-util.h b/src/shared/acl-util.h
new file mode 100644
index 0000000..837e869
--- /dev/null
+++ b/src/shared/acl-util.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <unistd.h>
+
+#if HAVE_ACL
+#include <acl/libacl.h>
+#include <stdbool.h>
+#include <sys/acl.h>
+
+#include "macro.h"
+
+int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry);
+int calc_acl_mask_if_needed(acl_t *acl_p);
+int add_base_acls_if_needed(acl_t *acl_p, const char *path);
+int acl_search_groups(const char* path, char ***ret_groups);
+int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask);
+int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl);
+int fd_add_uid_acl_permission(int fd, uid_t uid, unsigned mask);
+
+/* acl_free takes multiple argument types.
+ * Multiple cleanup functions are necessary. */
+DEFINE_TRIVIAL_CLEANUP_FUNC(acl_t, acl_free);
+#define acl_free_charp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, acl_free_charp);
+#define acl_free_uid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(uid_t*, acl_free_uid_tp);
+#define acl_free_gid_tp acl_free
+DEFINE_TRIVIAL_CLEANUP_FUNC(gid_t*, acl_free_gid_tp);
+
+#else
+#define ACL_READ 0x04
+#define ACL_WRITE 0x02
+#define ACL_EXECUTE 0x01
+
+static inline int fd_add_uid_acl_permission(int fd, uid_t uid, unsigned mask) {
+ return -EOPNOTSUPP;
+}
+#endif
diff --git a/src/shared/acpi-fpdt.c b/src/shared/acpi-fpdt.c
new file mode 100644
index 0000000..1124453
--- /dev/null
+++ b/src/shared/acpi-fpdt.c
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "acpi-fpdt.h"
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "time-util.h"
+
+struct acpi_table_header {
+ char signature[4];
+ uint32_t length;
+ uint8_t revision;
+ uint8_t checksum;
+ char oem_id[6];
+ char oem_table_id[8];
+ uint32_t oem_revision;
+ char asl_compiler_id[4];
+ uint32_t asl_compiler_revision;
+} _packed_;
+
+enum {
+ ACPI_FPDT_TYPE_BOOT = 0,
+ ACPI_FPDT_TYPE_S3PERF = 1,
+};
+
+struct acpi_fpdt_header {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t ptr;
+} _packed_;
+
+struct acpi_fpdt_boot_header {
+ char signature[4];
+ uint32_t length;
+} _packed_;
+
+enum {
+ ACPI_FPDT_S3PERF_RESUME_REC = 0,
+ ACPI_FPDT_S3PERF_SUSPEND_REC = 1,
+ ACPI_FPDT_BOOT_REC = 2,
+};
+
+struct acpi_fpdt_boot {
+ uint16_t type;
+ uint8_t length;
+ uint8_t revision;
+ uint8_t reserved[4];
+ uint64_t reset_end;
+ uint64_t load_start;
+ uint64_t startup_start;
+ uint64_t exit_services_entry;
+ uint64_t exit_services_exit;
+} _packed;
+
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit) {
+ _cleanup_free_ char *buf = NULL;
+ struct acpi_table_header *tbl;
+ size_t l = 0;
+ struct acpi_fpdt_header *rec;
+ int r;
+ uint64_t ptr = 0;
+ _cleanup_close_ int fd = -1;
+ struct acpi_fpdt_boot_header hbrec;
+ struct acpi_fpdt_boot brec;
+
+ r = read_full_file("/sys/firmware/acpi/tables/FPDT", &buf, &l);
+ if (r < 0)
+ return r;
+
+ if (l < sizeof(struct acpi_table_header) + sizeof(struct acpi_fpdt_header))
+ return -EINVAL;
+
+ tbl = (struct acpi_table_header *)buf;
+ if (l != tbl->length)
+ return -EINVAL;
+
+ if (memcmp(tbl->signature, "FPDT", 4) != 0)
+ return -EINVAL;
+
+ /* find Firmware Basic Boot Performance Pointer Record */
+ for (rec = (struct acpi_fpdt_header *)(buf + sizeof(struct acpi_table_header));
+ (char *)rec < buf + l;
+ rec = (struct acpi_fpdt_header *)((char *)rec + rec->length)) {
+ if (rec->length <= 0)
+ break;
+ if (rec->type != ACPI_FPDT_TYPE_BOOT)
+ continue;
+ if (rec->length != sizeof(struct acpi_fpdt_header))
+ continue;
+
+ ptr = rec->ptr;
+ break;
+ }
+
+ if (ptr == 0)
+ return -ENODATA;
+
+ /* read Firmware Basic Boot Performance Data Record */
+ fd = open("/dev/mem", O_CLOEXEC|O_RDONLY);
+ if (fd < 0)
+ return -errno;
+
+ l = pread(fd, &hbrec, sizeof(struct acpi_fpdt_boot_header), ptr);
+ if (l != sizeof(struct acpi_fpdt_boot_header))
+ return -EINVAL;
+
+ if (memcmp(hbrec.signature, "FBPT", 4) != 0)
+ return -EINVAL;
+
+ if (hbrec.length < sizeof(struct acpi_fpdt_boot_header) + sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ l = pread(fd, &brec, sizeof(struct acpi_fpdt_boot), ptr + sizeof(struct acpi_fpdt_boot_header));
+ if (l != sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ if (brec.length != sizeof(struct acpi_fpdt_boot))
+ return -EINVAL;
+
+ if (brec.type != ACPI_FPDT_BOOT_REC)
+ return -EINVAL;
+
+ if (brec.exit_services_exit == 0)
+ /* Non-UEFI compatible boot. */
+ return -ENODATA;
+
+ if (brec.startup_start == 0 || brec.exit_services_exit < brec.startup_start)
+ return -EINVAL;
+ if (brec.exit_services_exit > NSEC_PER_HOUR)
+ return -EINVAL;
+
+ if (loader_start)
+ *loader_start = brec.startup_start / 1000;
+ if (loader_exit)
+ *loader_exit = brec.exit_services_exit / 1000;
+
+ return 0;
+}
diff --git a/src/shared/acpi-fpdt.h b/src/shared/acpi-fpdt.h
new file mode 100644
index 0000000..9eef92b
--- /dev/null
+++ b/src/shared/acpi-fpdt.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <time-util.h>
+
+int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit);
diff --git a/src/shared/apparmor-util.c b/src/shared/apparmor-util.c
new file mode 100644
index 0000000..68e1c55
--- /dev/null
+++ b/src/shared/apparmor-util.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stddef.h>
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+
+bool mac_apparmor_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ cached_use =
+ read_one_line_file("/sys/module/apparmor/parameters/enabled", &p) >= 0 &&
+ parse_boolean(p) > 0;
+ }
+
+ return cached_use;
+}
diff --git a/src/shared/apparmor-util.h b/src/shared/apparmor-util.h
new file mode 100644
index 0000000..8007aeb
--- /dev/null
+++ b/src/shared/apparmor-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool mac_apparmor_use(void);
diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c
new file mode 100644
index 0000000..8d66f9f
--- /dev/null
+++ b/src/shared/ask-password-api.c
@@ -0,0 +1,1050 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <sys/signalfd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/un.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "ask-password-api.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "tmpfile-util.h"
+#include "umask-util.h"
+#include "utf8.h"
+
+#define KEYRING_TIMEOUT_USEC ((5 * USEC_PER_MINUTE) / 2)
+
+static int lookup_key(const char *keyname, key_serial_t *ret) {
+ key_serial_t serial;
+
+ assert(keyname);
+ assert(ret);
+
+ serial = request_key("user", keyname, NULL, 0);
+ if (serial == -1)
+ return negative_errno();
+
+ *ret = serial;
+ return 0;
+}
+
+static int retrieve_key(key_serial_t serial, char ***ret) {
+ size_t nfinal, m = 100;
+ char **l;
+ _cleanup_(erase_and_freep) char *pfinal = NULL;
+
+ assert(ret);
+
+ for (;;) {
+ _cleanup_(erase_and_freep) char *p = NULL;
+ long n;
+
+ p = new(char, m);
+ if (!p)
+ return -ENOMEM;
+
+ n = keyctl(KEYCTL_READ, (unsigned long) serial, (unsigned long) p, (unsigned long) m, 0);
+ if (n < 0)
+ return -errno;
+ if ((size_t) n <= m) {
+ nfinal = (size_t) n;
+ pfinal = TAKE_PTR(p);
+ break;
+ }
+
+ if (m > LONG_MAX / 2) /* overflow check */
+ return -ENOMEM;
+ m *= 2;
+ }
+
+ l = strv_parse_nulstr(pfinal, nfinal);
+ if (!l)
+ return -ENOMEM;
+
+ *ret = l;
+ return 0;
+}
+
+static int add_to_keyring(const char *keyname, AskPasswordFlags flags, char **passwords) {
+ _cleanup_strv_free_erase_ char **l = NULL;
+ _cleanup_(erase_and_freep) char *p = NULL;
+ key_serial_t serial;
+ size_t n;
+ int r;
+
+ assert(keyname);
+
+ if (!(flags & ASK_PASSWORD_PUSH_CACHE))
+ return 0;
+ if (strv_isempty(passwords))
+ return 0;
+
+ r = lookup_key(keyname, &serial);
+ if (r >= 0) {
+ r = retrieve_key(serial, &l);
+ if (r < 0)
+ return r;
+ } else if (r != -ENOKEY)
+ return r;
+
+ r = strv_extend_strv(&l, passwords, true);
+ if (r <= 0)
+ return r;
+
+ r = strv_make_nulstr(l, &p, &n);
+ if (r < 0)
+ return r;
+
+ serial = add_key("user", keyname, p, n, KEY_SPEC_USER_KEYRING);
+ if (serial == -1)
+ return -errno;
+
+ if (keyctl(KEYCTL_SET_TIMEOUT,
+ (unsigned long) serial,
+ (unsigned long) DIV_ROUND_UP(KEYRING_TIMEOUT_USEC, USEC_PER_SEC), 0, 0) < 0)
+ log_debug_errno(errno, "Failed to adjust kernel keyring key timeout: %m");
+
+ /* Tell everyone to check the keyring */
+ (void) touch("/run/systemd/ask-password");
+
+ log_debug("Added key to kernel keyring as %" PRIi32 ".", serial);
+
+ return 1;
+}
+
+static int add_to_keyring_and_log(const char *keyname, AskPasswordFlags flags, char **passwords) {
+ int r;
+
+ assert(keyname);
+
+ r = add_to_keyring(keyname, flags, passwords);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add password to kernel keyring: %m");
+
+ return 0;
+}
+
+static int ask_password_keyring(const char *keyname, AskPasswordFlags flags, char ***ret) {
+
+ key_serial_t serial;
+ int r;
+
+ assert(keyname);
+ assert(ret);
+
+ if (!(flags & ASK_PASSWORD_ACCEPT_CACHED))
+ return -EUNATCH;
+
+ r = lookup_key(keyname, &serial);
+ if (ERRNO_IS_NOT_SUPPORTED(r) || r == -EPERM) /* when retrieving the distinction between "kernel or
+ * container manager don't support or allow this" and
+ * "no matching key known" doesn't matter. Note that we
+ * propagate EACCESS here (even if EPERM not) since
+ * that is used if the keyring is available but we lack
+ * access to the key. */
+ return -ENOKEY;
+ if (r < 0)
+ return r;
+
+ return retrieve_key(serial, ret);
+}
+
+static int backspace_chars(int ttyfd, size_t p) {
+ if (ttyfd < 0)
+ return 0;
+
+ _cleanup_free_ char *buf = malloc_multiply(3, p);
+ if (!buf)
+ return log_oom();
+
+ for (size_t i = 0; i < p; i++)
+ memcpy(buf + 3 * i, "\b \b", 3);
+
+ return loop_write(ttyfd, buf, 3*p, false);
+}
+
+static int backspace_string(int ttyfd, const char *str) {
+ assert(str);
+
+ /* Backspaces through enough characters to entirely undo printing of the specified string. */
+
+ if (ttyfd < 0)
+ return 0;
+
+ size_t m = utf8_n_codepoints(str);
+ if (m == (size_t) -1)
+ m = strlen(str); /* Not a valid UTF-8 string? If so, let's backspace the number of bytes
+ * output. Most likely this happened because we are not in an UTF-8 locale,
+ * and in that case that is the correct thing to do. And even if it's not,
+ * terminals tend to stop backspacing at the leftmost column, hence
+ * backspacing too much should be mostly OK. */
+
+ return backspace_chars(ttyfd, m);
+}
+
+int ask_password_plymouth(
+ const char *message,
+ usec_t until,
+ AskPasswordFlags flags,
+ const char *flag_file,
+ char ***ret) {
+
+ static const union sockaddr_union sa = PLYMOUTH_SOCKET;
+ _cleanup_close_ int fd = -1, notify = -1;
+ _cleanup_free_ char *packet = NULL;
+ ssize_t k;
+ int r, n;
+ struct pollfd pollfd[2] = {};
+ char buffer[LINE_MAX];
+ size_t p = 0;
+ enum {
+ POLL_SOCKET,
+ POLL_INOTIFY
+ };
+
+ assert(ret);
+
+ if (!message)
+ message = "Password:";
+
+ if (flag_file) {
+ notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK);
+ if (notify < 0)
+ return -errno;
+
+ r = inotify_add_watch(notify, flag_file, IN_ATTRIB); /* for the link count */
+ if (r < 0)
+ return -errno;
+ }
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return -errno;
+
+ if (flags & ASK_PASSWORD_ACCEPT_CACHED) {
+ packet = strdup("c");
+ n = 1;
+ } else if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0)
+ packet = NULL;
+ if (!packet)
+ return -ENOMEM;
+
+ r = loop_write(fd, packet, n + 1, true);
+ if (r < 0)
+ return r;
+
+ pollfd[POLL_SOCKET].fd = fd;
+ pollfd[POLL_SOCKET].events = POLLIN;
+ pollfd[POLL_INOTIFY].fd = notify;
+ pollfd[POLL_INOTIFY].events = POLLIN;
+
+ for (;;) {
+ int sleep_for = -1, j;
+
+ if (until > 0) {
+ usec_t y;
+
+ y = now(CLOCK_MONOTONIC);
+
+ if (y > until) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ sleep_for = (int) ((until - y) / USEC_PER_MSEC);
+ }
+
+ if (flag_file && access(flag_file, F_OK) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ j = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for);
+ if (j < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (j == 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if (pollfd[POLL_SOCKET].revents & POLLNVAL ||
+ (notify >= 0 && pollfd[POLL_INOTIFY].revents & POLLNVAL)) {
+ r = -EBADF;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0)
+ (void) flush_fd(notify);
+
+ if (pollfd[POLL_SOCKET].revents == 0)
+ continue;
+
+ k = read(fd, buffer + p, sizeof(buffer) - p);
+ if (k < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (k == 0) {
+ r = -EIO;
+ goto finish;
+ }
+
+ p += k;
+
+ if (buffer[0] == 5) {
+
+ if (flags & ASK_PASSWORD_ACCEPT_CACHED) {
+ /* Hmm, first try with cached
+ * passwords failed, so let's retry
+ * with a normal password request */
+ packet = mfree(packet);
+
+ if (asprintf(&packet, "*\002%c%s%n", (int) (strlen(message) + 1), message, &n) < 0) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = loop_write(fd, packet, n+1, true);
+ if (r < 0)
+ goto finish;
+
+ flags &= ~ASK_PASSWORD_ACCEPT_CACHED;
+ p = 0;
+ continue;
+ }
+
+ /* No password, because UI not shown */
+ r = -ENOENT;
+ goto finish;
+
+ } else if (IN_SET(buffer[0], 2, 9)) {
+ uint32_t size;
+ char **l;
+
+ /* One or more answers */
+ if (p < 5)
+ continue;
+
+ memcpy(&size, buffer+1, sizeof(size));
+ size = le32toh(size);
+ if (size + 5 > sizeof(buffer)) {
+ r = -EIO;
+ goto finish;
+ }
+
+ if (p-5 < size)
+ continue;
+
+ l = strv_parse_nulstr(buffer + 5, size);
+ if (!l) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ *ret = l;
+ break;
+
+ } else {
+ /* Unknown packet */
+ r = -EIO;
+ goto finish;
+ }
+ }
+
+ r = 0;
+
+finish:
+ explicit_bzero_safe(buffer, sizeof(buffer));
+ return r;
+}
+
+#define NO_ECHO "(no echo) "
+#define PRESS_TAB "(press TAB for no echo) "
+#define SKIPPED "(skipped)"
+
+int ask_password_tty(
+ int ttyfd,
+ const char *message,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ const char *flag_file,
+ char ***ret) {
+
+ enum {
+ POLL_TTY,
+ POLL_INOTIFY,
+ _POLL_MAX,
+ };
+
+ bool reset_tty = false, dirty = false, use_color = false, press_tab_visible = false;
+ _cleanup_close_ int cttyfd = -1, notify = -1;
+ struct termios old_termios, new_termios;
+ char passphrase[LINE_MAX + 1] = {}, *x;
+ _cleanup_strv_free_erase_ char **l = NULL;
+ struct pollfd pollfd[_POLL_MAX];
+ size_t p = 0, codepoint = 0;
+ int r;
+
+ assert(ret);
+
+ if (flags & ASK_PASSWORD_NO_TTY)
+ return -EUNATCH;
+
+ if (!message)
+ message = "Password:";
+
+ if (emoji_enabled())
+ message = strjoina(special_glyph(SPECIAL_GLYPH_LOCK_AND_KEY), " ", message);
+
+ if (flag_file || ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname)) {
+ notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK);
+ if (notify < 0)
+ return -errno;
+ }
+ if (flag_file) {
+ if (inotify_add_watch(notify, flag_file, IN_ATTRIB /* for the link count */) < 0)
+ return -errno;
+ }
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0)
+ return 0;
+ else if (r != -ENOKEY)
+ return r;
+
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0)
+ return -errno;
+ }
+
+ /* If the caller didn't specify a TTY, then use the controlling tty, if we can. */
+ if (ttyfd < 0)
+ ttyfd = cttyfd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC);
+
+ if (ttyfd >= 0) {
+ if (tcgetattr(ttyfd, &old_termios) < 0)
+ return -errno;
+
+ if (flags & ASK_PASSWORD_CONSOLE_COLOR)
+ use_color = dev_console_colors_enabled();
+ else
+ use_color = colors_enabled();
+
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_HIGHLIGHT, STRLEN(ANSI_HIGHLIGHT), false);
+
+ (void) loop_write(ttyfd, message, strlen(message), false);
+ (void) loop_write(ttyfd, " ", 1, false);
+
+ if (!(flags & ASK_PASSWORD_SILENT) && !(flags & ASK_PASSWORD_ECHO)) {
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_GREY, STRLEN(ANSI_GREY), false);
+ (void) loop_write(ttyfd, PRESS_TAB, strlen(PRESS_TAB), false);
+ press_tab_visible = true;
+ }
+
+ if (use_color)
+ (void) loop_write(ttyfd, ANSI_NORMAL, STRLEN(ANSI_NORMAL), false);
+
+ new_termios = old_termios;
+ new_termios.c_lflag &= ~(ICANON|ECHO);
+ new_termios.c_cc[VMIN] = 1;
+ new_termios.c_cc[VTIME] = 0;
+
+ if (tcsetattr(ttyfd, TCSADRAIN, &new_termios) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ reset_tty = true;
+ }
+
+ pollfd[POLL_TTY] = (struct pollfd) {
+ .fd = ttyfd >= 0 ? ttyfd : STDIN_FILENO,
+ .events = POLLIN,
+ };
+ pollfd[POLL_INOTIFY] = (struct pollfd) {
+ .fd = notify,
+ .events = POLLIN,
+ };
+
+ for (;;) {
+ _cleanup_(erase_char) char c;
+ int sleep_for = -1, k;
+ ssize_t n;
+
+ if (until > 0) {
+ usec_t y;
+
+ y = now(CLOCK_MONOTONIC);
+
+ if (y > until) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ sleep_for = (int) DIV_ROUND_UP(until - y, USEC_PER_MSEC);
+ }
+
+ if (flag_file)
+ if (access(flag_file, F_OK) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ k = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ } else if (k == 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if ((pollfd[POLL_TTY].revents & POLLNVAL) ||
+ (notify >= 0 && (pollfd[POLL_INOTIFY].revents & POLLNVAL))) {
+ r = -EBADF;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0 && keyname) {
+ (void) flush_fd(notify);
+
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+ }
+
+ if (pollfd[POLL_TTY].revents == 0)
+ continue;
+
+ n = read(ttyfd >= 0 ? ttyfd : STDIN_FILENO, &c, 1);
+ if (n < 0) {
+ if (IN_SET(errno, EINTR, EAGAIN))
+ continue;
+
+ r = -errno;
+ goto finish;
+
+ }
+
+ if (press_tab_visible) {
+ assert(ttyfd >= 0);
+ backspace_chars(ttyfd, strlen(PRESS_TAB));
+ press_tab_visible = false;
+ }
+
+ /* We treat EOF, newline and NUL byte all as valid end markers */
+ if (n == 0 || c == '\n' || c == 0)
+ break;
+
+ if (c == 4) { /* C-d also known as EOT */
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, SKIPPED, strlen(SKIPPED), false);
+
+ goto skipped;
+ }
+
+ if (c == 21) { /* C-u */
+
+ if (!(flags & ASK_PASSWORD_SILENT))
+ (void) backspace_string(ttyfd, passphrase);
+
+ explicit_bzero_safe(passphrase, sizeof(passphrase));
+ p = codepoint = 0;
+
+ } else if (IN_SET(c, '\b', 127)) {
+
+ if (p > 0) {
+ size_t q;
+
+ if (!(flags & ASK_PASSWORD_SILENT))
+ (void) backspace_chars(ttyfd, 1);
+
+ /* Remove a full UTF-8 codepoint from the end. For that, figure out where the
+ * last one begins */
+ q = 0;
+ for (;;) {
+ size_t z;
+
+ z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1);
+ if (z == 0) {
+ q = (size_t) -1; /* Invalid UTF8! */
+ break;
+ }
+
+ if (q + z >= p) /* This one brings us over the edge */
+ break;
+
+ q += z;
+ }
+
+ p = codepoint = q == (size_t) -1 ? p - 1 : q;
+ explicit_bzero_safe(passphrase + p, sizeof(passphrase) - p);
+
+ } else if (!dirty && !(flags & ASK_PASSWORD_SILENT)) {
+
+ flags |= ASK_PASSWORD_SILENT;
+
+ /* There are two ways to enter silent mode. Either by pressing backspace as
+ * first key (and only as first key), or ... */
+
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, NO_ECHO, strlen(NO_ECHO), false);
+
+ } else if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "\a", 1, false);
+
+ } else if (c == '\t' && !(flags & ASK_PASSWORD_SILENT)) {
+
+ (void) backspace_string(ttyfd, passphrase);
+ flags |= ASK_PASSWORD_SILENT;
+
+ /* ... or by pressing TAB at any time. */
+
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, NO_ECHO, strlen(NO_ECHO), false);
+
+ } else if (p >= sizeof(passphrase)-1) {
+
+ /* Reached the size limit */
+ if (ttyfd >= 0)
+ (void) loop_write(ttyfd, "\a", 1, false);
+
+ } else {
+ passphrase[p++] = c;
+
+ if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
+ /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
+ n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1);
+ if (n >= 0) {
+ if (flags & ASK_PASSWORD_ECHO)
+ (void) loop_write(ttyfd, passphrase + codepoint, n, false);
+ else
+ (void) loop_write(ttyfd, "*", 1, false);
+ codepoint = p;
+ }
+ }
+
+ dirty = true;
+ }
+ }
+
+ x = strndup(passphrase, p);
+ explicit_bzero_safe(passphrase, sizeof(passphrase));
+ if (!x) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = strv_consume(&l, x);
+ if (r < 0)
+ goto finish;
+
+skipped:
+ if (strv_isempty(l))
+ r = log_debug_errno(SYNTHETIC_ERRNO(ECANCELED), "Password query was cancelled.");
+ else {
+ if (keyname)
+ (void) add_to_keyring_and_log(keyname, flags, l);
+
+ *ret = TAKE_PTR(l);
+ r = 0;
+ }
+
+finish:
+ if (ttyfd >= 0 && reset_tty) {
+ (void) loop_write(ttyfd, "\n", 1, false);
+ (void) tcsetattr(ttyfd, TCSADRAIN, &old_termios);
+ }
+
+ return r;
+}
+
+static int create_socket(char **ret) {
+ _cleanup_free_ char *path = NULL;
+ union sockaddr_union sa;
+ socklen_t sa_len;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(ret);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (asprintf(&path, "/run/systemd/ask-password/sck.%" PRIx64, random_u64()) < 0)
+ return -ENOMEM;
+
+ r = sockaddr_un_set_path(&sa.un, path);
+ if (r < 0)
+ return r;
+ sa_len = r;
+
+ RUN_WITH_UMASK(0177)
+ if (bind(fd, &sa.sa, sa_len) < 0)
+ return -errno;
+
+ r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(path);
+ return TAKE_FD(fd);
+}
+
+int ask_password_agent(
+ const char *message,
+ const char *icon,
+ const char *id,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ char ***ret) {
+
+ enum {
+ FD_SOCKET,
+ FD_SIGNAL,
+ FD_INOTIFY,
+ _FD_MAX
+ };
+
+ _cleanup_close_ int socket_fd = -1, signal_fd = -1, notify = -1, fd = -1;
+ char temp[] = "/run/systemd/ask-password/tmp.XXXXXX";
+ char final[sizeof(temp)] = "";
+ _cleanup_free_ char *socket_name = NULL;
+ _cleanup_strv_free_erase_ char **l = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ struct pollfd pollfd[_FD_MAX];
+ sigset_t mask, oldmask;
+ int r;
+
+ assert(ret);
+
+ if (flags & ASK_PASSWORD_NO_AGENT)
+ return -EUNATCH;
+
+ assert_se(sigemptyset(&mask) >= 0);
+ assert_se(sigset_add_many(&mask, SIGINT, SIGTERM, -1) >= 0);
+ assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) >= 0);
+
+ (void) mkdir_p_label("/run/systemd/ask-password", 0755);
+
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+
+ notify = inotify_init1(IN_CLOEXEC | IN_NONBLOCK);
+ if (notify < 0) {
+ r = -errno;
+ goto finish;
+ }
+ if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) {
+ r = -errno;
+ goto finish;
+ }
+ }
+
+ fd = mkostemp_safe(temp);
+ if (fd < 0) {
+ r = fd;
+ goto finish;
+ }
+
+ (void) fchmod(fd, 0644);
+
+ f = take_fdopen(&fd, "w");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (signal_fd < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ socket_fd = create_socket(&socket_name);
+ if (socket_fd < 0) {
+ r = socket_fd;
+ goto finish;
+ }
+
+ fprintf(f,
+ "[Ask]\n"
+ "PID="PID_FMT"\n"
+ "Socket=%s\n"
+ "AcceptCached=%i\n"
+ "Echo=%i\n"
+ "NotAfter="USEC_FMT"\n",
+ getpid_cached(),
+ socket_name,
+ (flags & ASK_PASSWORD_ACCEPT_CACHED) ? 1 : 0,
+ (flags & ASK_PASSWORD_ECHO) ? 1 : 0,
+ until);
+
+ if (message)
+ fprintf(f, "Message=%s\n", message);
+
+ if (icon)
+ fprintf(f, "Icon=%s\n", icon);
+
+ if (id)
+ fprintf(f, "Id=%s\n", id);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ goto finish;
+
+ memcpy(final, temp, sizeof(temp));
+
+ final[sizeof(final)-11] = 'a';
+ final[sizeof(final)-10] = 's';
+ final[sizeof(final)-9] = 'k';
+
+ if (rename(temp, final) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ zero(pollfd);
+ pollfd[FD_SOCKET].fd = socket_fd;
+ pollfd[FD_SOCKET].events = POLLIN;
+ pollfd[FD_SIGNAL].fd = signal_fd;
+ pollfd[FD_SIGNAL].events = POLLIN;
+ pollfd[FD_INOTIFY].fd = notify;
+ pollfd[FD_INOTIFY].events = POLLIN;
+
+ for (;;) {
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+ char passphrase[LINE_MAX+1];
+ struct iovec iovec;
+ struct ucred *ucred;
+ ssize_t n;
+ int k;
+ usec_t t;
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (until > 0 && until <= t) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ k = poll(pollfd, notify >= 0 ? _FD_MAX : _FD_MAX - 1, until > 0 ? (int) ((until-t)/USEC_PER_MSEC) : -1);
+ if (k < 0) {
+ if (errno == EINTR)
+ continue;
+
+ r = -errno;
+ goto finish;
+ }
+
+ if (k <= 0) {
+ r = -ETIME;
+ goto finish;
+ }
+
+ if (pollfd[FD_SOCKET].revents & POLLNVAL ||
+ pollfd[FD_SIGNAL].revents & POLLNVAL ||
+ (notify >= 0 && pollfd[FD_INOTIFY].revents & POLLNVAL)) {
+ r = -EBADF;
+ goto finish;
+ }
+
+ if (pollfd[FD_SIGNAL].revents & POLLIN) {
+ r = -EINTR;
+ goto finish;
+ }
+
+ if (notify >= 0 && pollfd[FD_INOTIFY].revents != 0) {
+ (void) flush_fd(notify);
+
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r >= 0) {
+ r = 0;
+ goto finish;
+ } else if (r != -ENOKEY)
+ goto finish;
+ }
+
+ if (pollfd[FD_SOCKET].revents == 0)
+ continue;
+
+ if (pollfd[FD_SOCKET].revents != POLLIN) {
+ r = -EIO;
+ goto finish;
+ }
+
+ iovec = IOVEC_MAKE(passphrase, sizeof(passphrase));
+
+ struct msghdr msghdr = {
+ .msg_iov = &iovec,
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+
+ n = recvmsg_safe(socket_fd, &msghdr, 0);
+ if (IN_SET(n, -EAGAIN, -EINTR))
+ continue;
+ if (n == -EXFULL) {
+ log_debug("Got message with truncated control data, ignoring.");
+ continue;
+ }
+ if (n < 0) {
+ r = (int) n;
+ goto finish;
+ }
+
+ cmsg_close_all(&msghdr);
+
+ if (n <= 0) {
+ log_debug("Message too short");
+ continue;
+ }
+
+ ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+ if (!ucred) {
+ log_debug("Received message without credentials. Ignoring.");
+ continue;
+ }
+
+ if (ucred->uid != 0) {
+ log_debug("Got request from unprivileged user. Ignoring.");
+ continue;
+ }
+
+ if (passphrase[0] == '+') {
+ /* An empty message refers to the empty password */
+ if (n == 1)
+ l = strv_new("");
+ else
+ l = strv_parse_nulstr(passphrase+1, n-1);
+ explicit_bzero_safe(passphrase, n);
+ if (!l) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ if (strv_isempty(l)) {
+ l = strv_free(l);
+ log_debug("Invalid packet");
+ continue;
+ }
+
+ break;
+ }
+
+ if (passphrase[0] == '-') {
+ r = -ECANCELED;
+ goto finish;
+ }
+
+ log_debug("Invalid packet");
+ }
+
+ if (keyname)
+ (void) add_to_keyring_and_log(keyname, flags, l);
+
+ *ret = TAKE_PTR(l);
+ r = 0;
+
+finish:
+ if (socket_name)
+ (void) unlink(socket_name);
+
+ (void) unlink(temp);
+
+ if (final[0])
+ (void) unlink(final);
+
+ assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);
+ return r;
+}
+
+int ask_password_auto(
+ const char *message,
+ const char *icon,
+ const char *id,
+ const char *keyname,
+ usec_t until,
+ AskPasswordFlags flags,
+ char ***ret) {
+
+ int r;
+
+ assert(ret);
+
+ if ((flags & ASK_PASSWORD_ACCEPT_CACHED) &&
+ keyname &&
+ ((flags & ASK_PASSWORD_NO_TTY) || !isatty(STDIN_FILENO)) &&
+ (flags & ASK_PASSWORD_NO_AGENT)) {
+ r = ask_password_keyring(keyname, flags, ret);
+ if (r != -ENOKEY)
+ return r;
+ }
+
+ if (!(flags & ASK_PASSWORD_NO_TTY) && isatty(STDIN_FILENO))
+ return ask_password_tty(-1, message, keyname, until, flags, NULL, ret);
+
+ if (!(flags & ASK_PASSWORD_NO_AGENT))
+ return ask_password_agent(message, icon, id, keyname, until, flags, ret);
+
+ return -EUNATCH;
+}
diff --git a/src/shared/ask-password-api.h b/src/shared/ask-password-api.h
new file mode 100644
index 0000000..7aac5e5
--- /dev/null
+++ b/src/shared/ask-password-api.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+typedef enum AskPasswordFlags {
+ ASK_PASSWORD_ACCEPT_CACHED = 1 << 0,
+ ASK_PASSWORD_PUSH_CACHE = 1 << 1,
+ ASK_PASSWORD_ECHO = 1 << 2, /* show the password literally while reading, instead of "*" */
+ ASK_PASSWORD_SILENT = 1 << 3, /* do no show any password at all while reading */
+ ASK_PASSWORD_NO_TTY = 1 << 4,
+ ASK_PASSWORD_NO_AGENT = 1 << 5,
+ ASK_PASSWORD_CONSOLE_COLOR = 1 << 6, /* Use color if /dev/console points to a console that supports color */
+} AskPasswordFlags;
+
+int ask_password_tty(int tty_fd, const char *message, const char *keyname, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret);
+int ask_password_plymouth(const char *message, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret);
+int ask_password_agent(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
+int ask_password_auto(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret);
diff --git a/src/shared/barrier.c b/src/shared/barrier.c
new file mode 100644
index 0000000..9c93d61
--- /dev/null
+++ b/src/shared/barrier.c
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "barrier.h"
+#include "fd-util.h"
+#include "macro.h"
+
+/**
+ * Barriers
+ * This barrier implementation provides a simple synchronization method based
+ * on file-descriptors that can safely be used between threads and processes. A
+ * barrier object contains 2 shared counters based on eventfd. Both processes
+ * can now place barriers and wait for the other end to reach a random or
+ * specific barrier.
+ * Barriers are numbered, so you can either wait for the other end to reach any
+ * barrier or the last barrier that you placed. This way, you can use barriers
+ * for one-way *and* full synchronization. Note that even-though barriers are
+ * numbered, these numbers are internal and recycled once both sides reached the
+ * same barrier (implemented as a simple signed counter). It is thus not
+ * possible to address barriers by their ID.
+ *
+ * Barrier-API: Both ends can place as many barriers via barrier_place() as
+ * they want and each pair of barriers on both sides will be implicitly linked.
+ * Each side can use the barrier_wait/sync_*() family of calls to wait for the
+ * other side to place a specific barrier. barrier_wait_next() waits until the
+ * other side calls barrier_place(). No links between the barriers are
+ * considered and this simply serves as most basic asynchronous barrier.
+ * barrier_sync_next() is like barrier_wait_next() and waits for the other side
+ * to place their next barrier via barrier_place(). However, it only waits for
+ * barriers that are linked to a barrier we already placed. If the other side
+ * already placed more barriers than we did, barrier_sync_next() returns
+ * immediately.
+ * barrier_sync() extends barrier_sync_next() and waits until the other end
+ * placed as many barriers via barrier_place() as we did. If they already placed
+ * as many as we did (or more), it returns immediately.
+ *
+ * Additionally to basic barriers, an abortion event is available.
+ * barrier_abort() places an abortion event that cannot be undone. An abortion
+ * immediately cancels all placed barriers and replaces them. Any running and
+ * following wait/sync call besides barrier_wait_abortion() will immediately
+ * return false on both sides (otherwise, they always return true).
+ * barrier_abort() can be called multiple times on both ends and will be a
+ * no-op if already called on this side.
+ * barrier_wait_abortion() can be used to wait for the other side to call
+ * barrier_abort() and is the only wait/sync call that does not return
+ * immediately if we aborted outself. It only returns once the other side
+ * called barrier_abort().
+ *
+ * Barriers can be used for in-process and inter-process synchronization.
+ * However, for in-process synchronization you could just use mutexes.
+ * Therefore, main target is IPC and we require both sides to *not* share the FD
+ * table. If that's given, barriers provide target tracking: If the remote side
+ * exit()s, an abortion event is implicitly queued on the other side. This way,
+ * a sync/wait call will be woken up if the remote side crashed or exited
+ * unexpectedly. However, note that these abortion events are only queued if the
+ * barrier-queue has been drained. Therefore, it is safe to place a barrier and
+ * exit. The other side can safely wait on the barrier even though the exit
+ * queued an abortion event. Usually, the abortion event would overwrite the
+ * barrier, however, that's not true for exit-abortion events. Those are only
+ * queued if the barrier-queue is drained (thus, the receiving side has placed
+ * more barriers than the remote side).
+ */
+
+/**
+ * barrier_create() - Initialize a barrier object
+ * @obj: barrier to initialize
+ *
+ * This initializes a barrier object. The caller is responsible of allocating
+ * the memory and keeping it valid. The memory does not have to be zeroed
+ * beforehand.
+ * Two eventfd objects are allocated for each barrier. If allocation fails, an
+ * error is returned.
+ *
+ * If this function fails, the barrier is reset to an invalid state so it is
+ * safe to call barrier_destroy() on the object regardless whether the
+ * initialization succeeded or not.
+ *
+ * The caller is responsible to destroy the object via barrier_destroy() before
+ * releasing the underlying memory.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int barrier_create(Barrier *b) {
+ _cleanup_(barrier_destroyp) Barrier *staging = b;
+ int r;
+
+ assert(b);
+
+ b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->me < 0)
+ return -errno;
+
+ b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+ if (b->them < 0)
+ return -errno;
+
+ r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
+ if (r < 0)
+ return -errno;
+
+ staging = NULL;
+ return 0;
+}
+
+/**
+ * barrier_destroy() - Destroy a barrier object
+ * @b: barrier to destroy or NULL
+ *
+ * This destroys a barrier object that has previously been passed to
+ * barrier_create(). The object is released and reset to invalid
+ * state. Therefore, it is safe to call barrier_destroy() multiple
+ * times or even if barrier_create() failed. However, barrier must be
+ * always initialized with BARRIER_NULL.
+ *
+ * If @b is NULL, this is a no-op.
+ */
+void barrier_destroy(Barrier *b) {
+ if (!b)
+ return;
+
+ b->me = safe_close(b->me);
+ b->them = safe_close(b->them);
+ safe_close_pair(b->pipe);
+ b->barriers = 0;
+}
+
+/**
+ * barrier_set_role() - Set the local role of the barrier
+ * @b: barrier to operate on
+ * @role: role to set on the barrier
+ *
+ * This sets the roles on a barrier object. This is needed to know
+ * which side of the barrier you're on. Usually, the parent creates
+ * the barrier via barrier_create() and then calls fork() or clone().
+ * Therefore, the FDs are duplicated and the child retains the same
+ * barrier object.
+ *
+ * Both sides need to call barrier_set_role() after fork() or clone()
+ * are done. If this is not done, barriers will not work correctly.
+ *
+ * Note that barriers could be supported without fork() or clone(). However,
+ * this is currently not needed so it hasn't been implemented.
+ */
+void barrier_set_role(Barrier *b, unsigned role) {
+ assert(b);
+ assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
+ /* make sure this is only called once */
+ assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
+
+ if (role == BARRIER_PARENT)
+ b->pipe[1] = safe_close(b->pipe[1]);
+ else {
+ b->pipe[0] = safe_close(b->pipe[0]);
+
+ /* swap me/them for children */
+ SWAP_TWO(b->me, b->them);
+ }
+}
+
+/* places barrier; returns false if we aborted, otherwise true */
+static bool barrier_write(Barrier *b, uint64_t buf) {
+ ssize_t len;
+
+ /* prevent new sync-points if we already aborted */
+ if (barrier_i_aborted(b))
+ return false;
+
+ assert(b->me >= 0);
+ do {
+ len = write(b->me, &buf, sizeof(buf));
+ } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
+
+ if (len != sizeof(buf))
+ goto error;
+
+ /* lock if we aborted */
+ if (buf >= (uint64_t)BARRIER_ABORTION) {
+ if (barrier_they_aborted(b))
+ b->barriers = BARRIER_WE_ABORTED;
+ else
+ b->barriers = BARRIER_I_ABORTED;
+ } else if (!barrier_is_aborted(b))
+ b->barriers += buf;
+
+ return !barrier_i_aborted(b);
+
+error:
+ /* If there is an unexpected error, we have to make this fatal. There
+ * is no way we can recover from sync-errors. Therefore, we close the
+ * pipe-ends and treat this as abortion. The other end will notice the
+ * pipe-close and treat it as abortion, too. */
+
+ safe_close_pair(b->pipe);
+ b->barriers = BARRIER_WE_ABORTED;
+ return false;
+}
+
+/* waits for barriers; returns false if they aborted, otherwise true */
+static bool barrier_read(Barrier *b, int64_t comp) {
+ if (barrier_they_aborted(b))
+ return false;
+
+ while (b->barriers > comp) {
+ struct pollfd pfd[2] = {
+ { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
+ .events = POLLHUP },
+ { .fd = b->them,
+ .events = POLLIN }};
+ uint64_t buf;
+ int r;
+
+ r = poll(pfd, ELEMENTSOF(pfd), -1);
+ if (r < 0) {
+ if (IN_SET(errno, EAGAIN, EINTR))
+ continue;
+ goto error;
+ }
+ if (pfd[0].revents & POLLNVAL ||
+ pfd[1].revents & POLLNVAL)
+ goto error;
+
+ if (pfd[1].revents) {
+ ssize_t len;
+
+ /* events on @them signal new data for us */
+ len = read(b->them, &buf, sizeof(buf));
+ if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
+ continue;
+
+ if (len != sizeof(buf))
+ goto error;
+ } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
+ /* POLLHUP on the pipe tells us the other side exited.
+ * We treat this as implicit abortion. But we only
+ * handle it if there's no event on the eventfd. This
+ * guarantees that exit-abortions do not overwrite real
+ * barriers. */
+ buf = BARRIER_ABORTION;
+ else
+ continue;
+
+ /* lock if they aborted */
+ if (buf >= (uint64_t)BARRIER_ABORTION) {
+ if (barrier_i_aborted(b))
+ b->barriers = BARRIER_WE_ABORTED;
+ else
+ b->barriers = BARRIER_THEY_ABORTED;
+ } else if (!barrier_is_aborted(b))
+ b->barriers -= buf;
+ }
+
+ return !barrier_they_aborted(b);
+
+error:
+ /* If there is an unexpected error, we have to make this fatal. There
+ * is no way we can recover from sync-errors. Therefore, we close the
+ * pipe-ends and treat this as abortion. The other end will notice the
+ * pipe-close and treat it as abortion, too. */
+
+ safe_close_pair(b->pipe);
+ b->barriers = BARRIER_WE_ABORTED;
+ return false;
+}
+
+/**
+ * barrier_place() - Place a new barrier
+ * @b: barrier object
+ *
+ * This places a new barrier on the barrier object. If either side already
+ * aborted, this is a no-op and returns "false". Otherwise, the barrier is
+ * placed and this returns "true".
+ *
+ * Returns: true if barrier was placed, false if either side aborted.
+ */
+bool barrier_place(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_write(b, BARRIER_SINGLE);
+ return true;
+}
+
+/**
+ * barrier_abort() - Abort the synchronization
+ * @b: barrier object to abort
+ *
+ * This aborts the barrier-synchronization. If barrier_abort() was already
+ * called on this side, this is a no-op. Otherwise, the barrier is put into the
+ * ABORT-state and will stay there. The other side is notified about the
+ * abortion. Any following attempt to place normal barriers or to wait on normal
+ * barriers will return immediately as "false".
+ *
+ * You can wait for the other side to call barrier_abort(), too. Use
+ * barrier_wait_abortion() for that.
+ *
+ * Returns: false if the other side already aborted, true otherwise.
+ */
+bool barrier_abort(Barrier *b) {
+ assert(b);
+
+ barrier_write(b, BARRIER_ABORTION);
+ return !barrier_they_aborted(b);
+}
+
+/**
+ * barrier_wait_next() - Wait for the next barrier of the other side
+ * @b: barrier to operate on
+ *
+ * This waits until the other side places its next barrier. This is independent
+ * of any barrier-links and just waits for any next barrier of the other side.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_wait_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, b->barriers - 1);
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_wait_abortion() - Wait for the other side to abort
+ * @b: barrier to operate on
+ *
+ * This waits until the other side called barrier_abort(). This can be called
+ * regardless whether the local side already called barrier_abort() or not.
+ *
+ * If the other side has already aborted, this returns immediately.
+ *
+ * Returns: false if the local side aborted, true otherwise.
+ */
+bool barrier_wait_abortion(Barrier *b) {
+ assert(b);
+
+ barrier_read(b, BARRIER_THEY_ABORTED);
+ return !barrier_i_aborted(b);
+}
+
+/**
+ * barrier_sync_next() - Wait for the other side to place a next linked barrier
+ * @b: barrier to operate on
+ *
+ * This is like barrier_wait_next() and waits for the other side to call
+ * barrier_place(). However, this only waits for linked barriers. That means, if
+ * the other side already placed more barriers than (or as much as) we did, this
+ * returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync_next(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, MAX((int64_t)0, b->barriers - 1));
+ return !barrier_is_aborted(b);
+}
+
+/**
+ * barrier_sync() - Wait for the other side to place as many barriers as we did
+ * @b: barrier to operate on
+ *
+ * This is like barrier_sync_next() but waits for the other side to call
+ * barrier_place() as often as we did (in total). If they already placed as much
+ * as we did (or more), this returns immediately instead of waiting.
+ *
+ * If either side aborted, this returns false.
+ *
+ * Returns: false if either side aborted, true otherwise.
+ */
+bool barrier_sync(Barrier *b) {
+ assert(b);
+
+ if (barrier_is_aborted(b))
+ return false;
+
+ barrier_read(b, 0);
+ return !barrier_is_aborted(b);
+}
diff --git a/src/shared/barrier.h b/src/shared/barrier.h
new file mode 100644
index 0000000..b11dce4
--- /dev/null
+++ b/src/shared/barrier.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "macro.h"
+
+/* See source file for an API description. */
+
+typedef struct Barrier Barrier;
+
+enum {
+ BARRIER_SINGLE = 1LL,
+ BARRIER_ABORTION = INT64_MAX,
+
+ /* bias values to store state; keep @WE < @THEY < @I */
+ BARRIER_BIAS = INT64_MIN,
+ BARRIER_WE_ABORTED = BARRIER_BIAS + 1LL,
+ BARRIER_THEY_ABORTED = BARRIER_BIAS + 2LL,
+ BARRIER_I_ABORTED = BARRIER_BIAS + 3LL,
+};
+
+enum {
+ BARRIER_PARENT,
+ BARRIER_CHILD,
+};
+
+struct Barrier {
+ int me;
+ int them;
+ int pipe[2];
+ int64_t barriers;
+};
+
+#define BARRIER_NULL {-1, -1, {-1, -1}, 0}
+
+int barrier_create(Barrier *obj);
+void barrier_destroy(Barrier *b);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Barrier*, barrier_destroy);
+
+void barrier_set_role(Barrier *b, unsigned role);
+
+bool barrier_place(Barrier *b);
+bool barrier_abort(Barrier *b);
+
+bool barrier_wait_next(Barrier *b);
+bool barrier_wait_abortion(Barrier *b);
+bool barrier_sync_next(Barrier *b);
+bool barrier_sync(Barrier *b);
+
+static inline bool barrier_i_aborted(Barrier *b) {
+ return IN_SET(b->barriers, BARRIER_I_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_they_aborted(Barrier *b) {
+ return IN_SET(b->barriers, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_we_aborted(Barrier *b) {
+ return b->barriers == BARRIER_WE_ABORTED;
+}
+
+static inline bool barrier_is_aborted(Barrier *b) {
+ return IN_SET(b->barriers,
+ BARRIER_I_ABORTED, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED);
+}
+
+static inline bool barrier_place_and_sync(Barrier *b) {
+ (void) barrier_place(b);
+ return barrier_sync(b);
+}
diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c
new file mode 100644
index 0000000..1d05409
--- /dev/null
+++ b/src/shared/base-filesystem.c
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+typedef struct BaseFilesystem {
+ const char *dir;
+ mode_t mode;
+ const char *target;
+ const char *exists;
+ bool ignore_failure;
+} BaseFilesystem;
+
+static const BaseFilesystem table[] = {
+ { "bin", 0, "usr/bin\0", NULL },
+ { "lib", 0, "usr/lib\0", NULL },
+ { "root", 0755, NULL, NULL, true },
+ { "sbin", 0, "usr/sbin\0", NULL },
+ { "usr", 0755, NULL, NULL },
+ { "var", 0755, NULL, NULL },
+ { "etc", 0755, NULL, NULL },
+ { "proc", 0755, NULL, NULL, true },
+ { "sys", 0755, NULL, NULL, true },
+ { "dev", 0755, NULL, NULL, true },
+#if defined(__i386__) || defined(__x86_64__)
+ { "lib64", 0, "usr/lib/x86_64-linux-gnu\0"
+ "usr/lib64\0", "ld-linux-x86-64.so.2" },
+#endif
+};
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid) {
+ _cleanup_close_ int fd = -1;
+ size_t i;
+ int r;
+
+ fd = open(root, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to open root file system: %m");
+
+ for (i = 0; i < ELEMENTSOF(table); i ++) {
+ if (faccessat(fd, table[i].dir, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+ continue;
+
+ if (table[i].target) {
+ const char *target = NULL, *s;
+
+ /* check if one of the targets exists */
+ NULSTR_FOREACH(s, table[i].target) {
+ if (faccessat(fd, s, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+ continue;
+
+ /* check if a specific file exists at the target path */
+ if (table[i].exists) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(s, table[i].exists);
+ if (!p)
+ return log_oom();
+
+ if (faccessat(fd, p, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
+ continue;
+ }
+
+ target = s;
+ break;
+ }
+
+ if (!target)
+ continue;
+
+ if (symlinkat(target, fd, table[i].dir) < 0) {
+ log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to create symlink at %s/%s: %m", root, table[i].dir);
+
+ if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure)
+ continue;
+
+ return -errno;
+ }
+
+ if (uid_is_valid(uid) || gid_is_valid(gid)) {
+ if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir);
+ }
+
+ continue;
+ }
+
+ RUN_WITH_UMASK(0000)
+ r = mkdirat(fd, table[i].dir, table[i].mode);
+ if (r < 0) {
+ log_full_errno(IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to create directory at %s/%s: %m", root, table[i].dir);
+
+ if (IN_SET(errno, EEXIST, EROFS) || table[i].ignore_failure)
+ continue;
+
+ return -errno;
+ }
+
+ if (uid != UID_INVALID || gid != UID_INVALID) {
+ if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0)
+ return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, table[i].dir);
+ }
+ }
+
+ return 0;
+}
diff --git a/src/shared/base-filesystem.h b/src/shared/base-filesystem.h
new file mode 100644
index 0000000..a33975f
--- /dev/null
+++ b/src/shared/base-filesystem.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int base_filesystem_create(const char *root, uid_t uid, gid_t gid);
diff --git a/src/shared/binfmt-util.c b/src/shared/binfmt-util.c
new file mode 100644
index 0000000..724d7f2
--- /dev/null
+++ b/src/shared/binfmt-util.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/vfs.h>
+
+#include "binfmt-util.h"
+#include "fileio.h"
+#include "missing_magic.h"
+#include "stat-util.h"
+
+int disable_binfmt(void) {
+ int r;
+
+ /* Flush out all rules. This is important during shutdown to cover for rules using "F", since those
+ * might pin a file and thus block us from unmounting stuff cleanly.
+ *
+ * We are a bit careful here, since binfmt_misc might still be an autofs which we don't want to
+ * trigger. */
+
+ r = path_is_fs_type("/proc/sys/fs/binfmt_misc", BINFMTFS_MAGIC);
+ if (r == 0 || r == -ENOENT) {
+ log_debug("binfmt_misc is not mounted, not detaching entries.");
+ return 0;
+ }
+ if (r < 0)
+ return log_warning_errno(r, "Failed to determine whether binfmt_misc is mounted: %m");
+
+ r = write_string_file("/proc/sys/fs/binfmt_misc/status", "-1", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to unregister binfmt_misc entries: %m");
+
+ log_debug("Unregistered all remaining binfmt_misc entries.");
+ return 0;
+}
diff --git a/src/shared/binfmt-util.h b/src/shared/binfmt-util.h
new file mode 100644
index 0000000..2f008d1
--- /dev/null
+++ b/src/shared/binfmt-util.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int disable_binfmt(void);
diff --git a/src/shared/bitmap.c b/src/shared/bitmap.c
new file mode 100644
index 0000000..5d450c8
--- /dev/null
+++ b/src/shared/bitmap.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "bitmap.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "memory-util.h"
+
+/* Bitmaps are only meant to store relatively small numbers
+ * (corresponding to, say, an enum), so it is ok to limit
+ * the max entry. 64k should be plenty. */
+#define BITMAPS_MAX_ENTRY 0xffff
+
+/* This indicates that we reached the end of the bitmap */
+#define BITMAP_END ((unsigned) -1)
+
+#define BITMAP_NUM_TO_OFFSET(n) ((n) / (sizeof(uint64_t) * 8))
+#define BITMAP_NUM_TO_REM(n) ((n) % (sizeof(uint64_t) * 8))
+#define BITMAP_OFFSET_TO_NUM(offset, rem) ((offset) * sizeof(uint64_t) * 8 + (rem))
+
+Bitmap *bitmap_new(void) {
+ return new0(Bitmap, 1);
+}
+
+Bitmap *bitmap_copy(Bitmap *b) {
+ Bitmap *ret;
+
+ ret = bitmap_new();
+ if (!ret)
+ return NULL;
+
+ ret->bitmaps = newdup(uint64_t, b->bitmaps, b->n_bitmaps);
+ if (!ret->bitmaps)
+ return mfree(ret);
+
+ ret->n_bitmaps = ret->bitmaps_allocated = b->n_bitmaps;
+ return ret;
+}
+
+void bitmap_free(Bitmap *b) {
+ if (!b)
+ return;
+
+ free(b->bitmaps);
+ free(b);
+}
+
+int bitmap_ensure_allocated(Bitmap **b) {
+ Bitmap *a;
+
+ assert(b);
+
+ if (*b)
+ return 0;
+
+ a = bitmap_new();
+ if (!a)
+ return -ENOMEM;
+
+ *b = a;
+
+ return 0;
+}
+
+int bitmap_set(Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ assert(b);
+
+ /* we refuse to allocate huge bitmaps */
+ if (n > BITMAPS_MAX_ENTRY)
+ return -ERANGE;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps) {
+ if (!GREEDY_REALLOC0(b->bitmaps, b->bitmaps_allocated, offset + 1))
+ return -ENOMEM;
+
+ b->n_bitmaps = offset + 1;
+ }
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ b->bitmaps[offset] |= bitmask;
+
+ return 0;
+}
+
+void bitmap_unset(Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ if (!b)
+ return;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps)
+ return;
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ b->bitmaps[offset] &= ~bitmask;
+}
+
+bool bitmap_isset(const Bitmap *b, unsigned n) {
+ uint64_t bitmask;
+ unsigned offset;
+
+ if (!b)
+ return false;
+
+ offset = BITMAP_NUM_TO_OFFSET(n);
+
+ if (offset >= b->n_bitmaps)
+ return false;
+
+ bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n);
+
+ return !!(b->bitmaps[offset] & bitmask);
+}
+
+bool bitmap_isclear(const Bitmap *b) {
+ unsigned i;
+
+ if (!b)
+ return true;
+
+ for (i = 0; i < b->n_bitmaps; i++)
+ if (b->bitmaps[i] != 0)
+ return false;
+
+ return true;
+}
+
+void bitmap_clear(Bitmap *b) {
+ if (!b)
+ return;
+
+ b->bitmaps = mfree(b->bitmaps);
+ b->n_bitmaps = 0;
+ b->bitmaps_allocated = 0;
+}
+
+bool bitmap_iterate(const Bitmap *b, Iterator *i, unsigned *n) {
+ uint64_t bitmask;
+ unsigned offset, rem;
+
+ assert(i);
+ assert(n);
+
+ if (!b || i->idx == BITMAP_END)
+ return false;
+
+ offset = BITMAP_NUM_TO_OFFSET(i->idx);
+ rem = BITMAP_NUM_TO_REM(i->idx);
+ bitmask = UINT64_C(1) << rem;
+
+ for (; offset < b->n_bitmaps; offset ++) {
+ if (b->bitmaps[offset]) {
+ for (; bitmask; bitmask <<= 1, rem ++) {
+ if (b->bitmaps[offset] & bitmask) {
+ *n = BITMAP_OFFSET_TO_NUM(offset, rem);
+ i->idx = *n + 1;
+
+ return true;
+ }
+ }
+ }
+
+ rem = 0;
+ bitmask = 1;
+ }
+
+ i->idx = BITMAP_END;
+
+ return false;
+}
+
+bool bitmap_equal(const Bitmap *a, const Bitmap *b) {
+ size_t common_n_bitmaps;
+ const Bitmap *c;
+ unsigned i;
+
+ if (a == b)
+ return true;
+
+ if (!a != !b)
+ return false;
+
+ if (!a)
+ return true;
+
+ common_n_bitmaps = MIN(a->n_bitmaps, b->n_bitmaps);
+ if (memcmp_safe(a->bitmaps, b->bitmaps, sizeof(uint64_t) * common_n_bitmaps) != 0)
+ return false;
+
+ c = a->n_bitmaps > b->n_bitmaps ? a : b;
+ for (i = common_n_bitmaps; i < c->n_bitmaps; i++)
+ if (c->bitmaps[i] != 0)
+ return false;
+
+ return true;
+}
diff --git a/src/shared/bitmap.h b/src/shared/bitmap.h
new file mode 100644
index 0000000..1c305a2
--- /dev/null
+++ b/src/shared/bitmap.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+
+typedef struct Bitmap {
+ uint64_t *bitmaps;
+ size_t n_bitmaps;
+ size_t bitmaps_allocated;
+} Bitmap;
+
+Bitmap *bitmap_new(void);
+Bitmap *bitmap_copy(Bitmap *b);
+int bitmap_ensure_allocated(Bitmap **b);
+void bitmap_free(Bitmap *b);
+
+int bitmap_set(Bitmap *b, unsigned n);
+void bitmap_unset(Bitmap *b, unsigned n);
+bool bitmap_isset(const Bitmap *b, unsigned n);
+bool bitmap_isclear(const Bitmap *b);
+void bitmap_clear(Bitmap *b);
+
+bool bitmap_iterate(const Bitmap *b, Iterator *i, unsigned *n);
+
+bool bitmap_equal(const Bitmap *a, const Bitmap *b);
+
+#define _BITMAP_FOREACH(n, b, i) \
+ for (Iterator i = {}; bitmap_iterate((b), &i, (unsigned*)&(n)); )
+#define BITMAP_FOREACH(n, b) \
+ _BITMAP_FOREACH(n, b, UNIQ_T(i, UNIQ))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Bitmap*, bitmap_free);
+
+#define _cleanup_bitmap_free_ _cleanup_(bitmap_freep)
diff --git a/src/shared/blkid-util.h b/src/shared/blkid-util.h
new file mode 100644
index 0000000..3f38e9b
--- /dev/null
+++ b/src/shared/blkid-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_BLKID
+# include <blkid.h>
+
+# include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(blkid_probe, blkid_free_probe);
+#endif
diff --git a/src/shared/bond-util.c b/src/shared/bond-util.c
new file mode 100644
index 0000000..e04b201
--- /dev/null
+++ b/src/shared/bond-util.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bond-util.h"
+#include "string-table.h"
+
+static const char* const bond_mode_table[_NETDEV_BOND_MODE_MAX] = {
+ [NETDEV_BOND_MODE_BALANCE_RR] = "balance-rr",
+ [NETDEV_BOND_MODE_ACTIVE_BACKUP] = "active-backup",
+ [NETDEV_BOND_MODE_BALANCE_XOR] = "balance-xor",
+ [NETDEV_BOND_MODE_BROADCAST] = "broadcast",
+ [NETDEV_BOND_MODE_802_3AD] = "802.3ad",
+ [NETDEV_BOND_MODE_BALANCE_TLB] = "balance-tlb",
+ [NETDEV_BOND_MODE_BALANCE_ALB] = "balance-alb",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_mode, BondMode);
+
+static const char* const bond_xmit_hash_policy_table[_NETDEV_BOND_XMIT_HASH_POLICY_MAX] = {
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER2] = "layer2",
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER34] = "layer3+4",
+ [NETDEV_BOND_XMIT_HASH_POLICY_LAYER23] = "layer2+3",
+ [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23] = "encap2+3",
+ [NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34] = "encap3+4",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_xmit_hash_policy, BondXmitHashPolicy);
+
+static const char* const bond_lacp_rate_table[_NETDEV_BOND_LACP_RATE_MAX] = {
+ [NETDEV_BOND_LACP_RATE_SLOW] = "slow",
+ [NETDEV_BOND_LACP_RATE_FAST] = "fast",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_lacp_rate, BondLacpRate);
+
+static const char* const bond_ad_select_table[_NETDEV_BOND_AD_SELECT_MAX] = {
+ [NETDEV_BOND_AD_SELECT_STABLE] = "stable",
+ [NETDEV_BOND_AD_SELECT_BANDWIDTH] = "bandwidth",
+ [NETDEV_BOND_AD_SELECT_COUNT] = "count",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_ad_select, BondAdSelect);
+
+static const char* const bond_fail_over_mac_table[_NETDEV_BOND_FAIL_OVER_MAC_MAX] = {
+ [NETDEV_BOND_FAIL_OVER_MAC_NONE] = "none",
+ [NETDEV_BOND_FAIL_OVER_MAC_ACTIVE] = "active",
+ [NETDEV_BOND_FAIL_OVER_MAC_FOLLOW] = "follow",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_fail_over_mac, BondFailOverMac);
+
+static const char *const bond_arp_validate_table[_NETDEV_BOND_ARP_VALIDATE_MAX] = {
+ [NETDEV_BOND_ARP_VALIDATE_NONE] = "none",
+ [NETDEV_BOND_ARP_VALIDATE_ACTIVE]= "active",
+ [NETDEV_BOND_ARP_VALIDATE_BACKUP]= "backup",
+ [NETDEV_BOND_ARP_VALIDATE_ALL]= "all",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_arp_validate, BondArpValidate);
+
+static const char *const bond_arp_all_targets_table[_NETDEV_BOND_ARP_ALL_TARGETS_MAX] = {
+ [NETDEV_BOND_ARP_ALL_TARGETS_ANY] = "any",
+ [NETDEV_BOND_ARP_ALL_TARGETS_ALL] = "all",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_arp_all_targets, BondArpAllTargets);
+
+static const char *const bond_primary_reselect_table[_NETDEV_BOND_PRIMARY_RESELECT_MAX] = {
+ [NETDEV_BOND_PRIMARY_RESELECT_ALWAYS] = "always",
+ [NETDEV_BOND_PRIMARY_RESELECT_BETTER]= "better",
+ [NETDEV_BOND_PRIMARY_RESELECT_FAILURE]= "failure",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bond_primary_reselect, BondPrimaryReselect);
diff --git a/src/shared/bond-util.h b/src/shared/bond-util.h
new file mode 100644
index 0000000..a8f9ecb
--- /dev/null
+++ b/src/shared/bond-util.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_bonding.h>
+
+#include "macro.h"
+
+/*
+ * Maximum number of targets supported by the kernel for a single
+ * bond netdev.
+ */
+#define NETDEV_BOND_ARP_TARGETS_MAX 16
+
+typedef enum BondMode {
+ NETDEV_BOND_MODE_BALANCE_RR = BOND_MODE_ROUNDROBIN,
+ NETDEV_BOND_MODE_ACTIVE_BACKUP = BOND_MODE_ACTIVEBACKUP,
+ NETDEV_BOND_MODE_BALANCE_XOR = BOND_MODE_XOR,
+ NETDEV_BOND_MODE_BROADCAST = BOND_MODE_BROADCAST,
+ NETDEV_BOND_MODE_802_3AD = BOND_MODE_8023AD,
+ NETDEV_BOND_MODE_BALANCE_TLB = BOND_MODE_TLB,
+ NETDEV_BOND_MODE_BALANCE_ALB = BOND_MODE_ALB,
+ _NETDEV_BOND_MODE_MAX,
+ _NETDEV_BOND_MODE_INVALID = -1
+} BondMode;
+
+typedef enum BondXmitHashPolicy {
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER2 = BOND_XMIT_POLICY_LAYER2,
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER34 = BOND_XMIT_POLICY_LAYER34,
+ NETDEV_BOND_XMIT_HASH_POLICY_LAYER23 = BOND_XMIT_POLICY_LAYER23,
+ NETDEV_BOND_XMIT_HASH_POLICY_ENCAP23 = BOND_XMIT_POLICY_ENCAP23,
+ NETDEV_BOND_XMIT_HASH_POLICY_ENCAP34 = BOND_XMIT_POLICY_ENCAP34,
+ _NETDEV_BOND_XMIT_HASH_POLICY_MAX,
+ _NETDEV_BOND_XMIT_HASH_POLICY_INVALID = -1
+} BondXmitHashPolicy;
+
+typedef enum BondLacpRate {
+ NETDEV_BOND_LACP_RATE_SLOW,
+ NETDEV_BOND_LACP_RATE_FAST,
+ _NETDEV_BOND_LACP_RATE_MAX,
+ _NETDEV_BOND_LACP_RATE_INVALID = -1,
+} BondLacpRate;
+
+typedef enum BondAdSelect {
+ NETDEV_BOND_AD_SELECT_STABLE,
+ NETDEV_BOND_AD_SELECT_BANDWIDTH,
+ NETDEV_BOND_AD_SELECT_COUNT,
+ _NETDEV_BOND_AD_SELECT_MAX,
+ _NETDEV_BOND_AD_SELECT_INVALID = -1,
+} BondAdSelect;
+
+typedef enum BondFailOverMac {
+ NETDEV_BOND_FAIL_OVER_MAC_NONE,
+ NETDEV_BOND_FAIL_OVER_MAC_ACTIVE,
+ NETDEV_BOND_FAIL_OVER_MAC_FOLLOW,
+ _NETDEV_BOND_FAIL_OVER_MAC_MAX,
+ _NETDEV_BOND_FAIL_OVER_MAC_INVALID = -1,
+} BondFailOverMac;
+
+typedef enum BondArpValidate {
+ NETDEV_BOND_ARP_VALIDATE_NONE,
+ NETDEV_BOND_ARP_VALIDATE_ACTIVE,
+ NETDEV_BOND_ARP_VALIDATE_BACKUP,
+ NETDEV_BOND_ARP_VALIDATE_ALL,
+ _NETDEV_BOND_ARP_VALIDATE_MAX,
+ _NETDEV_BOND_ARP_VALIDATE_INVALID = -1,
+} BondArpValidate;
+
+typedef enum BondArpAllTargets {
+ NETDEV_BOND_ARP_ALL_TARGETS_ANY,
+ NETDEV_BOND_ARP_ALL_TARGETS_ALL,
+ _NETDEV_BOND_ARP_ALL_TARGETS_MAX,
+ _NETDEV_BOND_ARP_ALL_TARGETS_INVALID = -1,
+} BondArpAllTargets;
+
+typedef enum BondPrimaryReselect {
+ NETDEV_BOND_PRIMARY_RESELECT_ALWAYS,
+ NETDEV_BOND_PRIMARY_RESELECT_BETTER,
+ NETDEV_BOND_PRIMARY_RESELECT_FAILURE,
+ _NETDEV_BOND_PRIMARY_RESELECT_MAX,
+ _NETDEV_BOND_PRIMARY_RESELECT_INVALID = -1,
+} BondPrimaryReselect;
+
+const char *bond_mode_to_string(BondMode d) _const_;
+BondMode bond_mode_from_string(const char *d) _pure_;
+
+const char *bond_xmit_hash_policy_to_string(BondXmitHashPolicy d) _const_;
+BondXmitHashPolicy bond_xmit_hash_policy_from_string(const char *d) _pure_;
+
+const char *bond_lacp_rate_to_string(BondLacpRate d) _const_;
+BondLacpRate bond_lacp_rate_from_string(const char *d) _pure_;
+
+const char *bond_fail_over_mac_to_string(BondFailOverMac d) _const_;
+BondFailOverMac bond_fail_over_mac_from_string(const char *d) _pure_;
+
+const char *bond_ad_select_to_string(BondAdSelect d) _const_;
+BondAdSelect bond_ad_select_from_string(const char *d) _pure_;
+
+const char *bond_arp_validate_to_string(BondArpValidate d) _const_;
+BondArpValidate bond_arp_validate_from_string(const char *d) _pure_;
+
+const char *bond_arp_all_targets_to_string(BondArpAllTargets d) _const_;
+BondArpAllTargets bond_arp_all_targets_from_string(const char *d) _pure_;
+
+const char *bond_primary_reselect_to_string(BondPrimaryReselect d) _const_;
+BondPrimaryReselect bond_primary_reselect_from_string(const char *d) _pure_;
diff --git a/src/shared/boot-timestamps.c b/src/shared/boot-timestamps.c
new file mode 100644
index 0000000..8786e89
--- /dev/null
+++ b/src/shared/boot-timestamps.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "acpi-fpdt.h"
+#include "boot-timestamps.h"
+#include "efi-loader.h"
+#include "macro.h"
+#include "time-util.h"
+
+int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader) {
+ usec_t x = 0, y = 0, a;
+ int r;
+ dual_timestamp _n;
+
+ assert(firmware);
+ assert(loader);
+
+ if (!n) {
+ dual_timestamp_get(&_n);
+ n = &_n;
+ }
+
+ r = acpi_get_boot_usec(&x, &y);
+ if (r < 0) {
+ r = efi_loader_get_boot_usec(&x, &y);
+ if (r < 0)
+ return r;
+ }
+
+ /* Let's convert this to timestamps where the firmware
+ * began/loader began working. To make this more confusing:
+ * since usec_t is unsigned and the kernel's monotonic clock
+ * begins at kernel initialization we'll actually initialize
+ * the monotonic timestamps here as negative of the actual
+ * value. */
+
+ firmware->monotonic = y;
+ loader->monotonic = y - x;
+
+ a = n->monotonic + firmware->monotonic;
+ firmware->realtime = n->realtime > a ? n->realtime - a : 0;
+
+ a = n->monotonic + loader->monotonic;
+ loader->realtime = n->realtime > a ? n->realtime - a : 0;
+
+ return 0;
+}
diff --git a/src/shared/boot-timestamps.h b/src/shared/boot-timestamps.h
new file mode 100644
index 0000000..55b7ad1
--- /dev/null
+++ b/src/shared/boot-timestamps.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <time-util.h>
+
+int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader);
diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c
new file mode 100644
index 0000000..e50408a
--- /dev/null
+++ b/src/shared/bootspec.c
@@ -0,0 +1,1432 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <linux/magic.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "blkid-util.h"
+#include "bootspec.h"
+#include "conf-files.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "dirent-util.h"
+#include "efivars.h"
+#include "efi-loader.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pe-header.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unaligned.h"
+#include "util.h"
+#include "virt.h"
+
+static void boot_entry_free(BootEntry *entry) {
+ assert(entry);
+
+ free(entry->id);
+ free(entry->id_old);
+ free(entry->path);
+ free(entry->root);
+ free(entry->title);
+ free(entry->show_title);
+ free(entry->version);
+ free(entry->machine_id);
+ free(entry->architecture);
+ strv_free(entry->options);
+ free(entry->kernel);
+ free(entry->efi);
+ strv_free(entry->initrd);
+ free(entry->device_tree);
+}
+
+static int boot_entry_load(
+ const char *root,
+ const char *path,
+ BootEntry *entry) {
+
+ _cleanup_(boot_entry_free) BootEntry tmp = {
+ .type = BOOT_ENTRY_CONF,
+ };
+
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ char *b, *c;
+ int r;
+
+ assert(root);
+ assert(path);
+ assert(entry);
+
+ c = endswith_no_case(path, ".conf");
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry file suffix: %s", path);
+
+ b = basename(path);
+ tmp.id = strdup(b);
+ tmp.id_old = strndup(b, c - b);
+ if (!tmp.id || !tmp.id_old)
+ return log_oom();
+
+ if (!efi_loader_entry_name_valid(tmp.id))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry name: %s", tmp.id);
+
+ tmp.path = strdup(path);
+ if (!tmp.path)
+ return log_oom();
+
+ tmp.root = strdup(root);
+ if (!tmp.root)
+ return log_oom();
+
+ f = fopen(path, "re");
+ if (!f)
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ line++;
+
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ if (streq(field, "title"))
+ r = free_and_strdup(&tmp.title, p);
+ else if (streq(field, "version"))
+ r = free_and_strdup(&tmp.version, p);
+ else if (streq(field, "machine-id"))
+ r = free_and_strdup(&tmp.machine_id, p);
+ else if (streq(field, "architecture"))
+ r = free_and_strdup(&tmp.architecture, p);
+ else if (streq(field, "options"))
+ r = strv_extend(&tmp.options, p);
+ else if (streq(field, "linux"))
+ r = free_and_strdup(&tmp.kernel, p);
+ else if (streq(field, "efi"))
+ r = free_and_strdup(&tmp.efi, p);
+ else if (streq(field, "initrd"))
+ r = strv_extend(&tmp.initrd, p);
+ else if (streq(field, "devicetree"))
+ r = free_and_strdup(&tmp.device_tree, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\", ignoring.", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ *entry = tmp;
+ tmp = (BootEntry) {};
+ return 0;
+}
+
+void boot_config_free(BootConfig *config) {
+ size_t i;
+
+ assert(config);
+
+ free(config->default_pattern);
+ free(config->timeout);
+ free(config->editor);
+ free(config->auto_entries);
+ free(config->auto_firmware);
+ free(config->console_mode);
+ free(config->random_seed_mode);
+
+ free(config->entry_oneshot);
+ free(config->entry_default);
+
+ for (i = 0; i < config->n_entries; i++)
+ boot_entry_free(config->entries + i);
+ free(config->entries);
+}
+
+static int boot_loader_read_conf(const char *path, BootConfig *config) {
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned line = 1;
+ int r;
+
+ assert(path);
+ assert(config);
+
+ f = fopen(path, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open \"%s\": %m", path);
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL, *field = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS)
+ return log_error_errno(r, "%s:%u: Line too long", path, line);
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+
+ line++;
+
+ if (IN_SET(*strstrip(buf), '#', '\0'))
+ continue;
+
+ p = buf;
+ r = extract_first_word(&p, &field, " \t", 0);
+ if (r < 0) {
+ log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line);
+ continue;
+ }
+ if (r == 0) {
+ log_warning("%s:%u: Bad syntax", path, line);
+ continue;
+ }
+
+ if (streq(field, "default"))
+ r = free_and_strdup(&config->default_pattern, p);
+ else if (streq(field, "timeout"))
+ r = free_and_strdup(&config->timeout, p);
+ else if (streq(field, "editor"))
+ r = free_and_strdup(&config->editor, p);
+ else if (streq(field, "auto-entries"))
+ r = free_and_strdup(&config->auto_entries, p);
+ else if (streq(field, "auto-firmware"))
+ r = free_and_strdup(&config->auto_firmware, p);
+ else if (streq(field, "console-mode"))
+ r = free_and_strdup(&config->console_mode, p);
+ else if (streq(field, "random-seed-mode"))
+ r = free_and_strdup(&config->random_seed_mode, p);
+ else {
+ log_notice("%s:%u: Unknown line \"%s\", ignoring.", path, line, field);
+ continue;
+ }
+ if (r < 0)
+ return log_error_errno(r, "%s:%u: Error while reading: %m", path, line);
+ }
+
+ return 1;
+}
+
+static int boot_entry_compare(const BootEntry *a, const BootEntry *b) {
+ return str_verscmp(a->id, b->id);
+}
+
+static int boot_entries_find(
+ const char *root,
+ const char *dir,
+ BootEntry **entries,
+ size_t *n_entries) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ size_t n_allocated = *n_entries;
+ char **f;
+ int r;
+
+ assert(root);
+ assert(dir);
+ assert(entries);
+ assert(n_entries);
+
+ r = conf_files_list(&files, ".conf", NULL, 0, dir);
+ if (r < 0)
+ return log_error_errno(r, "Failed to list files in \"%s\": %m", dir);
+
+ STRV_FOREACH(f, files) {
+ if (!GREEDY_REALLOC0(*entries, n_allocated, *n_entries + 1))
+ return log_oom();
+
+ r = boot_entry_load(root, *f, *entries + *n_entries);
+ if (r < 0)
+ continue;
+
+ (*n_entries) ++;
+ }
+
+ return 0;
+}
+
+static int boot_entry_load_unified(
+ const char *root,
+ const char *path,
+ const char *osrelease,
+ const char *cmdline,
+ BootEntry *ret) {
+
+ _cleanup_free_ char *os_pretty_name = NULL, *os_id = NULL, *version_id = NULL, *build_id = NULL;
+ _cleanup_(boot_entry_free) BootEntry tmp = {
+ .type = BOOT_ENTRY_UNIFIED,
+ };
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *k;
+ char *b;
+ int r;
+
+ assert(root);
+ assert(path);
+ assert(osrelease);
+
+ k = path_startswith(path, root);
+ if (!k)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not below root: %s", path);
+
+ f = fmemopen_unlocked((void*) osrelease, strlen(osrelease), "r");
+ if (!f)
+ return log_error_errno(errno, "Failed to open os-release buffer: %m");
+
+ r = parse_env_file(f, "os-release",
+ "PRETTY_NAME", &os_pretty_name,
+ "ID", &os_id,
+ "VERSION_ID", &version_id,
+ "BUILD_ID", &build_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse os-release data from unified kernel image %s: %m", path);
+
+ if (!os_pretty_name || !os_id || !(version_id || build_id))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Missing fields in os-release data from unified kernel image %s, refusing.", path);
+
+ b = basename(path);
+ tmp.id = strdup(b);
+ tmp.id_old = strjoin(os_id, "-", version_id ?: build_id);
+ if (!tmp.id || !tmp.id_old)
+ return log_oom();
+
+ if (!efi_loader_entry_name_valid(tmp.id))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid loader entry name: %s", tmp.id);
+
+ tmp.path = strdup(path);
+ if (!tmp.path)
+ return log_oom();
+
+ tmp.root = strdup(root);
+ if (!tmp.root)
+ return log_oom();
+
+ tmp.kernel = strdup(skip_leading_chars(k, "/"));
+ if (!tmp.kernel)
+ return log_oom();
+
+ tmp.options = strv_new(skip_leading_chars(cmdline, WHITESPACE));
+ if (!tmp.options)
+ return log_oom();
+
+ delete_trailing_chars(tmp.options[0], WHITESPACE);
+
+ tmp.title = TAKE_PTR(os_pretty_name);
+
+ *ret = tmp;
+ tmp = (BootEntry) {};
+ return 0;
+}
+
+/* Maximum PE section we are willing to load (Note that sections we are not interested in may be larger, but
+ * the ones we do care about and we are willing to load into memory have this size limit.) */
+#define PE_SECTION_SIZE_MAX (4U*1024U*1024U)
+
+static int find_sections(
+ int fd,
+ char **ret_osrelease,
+ char **ret_cmdline) {
+
+ _cleanup_free_ struct PeSectionHeader *sections = NULL;
+ _cleanup_free_ char *osrelease = NULL, *cmdline = NULL;
+ size_t i, n_sections;
+ struct DosFileHeader dos;
+ struct PeHeader pe;
+ uint64_t start;
+ ssize_t n;
+
+ n = pread(fd, &dos, sizeof(dos), 0);
+ if (n < 0)
+ return log_error_errno(errno, "Failed read DOS header: %m");
+ if (n != sizeof(dos))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading DOS header, refusing.");
+
+ if (dos.Magic[0] != 'M' || dos.Magic[1] != 'Z')
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "DOS executable magic missing, refusing.");
+
+ start = unaligned_read_le32(&dos.ExeHeader);
+ n = pread(fd, &pe, sizeof(pe), start);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read PE header: %m");
+ if (n != sizeof(pe))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading PE header, refusing.");
+
+ if (pe.Magic[0] != 'P' || pe.Magic[1] != 'E' || pe.Magic[2] != 0 || pe.Magic[3] != 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "PE executable magic missing, refusing.");
+
+ n_sections = unaligned_read_le16(&pe.FileHeader.NumberOfSections);
+ if (n_sections > 96)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "PE header has too many sections, refusing.");
+
+ sections = new(struct PeSectionHeader, n_sections);
+ if (!sections)
+ return log_oom();
+
+ n = pread(fd, sections,
+ n_sections * sizeof(struct PeSectionHeader),
+ start + sizeof(pe) + unaligned_read_le16(&pe.FileHeader.SizeOfOptionalHeader));
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read section data: %m");
+ if ((size_t) n != n_sections * sizeof(struct PeSectionHeader))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading sections, refusing.");
+
+ for (i = 0; i < n_sections; i++) {
+ _cleanup_free_ char *k = NULL;
+ uint32_t offset, size;
+ char **b;
+
+ if (strneq((char*) sections[i].Name, ".osrel", sizeof(sections[i].Name)))
+ b = &osrelease;
+ else if (strneq((char*) sections[i].Name, ".cmdline", sizeof(sections[i].Name)))
+ b = &cmdline;
+ else
+ continue;
+
+ if (*b)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Duplicate section %s, refusing.", sections[i].Name);
+
+ offset = unaligned_read_le32(&sections[i].PointerToRawData);
+ size = unaligned_read_le32(&sections[i].VirtualSize);
+
+ if (size > PE_SECTION_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Section %s too large, refusing.", sections[i].Name);
+
+ k = new(char, size+1);
+ if (!k)
+ return log_oom();
+
+ n = pread(fd, k, size, offset);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read section payload: %m");
+ if ((size_t) n != size)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading section payload, refusing:");
+
+ /* Allow one trailing NUL byte, but nothing more. */
+ if (size > 0 && memchr(k, 0, size - 1))
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Section contains embedded NUL byte: %m");
+
+ k[size] = 0;
+ *b = TAKE_PTR(k);
+ }
+
+ if (!osrelease)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Image lacks .osrel section, refusing.");
+
+ if (ret_osrelease)
+ *ret_osrelease = TAKE_PTR(osrelease);
+ if (ret_cmdline)
+ *ret_cmdline = TAKE_PTR(cmdline);
+
+ return 0;
+}
+
+static int boot_entries_find_unified(
+ const char *root,
+ const char *dir,
+ BootEntry **entries,
+ size_t *n_entries) {
+
+ _cleanup_(closedirp) DIR *d = NULL;
+ size_t n_allocated = *n_entries;
+ struct dirent *de;
+ int r;
+
+ assert(root);
+ assert(dir);
+ assert(entries);
+ assert(n_entries);
+
+ d = opendir(dir);
+ if (!d) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_error_errno(errno, "Failed to open %s: %m", dir);
+ }
+
+ FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s: %m", dir)) {
+ _cleanup_free_ char *j = NULL, *osrelease = NULL, *cmdline = NULL;
+ _cleanup_close_ int fd = -1;
+
+ dirent_ensure_type(d, de);
+ if (!dirent_is_file(de))
+ continue;
+
+ if (!endswith_no_case(de->d_name, ".efi"))
+ continue;
+
+ if (!GREEDY_REALLOC0(*entries, n_allocated, *n_entries + 1))
+ return log_oom();
+
+ fd = openat(dirfd(d), de->d_name, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
+ if (fd < 0) {
+ log_warning_errno(errno, "Failed to open %s/%s, ignoring: %m", dir, de->d_name);
+ continue;
+ }
+
+ r = fd_verify_regular(fd);
+ if (r < 0) {
+ log_warning_errno(r, "File %s/%s is not regular, ignoring: %m", dir, de->d_name);
+ continue;
+ }
+
+ r = find_sections(fd, &osrelease, &cmdline);
+ if (r < 0)
+ continue;
+
+ j = path_join(dir, de->d_name);
+ if (!j)
+ return log_oom();
+
+ r = boot_entry_load_unified(root, j, osrelease, cmdline, *entries + *n_entries);
+ if (r < 0)
+ continue;
+
+ (*n_entries) ++;
+ }
+
+ return 0;
+}
+
+static bool find_nonunique(BootEntry *entries, size_t n_entries, bool *arr) {
+ size_t i, j;
+ bool non_unique = false;
+
+ assert(entries || n_entries == 0);
+ assert(arr || n_entries == 0);
+
+ for (i = 0; i < n_entries; i++)
+ arr[i] = false;
+
+ for (i = 0; i < n_entries; i++)
+ for (j = 0; j < n_entries; j++)
+ if (i != j && streq(boot_entry_title(entries + i),
+ boot_entry_title(entries + j)))
+ non_unique = arr[i] = arr[j] = true;
+
+ return non_unique;
+}
+
+static int boot_entries_uniquify(BootEntry *entries, size_t n_entries) {
+ char *s;
+ size_t i;
+ int r;
+ bool arr[n_entries];
+
+ assert(entries || n_entries == 0);
+
+ /* Find _all_ non-unique titles */
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add version to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].version) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].version);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add machine-id to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i] && entries[i].machine_id) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].machine_id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ if (!find_nonunique(entries, n_entries, arr))
+ return 0;
+
+ /* Add file name to non-unique titles */
+ for (i = 0; i < n_entries; i++)
+ if (arr[i]) {
+ r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].id);
+ if (r < 0)
+ return -ENOMEM;
+
+ free_and_replace(entries[i].show_title, s);
+ }
+
+ return 0;
+}
+
+static int boot_entries_select_default(const BootConfig *config) {
+ int i;
+
+ assert(config);
+ assert(config->entries || config->n_entries == 0);
+
+ if (config->n_entries == 0) {
+ log_debug("Found no default boot entry :(");
+ return -1; /* -1 means "no default" */
+ }
+
+ if (config->entry_oneshot)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_oneshot, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryOneShot",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->entry_default)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (streq(config->entry_default, config->entries[i].id)) {
+ log_debug("Found default: id \"%s\" is matched by LoaderEntryDefault",
+ config->entries[i].id);
+ return i;
+ }
+
+ if (config->default_pattern)
+ for (i = config->n_entries - 1; i >= 0; i--)
+ if (fnmatch(config->default_pattern, config->entries[i].id, FNM_CASEFOLD) == 0) {
+ log_debug("Found default: id \"%s\" is matched by pattern \"%s\"",
+ config->entries[i].id, config->default_pattern);
+ return i;
+ }
+
+ log_debug("Found default: last entry \"%s\"", config->entries[config->n_entries - 1].id);
+ return config->n_entries - 1;
+}
+
+int boot_entries_load_config(
+ const char *esp_path,
+ const char *xbootldr_path,
+ BootConfig *config) {
+
+ const char *p;
+ int r;
+
+ assert(config);
+
+ if (esp_path) {
+ p = strjoina(esp_path, "/loader/loader.conf");
+ r = boot_loader_read_conf(p, config);
+ if (r < 0)
+ return r;
+
+ p = strjoina(esp_path, "/loader/entries");
+ r = boot_entries_find(esp_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+
+ p = strjoina(esp_path, "/EFI/Linux/");
+ r = boot_entries_find_unified(esp_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+ }
+
+ if (xbootldr_path) {
+ p = strjoina(xbootldr_path, "/loader/entries");
+ r = boot_entries_find(xbootldr_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+
+ p = strjoina(xbootldr_path, "/EFI/Linux/");
+ r = boot_entries_find_unified(xbootldr_path, p, &config->entries, &config->n_entries);
+ if (r < 0)
+ return r;
+ }
+
+ typesafe_qsort(config->entries, config->n_entries, boot_entry_compare);
+
+ r = boot_entries_uniquify(config->entries, config->n_entries);
+ if (r < 0)
+ return log_error_errno(r, "Failed to uniquify boot entries: %m");
+
+ if (is_efi_boot()) {
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &config->entry_oneshot);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENODATA)) {
+ log_warning_errno(r, "Failed to read EFI variable \"LoaderEntryOneShot\": %m");
+ if (r == -ENOMEM)
+ return r;
+ }
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryDefault", &config->entry_default);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENODATA)) {
+ log_warning_errno(r, "Failed to read EFI variable \"LoaderEntryDefault\": %m");
+ if (r == -ENOMEM)
+ return r;
+ }
+ }
+
+ config->default_entry = boot_entries_select_default(config);
+ return 0;
+}
+
+int boot_entries_load_config_auto(
+ const char *override_esp_path,
+ const char *override_xbootldr_path,
+ BootConfig *config) {
+
+ _cleanup_free_ char *esp_where = NULL, *xbootldr_where = NULL;
+ int r;
+
+ assert(config);
+
+ /* This function is similar to boot_entries_load_config(), however we automatically search for the
+ * ESP and the XBOOTLDR partition unless it is explicitly specified. Also, if the user did not pass
+ * an ESP or XBOOTLDR path directly, let's see if /run/boot-loader-entries/ exists. If so, let's
+ * read data from there, as if it was an ESP (i.e. loading both entries and loader.conf data from
+ * it). This allows other boot loaders to pass boot loader entry information to our tools if they
+ * want to. */
+
+ if (!override_esp_path && !override_xbootldr_path) {
+ if (access("/run/boot-loader-entries/", F_OK) >= 0)
+ return boot_entries_load_config("/run/boot-loader-entries/", NULL, config);
+
+ if (errno != ENOENT)
+ return log_error_errno(errno,
+ "Failed to determine whether /run/boot-loader-entries/ exists: %m");
+ }
+
+ r = find_esp_and_warn(override_esp_path, false, &esp_where, NULL, NULL, NULL, NULL);
+ if (r < 0) /* we don't log about ENOKEY here, but propagate it, leaving it to the caller to log */
+ return r;
+
+ r = find_xbootldr_and_warn(override_xbootldr_path, false, &xbootldr_where, NULL);
+ if (r < 0 && r != -ENOKEY)
+ return r; /* It's fine if the XBOOTLDR partition doesn't exist, hence we ignore ENOKEY here */
+
+ return boot_entries_load_config(esp_where, xbootldr_where, config);
+}
+
+int boot_entries_augment_from_loader(
+ BootConfig *config,
+ char **found_by_loader,
+ bool only_auto) {
+
+ static const char *const title_table[] = {
+ /* Pretty names for a few well-known automatically discovered entries. */
+ "auto-osx", "macOS",
+ "auto-windows", "Windows Boot Manager",
+ "auto-efi-shell", "EFI Shell",
+ "auto-efi-default", "EFI Default Loader",
+ "auto-reboot-to-firmware-setup", "Reboot Into Firmware Interface",
+ };
+
+ size_t n_allocated;
+ char **i;
+
+ assert(config);
+
+ /* Let's add the entries discovered by the boot loader to the end of our list, unless they are
+ * already included there. */
+
+ n_allocated = config->n_entries;
+
+ STRV_FOREACH(i, found_by_loader) {
+ _cleanup_free_ char *c = NULL, *t = NULL, *p = NULL;
+ char **a, **b;
+
+ if (boot_config_has_entry(config, *i))
+ continue;
+
+ if (only_auto && !startswith(*i, "auto-"))
+ continue;
+
+ c = strdup(*i);
+ if (!c)
+ return log_oom();
+
+ STRV_FOREACH_PAIR(a, b, (char**) title_table)
+ if (streq(*a, *i)) {
+ t = strdup(*b);
+ if (!t)
+ return log_oom();
+ break;
+ }
+
+ p = efi_variable_path(EFI_VENDOR_LOADER, "LoaderEntries");
+ if (!p)
+ return log_oom();
+
+ if (!GREEDY_REALLOC0(config->entries, n_allocated, config->n_entries + 1))
+ return log_oom();
+
+ config->entries[config->n_entries++] = (BootEntry) {
+ .type = BOOT_ENTRY_LOADER,
+ .id = TAKE_PTR(c),
+ .title = TAKE_PTR(t),
+ .path = TAKE_PTR(p),
+ };
+ }
+
+ return 0;
+}
+
+/********************************************************************************/
+
+static int verify_esp_blkid(
+ dev_t devid,
+ bool searching,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint64_t pstart = 0, psize = 0;
+ uint32_t part = 0;
+
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *node = NULL;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(ENOMEM), "Failed to open file system \"%s\": %m", node);
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" is ambiguous.", node);
+ else if (r == 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" does not contain a label.", node);
+ else if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe file system \"%s\": %m", node);
+
+ r = blkid_probe_lookup_value(b, "TYPE", &v, NULL);
+ if (r != 0)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "No filesystem found on \"%s\": %m", node);
+ if (!streq(v, "vfat"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not FAT.", node);
+
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL);
+ if (r != 0)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not located on a partitioned block device.", node);
+ if (!streq(v, "gpt"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not on a GPT partition table.", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: EIO, "Failed to probe partition type UUID of \"%s\": %m", node);
+ if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" has wrong type for an EFI System Partition (ESP).", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition entry UUID of \"%s\": %m", node);
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_NUMBER", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition number of \"%s\": %m", node);
+ r = safe_atou32(v, &part);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_OFFSET", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition offset of \"%s\": %m", node);
+ r = safe_atou64(v, &pstart);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field.");
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SIZE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition size of \"%s\": %m", node);
+ r = safe_atou64(v, &psize);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field.");
+#endif
+
+ if (ret_part)
+ *ret_part = part;
+ if (ret_pstart)
+ *ret_pstart = pstart;
+ if (ret_psize)
+ *ret_psize = psize;
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_esp_udev(
+ dev_t devid,
+ bool searching,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *node = NULL;
+ sd_id128_t uuid = SD_ID128_NULL;
+ uint64_t pstart = 0, psize = 0;
+ uint32_t part = 0;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+
+ r = sd_device_new_from_devnum(&d, 'b', devid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ r = sd_device_get_property_value(d, "ID_FS_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "vfat"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not FAT.", node );
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_SCHEME", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "gpt"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not on a GPT partition table.", node);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" has wrong type for an EFI System Partition (ESP).", node);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_UUID", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_NUMBER", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = safe_atou32(v, &part);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field.");
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_OFFSET", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = safe_atou64(v, &pstart);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field.");
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_SIZE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = safe_atou64(v, &psize);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field.");
+
+ if (ret_part)
+ *ret_part = part;
+ if (ret_pstart)
+ *ret_pstart = pstart;
+ if (ret_psize)
+ *ret_psize = psize;
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_fsroot_dir(
+ const char *path,
+ bool searching,
+ bool unprivileged_mode,
+ dev_t *ret_dev) {
+
+ struct stat st, st2;
+ const char *t2, *trigger;
+ int r;
+
+ assert(path);
+ assert(ret_dev);
+
+ /* So, the ESP and XBOOTLDR partition are commonly located on an autofs mount. stat() on the
+ * directory won't trigger it, if it is not mounted yet. Let's hence explicitly trigger it here,
+ * before stat()ing */
+ trigger = strjoina(path, "/trigger"); /* Filename doesn't matter... */
+ (void) access(trigger, F_OK);
+
+ if (stat(path, &st) < 0)
+ return log_full_errno((searching && errno == ENOENT) ||
+ (unprivileged_mode && errno == EACCES) ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to determine block device node of \"%s\": %m", path);
+
+ if (major(st.st_dev) == 0)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "Block device node of \"%s\" is invalid.", path);
+
+ t2 = strjoina(path, "/..");
+ if (stat(t2, &st2) < 0) {
+ if (errno != EACCES)
+ r = -errno;
+ else {
+ _cleanup_free_ char *parent = NULL;
+
+ /* If going via ".." didn't work due to EACCESS, then let's determine the parent path
+ * directly instead. It's not as good, due to symlinks and such, but we can't do
+ * anything better here. */
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return log_oom();
+
+ if (stat(parent, &st2) < 0)
+ r = -errno;
+ else
+ r = 0;
+ }
+
+ if (r < 0)
+ return log_full_errno(unprivileged_mode && r == -EACCES ? LOG_DEBUG : LOG_ERR, r,
+ "Failed to determine block device node of parent of \"%s\": %m", path);
+ }
+
+ if (st.st_dev == st2.st_dev)
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "Directory \"%s\" is not the root of the file system.", path);
+
+ if (ret_dev)
+ *ret_dev = st.st_dev;
+
+ return 0;
+}
+
+static int verify_esp(
+ const char *p,
+ bool searching,
+ bool unprivileged_mode,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ bool relax_checks;
+ dev_t devid;
+ int r;
+
+ assert(p);
+
+ /* This logs about all errors, except:
+ *
+ * -ENOENT → if 'searching' is set, and the dir doesn't exist
+ * -EADDRNOTAVAIL → if 'searching' is set, and the dir doesn't look like an ESP
+ * -EACESS → if 'unprivileged_mode' is set, and we have trouble accessing the thing
+ */
+
+ relax_checks = getenv_bool("SYSTEMD_RELAX_ESP_CHECKS") > 0;
+
+ /* Non-root user can only check the status, so if an error occurred in the following, it does not cause any
+ * issues. Let's also, silence the error messages. */
+
+ if (!relax_checks) {
+ struct statfs sfs;
+
+ if (statfs(p, &sfs) < 0)
+ /* If we are searching for the mount point, don't generate a log message if we can't find the path */
+ return log_full_errno((searching && errno == ENOENT) ||
+ (unprivileged_mode && errno == EACCES) ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to check file system type of \"%s\": %m", p);
+
+ if (!F_TYPE_EQUAL(sfs.f_type, MSDOS_SUPER_MAGIC))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ SYNTHETIC_ERRNO(searching ? EADDRNOTAVAIL : ENODEV),
+ "File system \"%s\" is not a FAT EFI System Partition (ESP) file system.", p);
+ }
+
+ r = verify_fsroot_dir(p, searching, unprivileged_mode, &devid);
+ if (r < 0)
+ return r;
+
+ /* In a container we don't have access to block devices, skip this part of the verification, we trust
+ * the container manager set everything up correctly on its own. */
+ if (detect_container() > 0 || relax_checks)
+ goto finish;
+
+ /* If we are unprivileged we ask udev for the metadata about the partition. If we are privileged we
+ * use blkid instead. Why? Because this code is called from 'bootctl' which is pretty much an
+ * emergency recovery tool that should also work when udev isn't up (i.e. from the emergency shell),
+ * however blkid can't work if we have no privileges to access block devices directly, which is why
+ * we use udev in that case. */
+ if (unprivileged_mode)
+ return verify_esp_udev(devid, searching, ret_part, ret_pstart, ret_psize, ret_uuid);
+ else
+ return verify_esp_blkid(devid, searching, ret_part, ret_pstart, ret_psize, ret_uuid);
+
+finish:
+ if (ret_part)
+ *ret_part = 0;
+ if (ret_pstart)
+ *ret_pstart = 0;
+ if (ret_psize)
+ *ret_psize = 0;
+ if (ret_uuid)
+ *ret_uuid = SD_ID128_NULL;
+
+ return 0;
+}
+
+int find_esp_and_warn(
+ const char *path,
+ bool unprivileged_mode,
+ char **ret_path,
+ uint32_t *ret_part,
+ uint64_t *ret_pstart,
+ uint64_t *ret_psize,
+ sd_id128_t *ret_uuid) {
+
+ int r;
+
+ /* This logs about all errors except:
+ *
+ * -ENOKEY → when we can't find the partition
+ * -EACCESS → when unprivileged_mode is true, and we can't access something
+ */
+
+ if (path) {
+ r = verify_esp(path, false, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r < 0)
+ return r;
+
+ goto found;
+ }
+
+ path = getenv("SYSTEMD_ESP_PATH");
+ if (path) {
+ if (!path_is_valid(path) || !path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "$SYSTEMD_ESP_PATH does not refer to absolute path, refusing to use it: %s",
+ path);
+
+ /* Note: when the user explicitly configured things with an env var we won't validate the mount
+ * point. After all we want this to be useful for testing. */
+ goto found;
+ }
+
+ FOREACH_STRING(path, "/efi", "/boot", "/boot/efi") {
+
+ r = verify_esp(path, true, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid);
+ if (r >= 0)
+ goto found;
+ if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */
+ return r;
+ }
+
+ /* No logging here */
+ return -ENOKEY;
+
+found:
+ if (ret_path) {
+ char *c;
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ *ret_path = c;
+ }
+
+ return 0;
+}
+
+static int verify_xbootldr_blkid(
+ dev_t devid,
+ bool searching,
+ sd_id128_t *ret_uuid) {
+
+ sd_id128_t uuid = SD_ID128_NULL;
+
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *node = NULL;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(ENOMEM), "Failed to open file system \"%s\": %m", node);
+
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == -2)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" is ambiguous.", node);
+ else if (r == 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "File system \"%s\" does not contain a label.", node);
+ else if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe file system \"%s\": %m", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition scheme of \"%s\": %m", node);
+ if (streq(v, "gpt")) {
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition type UUID of \"%s\": %m", node);
+ if (!streq(v, "bc13c2ff-59e6-4262-a352-b275fd6f7172"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition entry UUID of \"%s\": %m", node);
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ } else if (streq(v, "dos")) {
+
+ errno = 0;
+ r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL);
+ if (r != 0)
+ return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe partition type UUID of \"%s\": %m", node);
+ if (!streq(v, "0xea"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+
+ } else
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" is not on a GPT or DOS partition table.", node);
+#endif
+
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_xbootldr_udev(
+ dev_t devid,
+ bool searching,
+ sd_id128_t *ret_uuid) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *node = NULL;
+ sd_id128_t uuid = SD_ID128_NULL;
+ const char *v;
+ int r;
+
+ r = device_path_make_major_minor(S_IFBLK, devid, &node);
+ if (r < 0)
+ return log_error_errno(r, "Failed to format major/minor device path: %m");
+
+ r = sd_device_new_from_devnum(&d, 'b', devid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device from device number: %m");
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_SCHEME", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+
+ if (streq(v, "gpt")) {
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "bc13c2ff-59e6-4262-a352-b275fd6f7172"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_UUID", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ r = sd_id128_from_string(v, &uuid);
+ if (r < 0)
+ return log_error_errno(r, "Partition \"%s\" has invalid UUID \"%s\".", node, v);
+
+ } else if (streq(v, "dos")) {
+
+ r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get device property: %m");
+ if (!streq(v, "0xea"))
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" has wrong type for extended boot loader partition.", node);
+ } else
+ return log_full_errno(searching ? LOG_DEBUG : LOG_ERR,
+ searching ? SYNTHETIC_ERRNO(EADDRNOTAVAIL) : SYNTHETIC_ERRNO(ENODEV),
+ "File system \"%s\" is not on a GPT or DOS partition table.", node);
+
+ if (ret_uuid)
+ *ret_uuid = uuid;
+
+ return 0;
+}
+
+static int verify_xbootldr(
+ const char *p,
+ bool searching,
+ bool unprivileged_mode,
+ sd_id128_t *ret_uuid) {
+
+ bool relax_checks;
+ dev_t devid;
+ int r;
+
+ assert(p);
+
+ relax_checks = getenv_bool("SYSTEMD_RELAX_XBOOTLDR_CHECKS") > 0;
+
+ r = verify_fsroot_dir(p, searching, unprivileged_mode, &devid);
+ if (r < 0)
+ return r;
+
+ if (detect_container() > 0 || relax_checks)
+ goto finish;
+
+ if (unprivileged_mode)
+ return verify_xbootldr_udev(devid, searching, ret_uuid);
+ else
+ return verify_xbootldr_blkid(devid, searching, ret_uuid);
+
+finish:
+ if (ret_uuid)
+ *ret_uuid = SD_ID128_NULL;
+
+ return 0;
+}
+
+int find_xbootldr_and_warn(
+ const char *path,
+ bool unprivileged_mode,
+ char **ret_path,
+ sd_id128_t *ret_uuid) {
+
+ int r;
+
+ /* Similar to find_esp_and_warn(), but finds the XBOOTLDR partition. Returns the same errors. */
+
+ if (path) {
+ r = verify_xbootldr(path, false, unprivileged_mode, ret_uuid);
+ if (r < 0)
+ return r;
+
+ goto found;
+ }
+
+ path = getenv("SYSTEMD_XBOOTLDR_PATH");
+ if (path) {
+ if (!path_is_valid(path) || !path_is_absolute(path))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "$SYSTEMD_XBOOTLDR_PATH does not refer to absolute path, refusing to use it: %s",
+ path);
+
+ goto found;
+ }
+
+ r = verify_xbootldr("/boot", true, unprivileged_mode, ret_uuid);
+ if (r >= 0) {
+ path = "/boot";
+ goto found;
+ }
+ if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */
+ return r;
+
+ return -ENOKEY;
+
+found:
+ if (ret_path) {
+ char *c;
+
+ c = strdup(path);
+ if (!c)
+ return log_oom();
+
+ *ret_path = c;
+ }
+
+ return 0;
+}
diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h
new file mode 100644
index 0000000..1557bd0
--- /dev/null
+++ b/src/shared/bootspec.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "string-util.h"
+
+typedef enum BootEntryType {
+ BOOT_ENTRY_CONF, /* Type #1 entries: *.conf files */
+ BOOT_ENTRY_UNIFIED, /* Type #2 entries: *.efi files */
+ BOOT_ENTRY_LOADER, /* Additional entries augmented from LoaderEntries EFI var */
+ _BOOT_ENTRY_MAX,
+ _BOOT_ENTRY_INVALID = -1,
+} BootEntryType;
+
+typedef struct BootEntry {
+ BootEntryType type;
+ char *id; /* This is the file basename without extension */
+ char *id_old; /* Old-style ID, for deduplication purposes. */
+ char *path; /* This is the full path to the drop-in file */
+ char *root; /* The root path in which the drop-in was found, i.e. to which 'kernel', 'efi' and 'initrd' are relative */
+ char *title;
+ char *show_title;
+ char *version;
+ char *machine_id;
+ char *architecture;
+ char **options;
+ char *kernel; /* linux is #defined to 1, yikes! */
+ char *efi;
+ char **initrd;
+ char *device_tree;
+} BootEntry;
+
+typedef struct BootConfig {
+ char *default_pattern;
+ char *timeout;
+ char *editor;
+ char *auto_entries;
+ char *auto_firmware;
+ char *console_mode;
+ char *random_seed_mode;
+
+ char *entry_oneshot;
+ char *entry_default;
+
+ BootEntry *entries;
+ size_t n_entries;
+ ssize_t default_entry;
+} BootConfig;
+
+static inline bool boot_config_has_entry(BootConfig *config, const char *id) {
+ size_t j;
+
+ for (j = 0; j < config->n_entries; j++) {
+ const char* entry_id_old = config->entries[j].id_old;
+ if (streq(config->entries[j].id, id) ||
+ (entry_id_old && streq(entry_id_old, id)))
+ return true;
+ }
+
+ return false;
+}
+
+static inline BootEntry* boot_config_default_entry(BootConfig *config) {
+ if (config->default_entry < 0)
+ return NULL;
+
+ return config->entries + config->default_entry;
+}
+
+void boot_config_free(BootConfig *config);
+int boot_entries_load_config(const char *esp_path, const char *xbootldr_path, BootConfig *config);
+int boot_entries_load_config_auto(const char *override_esp_path, const char *override_xbootldr_path, BootConfig *config);
+int boot_entries_augment_from_loader(BootConfig *config, char **list, bool only_auto);
+
+static inline const char* boot_entry_title(const BootEntry *entry) {
+ return entry->show_title ?: entry->title ?: entry->id;
+}
+
+int find_esp_and_warn(const char *path, bool unprivileged_mode, char **ret_path, uint32_t *ret_part, uint64_t *ret_pstart, uint64_t *ret_psize, sd_id128_t *ret_uuid);
+int find_xbootldr_and_warn(const char *path, bool unprivileged_mode, char **ret_path,sd_id128_t *ret_uuid);
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
new file mode 100644
index 0000000..1023914
--- /dev/null
+++ b/src/shared/bpf-program.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "path-util.h"
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+
+ p = new0(BPFProgram, 1);
+ if (!p)
+ return -ENOMEM;
+
+ p->n_ref = 1;
+ p->prog_type = prog_type;
+ p->kernel_fd = -1;
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static BPFProgram *bpf_program_free(BPFProgram *p) {
+ assert(p);
+
+ /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
+ * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
+ * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
+ * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
+ * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
+ * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
+ * counter this, we track closely to which cgroup a program was attached to and will detach it on our own
+ * whenever we close the BPF fd. */
+ (void) bpf_program_cgroup_detach(p);
+
+ safe_close(p->kernel_fd);
+ free(p->instructions);
+ free(p->attached_path);
+
+ return mfree(p);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
+ return -EBUSY;
+
+ if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
+ return -ENOMEM;
+
+ memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
+ p->n_instructions += count;
+
+ return 0;
+}
+
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
+ union bpf_attr attr;
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) { /* make this idempotent */
+ memzero(log_buf, log_size);
+ return 0;
+ }
+
+ // FIXME: Clang doesn't 0-pad with structured initialization, causing
+ // the kernel to reject the bpf_attr as invalid. See:
+ // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
+ // Ideally it should behave like GCC, so that we can remove these workarounds.
+ zero(attr);
+ attr.prog_type = p->prog_type;
+ attr.insns = PTR_TO_UINT64(p->instructions);
+ attr.insn_cnt = p->n_instructions;
+ attr.license = PTR_TO_UINT64("GPL");
+ attr.log_buf = PTR_TO_UINT64(log_buf);
+ attr.log_level = !!log_buf;
+ attr.log_size = log_size;
+
+ p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (p->kernel_fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) {
+ union bpf_attr attr;
+
+ assert(p);
+
+ if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */
+ return -EBUSY;
+
+ zero(attr);
+ attr.pathname = PTR_TO_UINT64(path);
+
+ p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+ if (p->kernel_fd < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
+ _cleanup_free_ char *copy = NULL;
+ _cleanup_close_ int fd = -1;
+ union bpf_attr attr;
+ int r;
+
+ assert(p);
+ assert(type >= 0);
+ assert(path);
+
+ if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
+ return -EINVAL;
+
+ /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
+ * refuse this early. */
+ if (p->attached_path) {
+ if (!path_equal(p->attached_path, path))
+ return -EBUSY;
+ if (p->attached_type != type)
+ return -EBUSY;
+ if (p->attached_flags != flags)
+ return -EBUSY;
+
+ /* Here's a shortcut: if we previously attached this program already, then we don't have to do so
+ * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
+ * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
+ * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
+ * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
+ * would remain in effect. */
+ if (flags != BPF_F_ALLOW_OVERRIDE)
+ return 0;
+ }
+
+ /* Ensure we have a kernel object for this. */
+ r = bpf_program_load_kernel(p, NULL, 0);
+ if (r < 0)
+ return r;
+
+ copy = strdup(path);
+ if (!copy)
+ return -ENOMEM;
+
+ fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ zero(attr);
+ attr.attach_type = type;
+ attr.target_fd = fd;
+ attr.attach_bpf_fd = p->kernel_fd;
+ attr.attach_flags = flags;
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ free_and_replace(p->attached_path, copy);
+ p->attached_type = type;
+ p->attached_flags = flags;
+
+ return 0;
+}
+
+int bpf_program_cgroup_detach(BPFProgram *p) {
+ _cleanup_close_ int fd = -1;
+
+ assert(p);
+
+ if (!p->attached_path)
+ return -EUNATCH;
+
+ fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
+ * implicitly by the removal, hence don't complain */
+
+ } else {
+ union bpf_attr attr;
+
+ zero(attr);
+ attr.attach_type = p->attached_type;
+ attr.target_fd = fd;
+ attr.attach_bpf_fd = p->kernel_fd;
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
+ return -errno;
+ }
+
+ p->attached_path = mfree(p->attached_path);
+
+ return 0;
+}
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
+ union bpf_attr attr;
+ int fd;
+
+ zero(attr);
+ attr.map_type = type;
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.max_entries = max_entries;
+ attr.map_flags = flags;
+
+ fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
+int bpf_map_update_element(int fd, const void *key, void *value) {
+ union bpf_attr attr;
+
+ zero(attr);
+ attr.map_fd = fd;
+ attr.key = PTR_TO_UINT64(key);
+ attr.value = PTR_TO_UINT64(value);
+
+ if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_map_lookup_element(int fd, const void *key, void *value) {
+ union bpf_attr attr;
+
+ zero(attr);
+ attr.map_fd = fd;
+ attr.key = PTR_TO_UINT64(key);
+ attr.value = PTR_TO_UINT64(value);
+
+ if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
new file mode 100644
index 0000000..eef77f9
--- /dev/null
+++ b/src/shared/bpf-program.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct BPFProgram BPFProgram;
+
+struct BPFProgram {
+ unsigned n_ref;
+
+ int kernel_fd;
+ uint32_t prog_type;
+
+ size_t n_instructions;
+ size_t allocated;
+ struct bpf_insn *instructions;
+
+ char *attached_path;
+ int attached_type;
+ uint32_t attached_flags;
+};
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
+BPFProgram *bpf_program_unref(BPFProgram *p);
+BPFProgram *bpf_program_ref(BPFProgram *p);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
+int bpf_program_cgroup_detach(BPFProgram *p);
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
+int bpf_map_update_element(int fd, const void *key, void *value);
+int bpf_map_lookup_element(int fd, const void *key, void *value);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
diff --git a/src/shared/bridge-util.c b/src/shared/bridge-util.c
new file mode 100644
index 0000000..e1a8bcb
--- /dev/null
+++ b/src/shared/bridge-util.c
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bridge-util.h"
+#include "string-table.h"
+
+static const char* const bridge_state_table[_NETDEV_BRIDGE_STATE_MAX] = {
+ [NETDEV_BRIDGE_STATE_DISABLED] = "disabled",
+ [NETDEV_BRIDGE_STATE_LISTENING] = "listening",
+ [NETDEV_BRIDGE_STATE_LEARNING] = "learning",
+ [NETDEV_BRIDGE_STATE_FORWARDING] = "forwarding",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(bridge_state, BridgeState);
diff --git a/src/shared/bridge-util.h b/src/shared/bridge-util.h
new file mode 100644
index 0000000..c9b02d8
--- /dev/null
+++ b/src/shared/bridge-util.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+
+#include "conf-parser.h"
+
+typedef enum BridgeState {
+ NETDEV_BRIDGE_STATE_DISABLED = BR_STATE_DISABLED,
+ NETDEV_BRIDGE_STATE_LISTENING = BR_STATE_LISTENING,
+ NETDEV_BRIDGE_STATE_LEARNING = BR_STATE_LEARNING,
+ NETDEV_BRIDGE_STATE_FORWARDING = BR_STATE_FORWARDING,
+ NETDEV_BRIDGE_STATE_BLOCKING = BR_STATE_BLOCKING,
+ _NETDEV_BRIDGE_STATE_MAX,
+ _NETDEV_BRIDGE_STATE_INVALID = -1,
+} BridgeState;
+
+const char *bridge_state_to_string(BridgeState d) _const_;
+BridgeState bridge_state_from_string(const char *d) _pure_;
diff --git a/src/shared/bus-get-properties.c b/src/shared/bus-get-properties.c
new file mode 100644
index 0000000..32f68d5
--- /dev/null
+++ b/src/shared/bus-get-properties.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-get-properties.h"
+#include "rlimit-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+int bus_property_get_bool(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int b = *(bool*) userdata;
+
+ return sd_bus_message_append_basic(reply, 'b', &b);
+}
+
+int bus_property_set_bool(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int b, r;
+
+ r = sd_bus_message_read(value, "b", &b);
+ if (r < 0)
+ return r;
+
+ *(bool*) userdata = b;
+ return 0;
+}
+
+int bus_property_get_id128(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ sd_id128_t *id = userdata;
+
+ if (sd_id128_is_null(*id)) /* Add an empty array if the ID is zero */
+ return sd_bus_message_append(reply, "ay", 0);
+ else
+ return sd_bus_message_append_array(reply, 'y', id->bytes, 16);
+}
+
+int bus_property_get_percent(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ char pstr[DECIMAL_STR_MAX(int) + 2];
+ int p = *(int*) userdata;
+
+ xsprintf(pstr, "%d%%", p);
+
+ return sd_bus_message_append_basic(reply, 's', pstr);
+}
+
+#if __SIZEOF_SIZE_T__ != 8
+int bus_property_get_size(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t sz = *(size_t*) userdata;
+
+ return sd_bus_message_append_basic(reply, 't', &sz);
+}
+#endif
+
+#if __SIZEOF_LONG__ != 8
+int bus_property_get_long(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ int64_t l = *(long*) userdata;
+
+ return sd_bus_message_append_basic(reply, 'x', &l);
+}
+
+int bus_property_get_ulong(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ uint64_t ul = *(unsigned long*) userdata;
+
+ return sd_bus_message_append_basic(reply, 't', &ul);
+}
+#endif
+
+int bus_property_get_rlimit(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const char *is_soft;
+ struct rlimit *rl;
+ uint64_t u;
+ rlim_t x;
+
+ assert(bus);
+ assert(reply);
+ assert(userdata);
+
+ is_soft = endswith(property, "Soft");
+
+ rl = *(struct rlimit**) userdata;
+ if (rl)
+ x = is_soft ? rl->rlim_cur : rl->rlim_max;
+ else {
+ struct rlimit buf = {};
+ const char *s, *p;
+ int z;
+
+ /* Chop off "Soft" suffix */
+ s = is_soft ? strndupa(property, is_soft - property) : property;
+
+ /* Skip over any prefix, such as "Default" */
+ assert_se(p = strstr(s, "Limit"));
+
+ z = rlimit_from_string(p + 5);
+ assert(z >= 0);
+
+ (void) getrlimit(z, &buf);
+ x = is_soft ? buf.rlim_cur : buf.rlim_max;
+ }
+
+ /* rlim_t might have different sizes, let's map RLIMIT_INFINITY to (uint64_t) -1, so that it is the same on all
+ * archs */
+ u = x == RLIM_INFINITY ? (uint64_t) -1 : (uint64_t) x;
+
+ return sd_bus_message_append(reply, "t", u);
+}
diff --git a/src/shared/bus-get-properties.h b/src/shared/bus-get-properties.h
new file mode 100644
index 0000000..9832c0d
--- /dev/null
+++ b/src/shared/bus-get-properties.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "macro.h"
+
+int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error);
+int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_get_percent(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define bus_property_get_usec ((sd_bus_property_get_t) NULL)
+#define bus_property_set_usec ((sd_bus_property_set_t) NULL)
+
+assert_cc(sizeof(int) == sizeof(int32_t));
+#define bus_property_get_int ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(unsigned) == sizeof(uint32_t));
+#define bus_property_get_unsigned ((sd_bus_property_get_t) NULL)
+
+/* On 64bit machines we can use the default serializer for size_t and
+ * friends, otherwise we need to cast this manually */
+#if __SIZEOF_SIZE_T__ == 8
+#define bus_property_get_size ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_size(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+#if __SIZEOF_LONG__ == 8
+#define bus_property_get_long ((sd_bus_property_get_t) NULL)
+#define bus_property_get_ulong ((sd_bus_property_get_t) NULL)
+#else
+int bus_property_get_long(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_get_ulong(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+#endif
+
+/* uid_t and friends on Linux 32 bit. This means we can just use the
+ * default serializer for 32bit unsigned, for serializing it, and map
+ * it to NULL here */
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+#define bus_property_get_uid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+#define bus_property_get_gid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(pid_t) == sizeof(uint32_t));
+#define bus_property_get_pid ((sd_bus_property_get_t) NULL)
+
+assert_cc(sizeof(mode_t) == sizeof(uint32_t));
+#define bus_property_get_mode ((sd_bus_property_get_t) NULL)
+
+int bus_property_get_rlimit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+#define BUS_DEFINE_PROPERTY_GET_GLOBAL(function, bus_type, val) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ assert(bus); \
+ assert(reply); \
+ \
+ return sd_bus_message_append(reply, bus_type, val); \
+ }
+
+#define BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, get2) \
+ int function(sd_bus *bus, \
+ const char *path, \
+ const char *interface, \
+ const char *property, \
+ sd_bus_message *reply, \
+ void *userdata, \
+ sd_bus_error *error) { \
+ \
+ data_type *data = userdata; \
+ \
+ assert(bus); \
+ assert(reply); \
+ assert(data); \
+ \
+ return sd_bus_message_append(reply, bus_type, \
+ get2(get1(data))); \
+ }
+
+#define ident(x) (x)
+#define BUS_DEFINE_PROPERTY_GET(function, bus_type, data_type, get1) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, ident)
+
+#define ref(x) (*(x))
+#define BUS_DEFINE_PROPERTY_GET_REF(function, bus_type, data_type, get) \
+ BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, ref, get)
+
+#define BUS_DEFINE_PROPERTY_GET_ENUM(function, name, type) \
+ BUS_DEFINE_PROPERTY_GET_REF(function, "s", type, name##_to_string)
+
+#define BUS_PROPERTY_DUAL_TIMESTAMP(name, offset, flags) \
+ SD_BUS_PROPERTY(name, "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, realtime), (flags)), \
+ SD_BUS_PROPERTY(name "Monotonic", "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, monotonic), (flags))
diff --git a/src/shared/bus-locator.c b/src/shared/bus-locator.c
new file mode 100644
index 0000000..3754d1d
--- /dev/null
+++ b/src/shared/bus-locator.c
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-locator.h"
+#include "macro.h"
+
+const BusLocator* const bus_home_mgr = &(BusLocator){
+ .destination = "org.freedesktop.home1",
+ .path = "/org/freedesktop/home1",
+ .interface = "org.freedesktop.home1.Manager",
+};
+
+const BusLocator* const bus_import_mgr = &(BusLocator){
+ .destination ="org.freedesktop.import1",
+ .path = "/org/freedesktop/import1",
+ .interface = "org.freedesktop.import1.Manager"
+};
+
+const BusLocator* const bus_locale = &(BusLocator){
+ .destination = "org.freedesktop.locale1",
+ .path = "/org/freedesktop/locale1",
+ .interface = "org.freedesktop.locale1"
+};
+
+const BusLocator* const bus_login_mgr = &(BusLocator){
+ .destination = "org.freedesktop.login1",
+ .path = "/org/freedesktop/login1",
+ .interface = "org.freedesktop.login1.Manager"
+};
+
+const BusLocator* const bus_machine_mgr = &(BusLocator){
+ .destination ="org.freedesktop.machine1",
+ .path = "/org/freedesktop/machine1",
+ .interface = "org.freedesktop.machine1.Manager"
+};
+
+const BusLocator* const bus_network_mgr = &(BusLocator){
+ .destination = "org.freedesktop.network1",
+ .path = "/org/freedesktop/network1",
+ .interface = "org.freedesktop.network1.Manager"
+};
+
+const BusLocator* const bus_portable_mgr = &(BusLocator){
+ .destination = "org.freedesktop.portable1",
+ .path = "/org/freedesktop/portable1",
+ .interface = "org.freedesktop.portable1.Manager"
+};
+
+const BusLocator* const bus_resolve_mgr = &(BusLocator){
+ .destination = "org.freedesktop.resolve1",
+ .path = "/org/freedesktop/resolve1",
+ .interface = "org.freedesktop.resolve1.Manager"
+};
+
+const BusLocator* const bus_systemd_mgr = &(BusLocator){
+ .destination = "org.freedesktop.systemd1",
+ .path = "/org/freedesktop/systemd1",
+ .interface = "org.freedesktop.systemd1.Manager"
+};
+
+const BusLocator* const bus_timedate = &(BusLocator){
+ .destination = "org.freedesktop.timedate1",
+ .path = "/org/freedesktop/timedate1",
+ .interface = "org.freedesktop.timedate1"
+};
+
+/* Shorthand flavors of the sd-bus convenience helpers with destination,path,interface strings encapsulated
+ * within a single struct. */
+int bus_call_method_async(
+ sd_bus *bus,
+ sd_bus_slot **slot,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ assert(locator);
+
+ va_start(ap, types);
+ r = sd_bus_call_method_asyncv(bus, slot, locator->destination, locator->path, locator->interface, member, callback, userdata, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int bus_call_method(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *types, ...) {
+
+ va_list ap;
+ int r;
+
+ assert(locator);
+
+ va_start(ap, types);
+ r = sd_bus_call_methodv(bus, locator->destination, locator->path, locator->interface, member, error, reply, types, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int bus_get_property(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ const char *type) {
+
+ assert(locator);
+
+ return sd_bus_get_property(bus, locator->destination, locator->path, locator->interface, member, error, reply, type);
+}
+
+int bus_get_property_trivial(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ char type, void *ptr) {
+
+ assert(locator);
+
+ return sd_bus_get_property_trivial(bus, locator->destination, locator->path, locator->interface, member, error, type, ptr);
+}
+
+int bus_get_property_string(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ char **ret) {
+
+ assert(locator);
+
+ return sd_bus_get_property_string(bus, locator->destination, locator->path, locator->interface, member, error, ret);
+}
+
+int bus_get_property_strv(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ char ***ret) {
+
+ assert(locator);
+
+ return sd_bus_get_property_strv(bus, locator->destination, locator->path, locator->interface, member, error, ret);
+}
+
+int bus_set_property(
+ sd_bus *bus,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_error *error,
+ const char *type, ...) {
+
+ va_list ap;
+ int r;
+
+ assert(locator);
+
+ va_start(ap, type);
+ r = sd_bus_set_propertyv(bus, locator->destination, locator->path, locator->interface, member, error, type, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int bus_match_signal(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ void *userdata) {
+
+ assert(locator);
+
+ return sd_bus_match_signal(bus, ret, locator->destination, locator->path, locator->interface, member, callback, userdata);
+}
+
+int bus_match_signal_async(
+ sd_bus *bus,
+ sd_bus_slot **ret,
+ const BusLocator *locator,
+ const char *member,
+ sd_bus_message_handler_t callback,
+ sd_bus_message_handler_t install_callback,
+ void *userdata) {
+
+ assert(locator);
+
+ return sd_bus_match_signal_async(bus, ret, locator->destination, locator->path, locator->interface, member, callback, install_callback, userdata);
+}
+
+int bus_message_new_method_call(
+ sd_bus *bus,
+ sd_bus_message **m,
+ const BusLocator *locator,
+ const char *member) {
+
+ assert(locator);
+
+ return sd_bus_message_new_method_call(bus, m, locator->destination, locator->path, locator->interface, member);
+}
diff --git a/src/shared/bus-locator.h b/src/shared/bus-locator.h
new file mode 100644
index 0000000..fe3b876
--- /dev/null
+++ b/src/shared/bus-locator.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef struct BusLocator {
+ const char *destination;
+ const char *path;
+ const char *interface;
+} BusLocator;
+
+extern const BusLocator* const bus_home_mgr;
+extern const BusLocator* const bus_import_mgr;
+extern const BusLocator* const bus_locale;
+extern const BusLocator* const bus_login_mgr;
+extern const BusLocator* const bus_machine_mgr;
+extern const BusLocator* const bus_network_mgr;
+extern const BusLocator* const bus_portable_mgr;
+extern const BusLocator* const bus_resolve_mgr;
+extern const BusLocator* const bus_systemd_mgr;
+extern const BusLocator* const bus_timedate;
+
+/* Shorthand flavors of the sd-bus convenience helpers with destination,path,interface strings encapsulated
+ * within a single struct. */
+int bus_call_method_async(sd_bus *bus, sd_bus_slot **slot, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, void *userdata, const char *types, ...);
+int bus_call_method(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, sd_bus_message **reply, const char *types, ...);
+int bus_get_property(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, sd_bus_message **reply, const char *type);
+int bus_get_property_trivial(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char type, void *ptr);
+int bus_get_property_string(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char **ret);
+int bus_get_property_strv(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, char ***ret);
+int bus_set_property(sd_bus *bus, const BusLocator *locator, const char *member, sd_bus_error *error, const char *type, ...);
+int bus_match_signal(sd_bus *bus, sd_bus_slot **ret, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, void *userdata);
+int bus_match_signal_async(sd_bus *bus, sd_bus_slot **ret, const BusLocator *locator, const char *member, sd_bus_message_handler_t callback, sd_bus_message_handler_t install_callback, void *userdata);
+int bus_message_new_method_call(sd_bus *bus, sd_bus_message **m, const BusLocator *locator, const char *member);
diff --git a/src/shared/bus-log-control-api.c b/src/shared/bus-log-control-api.c
new file mode 100644
index 0000000..06e6697
--- /dev/null
+++ b/src/shared/bus-log-control-api.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-get-properties.h"
+#include "bus-log-control-api.h"
+#include "bus-util.h"
+#include "log.h"
+#include "sd-bus.h"
+#include "syslog-util.h"
+
+int bus_property_get_log_level(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ assert(bus);
+ assert(reply);
+
+ r = log_level_to_string_alloc(log_get_max_level(), &t);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_append(reply, "s", t);
+}
+
+int bus_property_set_log_level(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ r = log_level_from_string(t);
+ if (r < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log level '%s'", t);
+
+ log_info("Setting log level to %s.", t);
+ log_set_max_level(r);
+
+ return 0;
+}
+
+BUS_DEFINE_PROPERTY_GET_GLOBAL(bus_property_get_log_target, "s", log_target_to_string(log_get_target()));
+
+int bus_property_set_log_target(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *value,
+ void *userdata,
+ sd_bus_error *error) {
+
+ LogTarget target;
+ const char *t;
+ int r;
+
+ assert(bus);
+ assert(value);
+
+ r = sd_bus_message_read(value, "s", &t);
+ if (r < 0)
+ return r;
+
+ target = log_target_from_string(t);
+ if (target < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid log target '%s'", t);
+
+ log_info("Setting log target to %s.", log_target_to_string(target));
+ log_set_target(target);
+ log_open();
+
+ return 0;
+}
+
+BUS_DEFINE_PROPERTY_GET_GLOBAL(bus_property_get_syslog_identifier, "s", program_invocation_short_name);
+
+static const sd_bus_vtable log_control_vtable[] = {
+ SD_BUS_VTABLE_START(0),
+
+ SD_BUS_WRITABLE_PROPERTY("LogLevel", "s", bus_property_get_log_level, bus_property_set_log_level, 0, 0),
+ SD_BUS_WRITABLE_PROPERTY("LogTarget", "s", bus_property_get_log_target, bus_property_set_log_target, 0, 0),
+ SD_BUS_PROPERTY("SyslogIdentifier", "s", bus_property_get_syslog_identifier, 0, 0),
+
+ /* One of those days we might want to add a similar, second interface to cover common service
+ * operations such as Reload(), Reexecute(), Exit() … and maybe some properties exposing version
+ * number and other meta-data of the service. */
+
+ SD_BUS_VTABLE_END,
+};
+
+const BusObjectImplementation log_control_object = {
+ "/org/freedesktop/LogControl1",
+ "org.freedesktop.LogControl1",
+ .vtables = BUS_VTABLES(log_control_vtable),
+};
diff --git a/src/shared/bus-log-control-api.h b/src/shared/bus-log-control-api.h
new file mode 100644
index 0000000..85f60a7
--- /dev/null
+++ b/src/shared/bus-log-control-api.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "bus-object.h"
+
+extern const BusObjectImplementation log_control_object;
+static inline int bus_log_control_api_register(sd_bus *bus) {
+ return bus_add_implementation(bus, &log_control_object, NULL);
+}
+
+int bus_property_get_log_level(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_log_level(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error);
+
+int bus_property_get_log_target(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_set_log_target(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+
+int bus_property_get_syslog_identifier(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
diff --git a/src/shared/bus-map-properties.c b/src/shared/bus-map-properties.c
new file mode 100644
index 0000000..8460856
--- /dev/null
+++ b/src/shared/bus-map-properties.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-map-properties.h"
+#include "alloc-util.h"
+#include "strv.h"
+#include "bus-message.h"
+
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ sd_id128_t *p = userdata;
+ const void *v;
+ size_t n;
+ int r;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, &v, &n);
+ if (r < 0)
+ return r;
+
+ if (n == 0)
+ *p = SD_ID128_NULL;
+ else if (n == 16)
+ memcpy((*p).bytes, v, n);
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int bus_map_strv_sort(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***p = userdata;
+ int r;
+
+ r = bus_message_read_strv_extend(m, &l);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_strv(p, l, false);
+ if (r < 0)
+ return r;
+
+ strv_sort(*p);
+ return 0;
+}
+
+static int map_basic(sd_bus *bus, const char *member, sd_bus_message *m, unsigned flags, sd_bus_error *error, void *userdata) {
+ char type;
+ int r;
+
+ r = sd_bus_message_peek_type(m, &type, NULL);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING:
+ case SD_BUS_TYPE_OBJECT_PATH: {
+ const char **p = userdata;
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return r;
+
+ if (isempty(s))
+ s = NULL;
+
+ if (flags & BUS_MAP_STRDUP)
+ return free_and_strdup((char **) userdata, s);
+
+ *p = s;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_ARRAY: {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***p = userdata;
+
+ r = bus_message_read_strv_extend(m, &l);
+ if (r < 0)
+ return r;
+
+ return strv_extend_strv(p, l, false);
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return r;
+
+ if (flags & BUS_MAP_BOOLEAN_AS_BOOL)
+ *(bool*) userdata = b;
+ else
+ *(int*) userdata = b;
+
+ return 0;
+ }
+
+ case SD_BUS_TYPE_INT32:
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t u, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ *p = u;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_INT64:
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t t, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &t);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ return 0;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d, *p = userdata;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return r;
+
+ *p = d;
+ return 0;
+ }}
+
+ return -EOPNOTSUPP;
+}
+
+int bus_message_map_all_properties(
+ sd_bus_message *m,
+ const struct bus_properties_map *map,
+ unsigned flags,
+ sd_bus_error *error,
+ void *userdata) {
+
+ int r;
+
+ assert(m);
+ assert(map);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ const struct bus_properties_map *prop;
+ const char *member;
+ const char *contents;
+ void *v;
+ unsigned i;
+
+ r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member);
+ if (r < 0)
+ return r;
+
+ for (i = 0, prop = NULL; map[i].member; i++)
+ if (streq(map[i].member, member)) {
+ prop = &map[i];
+ break;
+ }
+
+ if (prop) {
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r < 0)
+ return r;
+
+ v = (uint8_t *)userdata + prop->offset;
+ if (map[i].set)
+ r = prop->set(sd_bus_message_get_bus(m), member, m, error, v);
+ else
+ r = map_basic(sd_bus_message_get_bus(m), member, m, flags, error, v);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_exit_container(m);
+}
+
+int bus_map_all_properties(
+ sd_bus *bus,
+ const char *destination,
+ const char *path,
+ const struct bus_properties_map *map,
+ unsigned flags,
+ sd_bus_error *error,
+ sd_bus_message **reply,
+ void *userdata) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+ int r;
+
+ assert(bus);
+ assert(destination);
+ assert(path);
+ assert(map);
+ assert(reply || (flags & BUS_MAP_STRDUP));
+
+ r = sd_bus_call_method(
+ bus,
+ destination,
+ path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ error,
+ &m,
+ "s", "");
+ if (r < 0)
+ return r;
+
+ r = bus_message_map_all_properties(m, map, flags, error, userdata);
+ if (r < 0)
+ return r;
+
+ if (reply)
+ *reply = sd_bus_message_ref(m);
+
+ return r;
+}
diff --git a/src/shared/bus-map-properties.h b/src/shared/bus-map-properties.h
new file mode 100644
index 0000000..2a766e3
--- /dev/null
+++ b/src/shared/bus-map-properties.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+typedef int (*bus_property_set_t) (sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+
+struct bus_properties_map {
+ const char *member;
+ const char *signature;
+ bus_property_set_t set;
+ size_t offset;
+};
+
+enum {
+ BUS_MAP_STRDUP = 1 << 0, /* If set, each "s" message is duplicated. Thus, each pointer needs to be freed. */
+ BUS_MAP_BOOLEAN_AS_BOOL = 1 << 1, /* If set, each "b" message is written to a bool pointer. If not set, "b" is written to a int pointer. */
+};
+
+int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+int bus_map_strv_sort(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata);
+
+int bus_message_map_all_properties(sd_bus_message *m, const struct bus_properties_map *map, unsigned flags, sd_bus_error *error, void *userdata);
+int bus_map_all_properties(sd_bus *bus, const char *destination, const char *path, const struct bus_properties_map *map,
+ unsigned flags, sd_bus_error *error, sd_bus_message **reply, void *userdata);
diff --git a/src/shared/bus-message-util.c b/src/shared/bus-message-util.c
new file mode 100644
index 0000000..19500a5
--- /dev/null
+++ b/src/shared/bus-message-util.c
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-message-util.h"
+
+#include "resolve-util.h"
+
+int bus_message_read_ifindex(sd_bus_message *message, sd_bus_error *error, int *ret) {
+ int ifindex, r;
+
+ assert(message);
+ assert(ret);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "i", &ifindex);
+ if (r < 0)
+ return r;
+
+ if (ifindex <= 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid interface index");
+
+ *ret = ifindex;
+
+ return 0;
+}
+
+int bus_message_read_family(sd_bus_message *message, sd_bus_error *error, int *ret) {
+ int family, r;
+
+ assert(message);
+ assert(ret);
+
+ assert_cc(sizeof(int) == sizeof(int32_t));
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ *ret = family;
+ return 0;
+}
+
+int bus_message_read_in_addr_auto(sd_bus_message *message, sd_bus_error *error, int *ret_family, union in_addr_union *ret_addr) {
+ int family, r;
+ const void *d;
+ size_t sz;
+
+ assert(message);
+
+ r = sd_bus_message_read(message, "i", &family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_array(message, 'y', &d, &sz);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Unknown address family %i", family);
+
+ if (sz != FAMILY_ADDRESS_SIZE(family))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid address size");
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_addr)
+ memcpy(ret_addr, d, sz);
+ return 0;
+}
+
+static int bus_message_read_dns_one(
+ sd_bus_message *message,
+ sd_bus_error *error,
+ bool extended,
+ int *ret_family,
+ union in_addr_union *ret_address,
+ uint16_t *ret_port,
+ const char **ret_server_name) {
+ const char *server_name = NULL;
+ union in_addr_union a;
+ uint16_t port = 0;
+ int family, r;
+
+ assert(message);
+ assert(ret_family);
+ assert(ret_address);
+ assert(ret_port);
+ assert(ret_server_name);
+
+ r = sd_bus_message_enter_container(message, 'r', extended ? "iayqs" : "iay");
+ if (r <= 0)
+ return r;
+
+ r = bus_message_read_in_addr_auto(message, error, &family, &a);
+ if (r < 0)
+ return r;
+
+ if (!dns_server_address_valid(family, &a))
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid DNS server address");
+
+ if (extended) {
+ r = sd_bus_message_read(message, "q", &port);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(port, 53, 853))
+ port = 0;
+
+ r = sd_bus_message_read(message, "s", &server_name);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ *ret_family = family;
+ *ret_address = a;
+ *ret_port = port;
+ *ret_server_name = server_name;
+
+ return 1;
+}
+
+int bus_message_read_dns_servers(
+ sd_bus_message *message,
+ sd_bus_error *error,
+ bool extended,
+ struct in_addr_full ***ret_dns,
+ size_t *ret_n_dns) {
+
+ struct in_addr_full **dns = NULL;
+ size_t n = 0, allocated = 0;
+ int r;
+
+ assert(message);
+ assert(ret_dns);
+ assert(ret_n_dns);
+
+ r = sd_bus_message_enter_container(message, 'a', extended ? "(iayqs)" : "(iay)");
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ const char *server_name;
+ union in_addr_union a;
+ uint16_t port;
+ int family;
+
+ r = bus_message_read_dns_one(message, error, extended, &family, &a, &port, &server_name);
+ if (r < 0)
+ goto clear;
+ if (r == 0)
+ break;
+
+ if (!GREEDY_REALLOC(dns, allocated, n+1)) {
+ r = -ENOMEM;
+ goto clear;
+ }
+
+ r = in_addr_full_new(family, &a, port, 0, server_name, dns + n);
+ if (r < 0)
+ goto clear;
+
+ n++;
+ }
+
+ *ret_dns = TAKE_PTR(dns);
+ *ret_n_dns = n;
+ return 0;
+
+clear:
+ for (size_t i = 0; i < n; i++)
+ in_addr_full_free(dns[i]);
+ free(dns);
+
+ return r;
+}
diff --git a/src/shared/bus-message-util.h b/src/shared/bus-message-util.h
new file mode 100644
index 0000000..b82c083
--- /dev/null
+++ b/src/shared/bus-message-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "in-addr-util.h"
+#include "socket-netlink.h"
+
+int bus_message_read_ifindex(sd_bus_message *message, sd_bus_error *error, int *ret);
+int bus_message_read_family(sd_bus_message *message, sd_bus_error *error, int *ret);
+int bus_message_read_in_addr_auto(sd_bus_message *message, sd_bus_error *error, int *ret_family, union in_addr_union *ret_addr);
+
+int bus_message_read_dns_servers(
+ sd_bus_message *message,
+ sd_bus_error *error,
+ bool extended,
+ struct in_addr_full ***ret_dns,
+ size_t *ret_n_dns);
diff --git a/src/shared/bus-object.c b/src/shared/bus-object.c
new file mode 100644
index 0000000..f2e5391
--- /dev/null
+++ b/src/shared/bus-object.c
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-introspect.h"
+#include "bus-object.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+
+int bus_add_implementation(sd_bus *bus, const BusObjectImplementation *impl, void *userdata) {
+ int r;
+
+ log_debug("Registering bus object implementation for path=%s iface=%s", impl->path, impl->interface);
+
+ for (const sd_bus_vtable **p = impl->vtables; p && *p; p++) {
+ r = sd_bus_add_object_vtable(bus, NULL,
+ impl->path,
+ impl->interface,
+ *p,
+ userdata);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register bus path %s with interface %s: %m",
+ impl->path,
+ impl->interface);
+ }
+
+ for (const BusObjectVtablePair *p = impl->fallback_vtables; p && p->vtable; p++) {
+ r = sd_bus_add_fallback_vtable(bus, NULL,
+ impl->path,
+ impl->interface,
+ p->vtable,
+ p->object_find,
+ userdata);
+ if (r < 0)
+ return log_error_errno(r, "Failed to register bus path %s with interface %s: %m",
+ impl->path,
+ impl->interface);
+ }
+
+ if (impl->node_enumerator) {
+ r = sd_bus_add_node_enumerator(bus, NULL,
+ impl->path,
+ impl->node_enumerator,
+ userdata);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add node enumerator for %s: %m",
+ impl->path);
+ }
+
+ if (impl->manager) {
+ r = sd_bus_add_object_manager(bus, NULL, impl->path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add object manager for %s: %m", impl->path);
+ }
+
+ for (size_t i = 0; impl->children && impl->children[i]; i++) {
+ r = bus_add_implementation(bus, impl->children[i], userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static const BusObjectImplementation* find_implementation(
+ const char *pattern,
+ const BusObjectImplementation* const* bus_objects) {
+
+ for (size_t i = 0; bus_objects && bus_objects[i]; i++) {
+ const BusObjectImplementation *impl = bus_objects[i];
+
+ if (STR_IN_SET(pattern, impl->path, impl->interface))
+ return impl;
+
+ impl = find_implementation(pattern, impl->children);
+ if (impl)
+ return impl;
+ }
+
+ return NULL;
+}
+
+static int bus_introspect_implementation(
+ struct introspect *intro,
+ const BusObjectImplementation *impl) {
+ int r;
+
+ for (const sd_bus_vtable **p = impl->vtables; p && *p; p++) {
+ r = introspect_write_interface(intro, impl->interface, *p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+ }
+
+ for (const BusObjectVtablePair *p = impl->fallback_vtables; p && p->vtable; p++) {
+ r = introspect_write_interface(intro, impl->interface, p->vtable);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+ }
+
+ return 0;
+}
+
+static void list_paths(
+ FILE *out,
+ const BusObjectImplementation* const* bus_objects) {
+
+ for (size_t i = 0; bus_objects[i]; i++) {
+ fprintf(out, "%s\t%s\n", bus_objects[i]->path, bus_objects[i]->interface);
+ if (bus_objects[i]->children)
+ list_paths(out, bus_objects[i]->children);
+ }
+}
+
+int bus_introspect_implementations(
+ FILE *out,
+ const char *pattern,
+ const BusObjectImplementation* const* bus_objects) {
+
+ const BusObjectImplementation *impl, *main_impl = NULL;
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ if (streq(pattern, "list")) {
+ list_paths(out, bus_objects);
+ return 0;
+ }
+
+ struct introspect intro = {};
+ bool is_interface = sd_bus_interface_name_is_valid(pattern);
+
+ impl = find_implementation(pattern, bus_objects);
+ if (!impl)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "%s %s not found",
+ is_interface ? "Interface" : "Object path",
+ pattern);
+
+ /* We use trusted=false here to get all the @org.freedesktop.systemd1.Privileged annotations. */
+ r = introspect_begin(&intro, false);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+
+ r = introspect_write_default_interfaces(&intro, impl->manager);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+
+ /* Check if there is a non-fallback path that applies to the given interface, also
+ * print it. This is useful in the case of units: o.fd.systemd1.Service is declared
+ * as a fallback vtable for o/fd/systemd1/unit, and we also want to print
+ * o.fd.systemd1.Unit, which is the non-fallback implementation. */
+ if (impl->fallback_vtables && is_interface)
+ main_impl = find_implementation(impl->path, bus_objects);
+
+ if (main_impl)
+ bus_introspect_implementation(&intro, main_impl);
+
+ if (impl != main_impl)
+ bus_introspect_implementation(&intro, impl);
+
+ _cleanup_set_free_ Set *nodes = NULL;
+
+ for (size_t i = 0; impl->children && impl->children[i]; i++) {
+ r = set_put_strdup(&nodes, impl->children[i]->path);
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = introspect_write_child_nodes(&intro, nodes, impl->path);
+ if (r < 0)
+ return r;
+
+ r = introspect_finish(&intro, &s);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write introspection data: %m");
+
+ fputs(s, out);
+ return 0;
+}
diff --git a/src/shared/bus-object.h b/src/shared/bus-object.h
new file mode 100644
index 0000000..145bbd2
--- /dev/null
+++ b/src/shared/bus-object.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "sd-bus.h"
+
+typedef struct BusObjectImplementation BusObjectImplementation;
+
+typedef struct BusObjectVtablePair {
+ const sd_bus_vtable *vtable;
+ sd_bus_object_find_t object_find;
+} BusObjectVtablePair;
+
+struct BusObjectImplementation {
+ const char *path;
+ const char *interface;
+ const sd_bus_vtable **vtables;
+ const BusObjectVtablePair *fallback_vtables;
+ sd_bus_node_enumerator_t node_enumerator;
+ bool manager;
+ const BusObjectImplementation **children;
+};
+
+#define BUS_VTABLES(...) ((const sd_bus_vtable* []){ __VA_ARGS__, NULL })
+#define BUS_FALLBACK_VTABLES(...) ((const BusObjectVtablePair[]) { __VA_ARGS__, {} })
+#define BUS_IMPLEMENTATIONS(...) ((const BusObjectImplementation* []) { __VA_ARGS__, NULL })
+
+int bus_add_implementation(sd_bus *bus, const BusObjectImplementation *impl, void *userdata);
+int bus_introspect_implementations(
+ FILE *out,
+ const char *pattern,
+ const BusObjectImplementation* const* bus_objects);
diff --git a/src/shared/bus-polkit.c b/src/shared/bus-polkit.c
new file mode 100644
index 0000000..14122e0
--- /dev/null
+++ b/src/shared/bus-polkit.c
@@ -0,0 +1,415 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-internal.h"
+#include "bus-message.h"
+#include "bus-polkit.h"
+#include "bus-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int check_good_user(sd_bus_message *m, uid_t good_user) {
+ _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+ uid_t sender_uid;
+ int r;
+
+ assert(m);
+
+ if (good_user == UID_INVALID)
+ return 0;
+
+ r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_EUID, &creds);
+ if (r < 0)
+ return r;
+
+ /* Don't trust augmented credentials for authorization */
+ assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EUID) == 0, -EPERM);
+
+ r = sd_bus_creds_get_euid(creds, &sender_uid);
+ if (r < 0)
+ return r;
+
+ return sender_uid == good_user;
+}
+
+#if ENABLE_POLKIT
+static int bus_message_append_strv_key_value(
+ sd_bus_message *m,
+ const char **l) {
+
+ const char **k, **v;
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_open_container(m, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(k, v, l) {
+ r = sd_bus_message_append(m, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return r;
+
+ return r;
+}
+#endif
+
+int bus_test_polkit(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ uid_t good_user,
+ bool *_challenge,
+ sd_bus_error *ret_error) {
+
+ int r;
+
+ assert(call);
+ assert(action);
+
+ /* Tests non-interactively! */
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+#if ENABLE_POLKIT
+ else {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *request = NULL;
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ int authorized = false, challenge = false;
+ const char *sender;
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &request,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ request,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = bus_message_append_strv_key_value(request, details);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(request, "us", 0, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_call(call->bus, request, 0, ret_error, &reply);
+ if (r < 0) {
+ /* Treat no PK available as access denied */
+ if (bus_error_is_unknown_service(ret_error)) {
+ sd_bus_error_free(ret_error);
+ return -EACCES;
+ }
+
+ return r;
+ }
+
+ r = sd_bus_message_enter_container(reply, 'r', "bba{ss}");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(reply, "bb", &authorized, &challenge);
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ if (_challenge) {
+ *_challenge = challenge;
+ return 0;
+ }
+ }
+#endif
+
+ return -EACCES;
+}
+
+#if ENABLE_POLKIT
+
+typedef struct AsyncPolkitQuery {
+ char *action;
+ char **details;
+
+ sd_bus_message *request, *reply;
+ sd_bus_slot *slot;
+
+ Hashmap *registry;
+ sd_event_source *defer_event_source;
+} AsyncPolkitQuery;
+
+static void async_polkit_query_free(AsyncPolkitQuery *q) {
+ if (!q)
+ return;
+
+ sd_bus_slot_unref(q->slot);
+
+ if (q->registry && q->request)
+ hashmap_remove(q->registry, q->request);
+
+ sd_bus_message_unref(q->request);
+ sd_bus_message_unref(q->reply);
+
+ free(q->action);
+ strv_free(q->details);
+
+ sd_event_source_disable_unref(q->defer_event_source);
+ free(q);
+}
+
+static int async_polkit_defer(sd_event_source *s, void *userdata) {
+ AsyncPolkitQuery *q = userdata;
+
+ assert(s);
+
+ /* This is called as idle event source after we processed the async polkit reply, hopefully after the
+ * method call we re-enqueued has been properly processed. */
+
+ async_polkit_query_free(q);
+ return 0;
+}
+
+static int async_polkit_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) {
+ AsyncPolkitQuery *q = userdata;
+ int r;
+
+ assert(reply);
+ assert(q);
+
+ assert(q->slot);
+ q->slot = sd_bus_slot_unref(q->slot);
+
+ assert(!q->reply);
+ q->reply = sd_bus_message_ref(reply);
+
+ /* Now, let's dispatch the original message a second time be re-enqueing. This will then traverse the
+ * whole message processing again, and thus re-validating and re-retrieving the "userdata" field
+ * again.
+ *
+ * We install an idle event loop event to clean-up the PolicyKit request data when we are idle again,
+ * i.e. after the second time the message is processed is complete. */
+
+ assert(!q->defer_event_source);
+ r = sd_event_add_defer(sd_bus_get_event(sd_bus_message_get_bus(reply)), &q->defer_event_source, async_polkit_defer, q);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(q->defer_event_source, SD_EVENT_PRIORITY_IDLE);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_enabled(q->defer_event_source, SD_EVENT_ONESHOT);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_message_rewind(q->request, true);
+ if (r < 0)
+ goto fail;
+
+ r = sd_bus_enqueue_for_read(sd_bus_message_get_bus(q->request), q->request);
+ if (r < 0)
+ goto fail;
+
+ return 1;
+
+fail:
+ log_debug_errno(r, "Processing asynchronous PolicyKit reply failed, ignoring: %m");
+ (void) sd_bus_reply_method_errno(q->request, r, NULL);
+ async_polkit_query_free(q);
+ return r;
+}
+
+#endif
+
+int bus_verify_polkit_async(
+ sd_bus_message *call,
+ int capability,
+ const char *action,
+ const char **details,
+ bool interactive,
+ uid_t good_user,
+ Hashmap **registry,
+ sd_bus_error *ret_error) {
+
+#if ENABLE_POLKIT
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *pk = NULL;
+ AsyncPolkitQuery *q;
+ int c;
+#endif
+ const char *sender;
+ int r;
+
+ assert(call);
+ assert(action);
+ assert(registry);
+
+ r = check_good_user(call, good_user);
+ if (r != 0)
+ return r;
+
+#if ENABLE_POLKIT
+ q = hashmap_get(*registry, call);
+ if (q) {
+ int authorized, challenge;
+
+ /* This is the second invocation of this function, and there's already a response from
+ * polkit, let's process it */
+ assert(q->reply);
+
+ /* If the operation we want to authenticate changed between the first and the second time,
+ * let's not use this authentication, it might be out of date as the object and context we
+ * operate on might have changed. */
+ if (!streq(q->action, action) ||
+ !strv_equal(q->details, (char**) details))
+ return -ESTALE;
+
+ if (sd_bus_message_is_method_error(q->reply, NULL)) {
+ const sd_bus_error *e;
+
+ e = sd_bus_message_get_error(q->reply);
+
+ /* Treat no PK available as access denied */
+ if (bus_error_is_unknown_service(e))
+ return -EACCES;
+
+ /* Copy error from polkit reply */
+ sd_bus_error_copy(ret_error, e);
+ return -sd_bus_error_get_errno(e);
+ }
+
+ r = sd_bus_message_enter_container(q->reply, 'r', "bba{ss}");
+ if (r >= 0)
+ r = sd_bus_message_read(q->reply, "bb", &authorized, &challenge);
+ if (r < 0)
+ return r;
+
+ if (authorized)
+ return 1;
+
+ if (challenge)
+ return sd_bus_error_set(ret_error, SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, "Interactive authentication required.");
+
+ return -EACCES;
+ }
+#endif
+
+ r = sd_bus_query_sender_privilege(call, capability);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ return 1;
+
+ sender = sd_bus_message_get_sender(call);
+ if (!sender)
+ return -EBADMSG;
+
+#if ENABLE_POLKIT
+ c = sd_bus_message_get_allow_interactive_authorization(call);
+ if (c < 0)
+ return c;
+ if (c > 0)
+ interactive = true;
+
+ r = hashmap_ensure_allocated(registry, NULL);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_new_method_call(
+ call->bus,
+ &pk,
+ "org.freedesktop.PolicyKit1",
+ "/org/freedesktop/PolicyKit1/Authority",
+ "org.freedesktop.PolicyKit1.Authority",
+ "CheckAuthorization");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(
+ pk,
+ "(sa{sv})s",
+ "system-bus-name", 1, "name", "s", sender,
+ action);
+ if (r < 0)
+ return r;
+
+ r = bus_message_append_strv_key_value(pk, details);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(pk, "us", interactive, NULL);
+ if (r < 0)
+ return r;
+
+ q = new(AsyncPolkitQuery, 1);
+ if (!q)
+ return -ENOMEM;
+
+ *q = (AsyncPolkitQuery) {
+ .request = sd_bus_message_ref(call),
+ };
+
+ q->action = strdup(action);
+ if (!q->action) {
+ async_polkit_query_free(q);
+ return -ENOMEM;
+ }
+
+ q->details = strv_copy((char**) details);
+ if (!q->details) {
+ async_polkit_query_free(q);
+ return -ENOMEM;
+ }
+
+ r = hashmap_put(*registry, call, q);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ q->registry = *registry;
+
+ r = sd_bus_call_async(call->bus, &q->slot, pk, async_polkit_callback, q, 0);
+ if (r < 0) {
+ async_polkit_query_free(q);
+ return r;
+ }
+
+ return 0;
+#endif
+
+ return -EACCES;
+}
+
+void bus_verify_polkit_async_registry_free(Hashmap *registry) {
+#if ENABLE_POLKIT
+ hashmap_free_with_destructor(registry, async_polkit_query_free);
+#endif
+}
diff --git a/src/shared/bus-polkit.h b/src/shared/bus-polkit.h
new file mode 100644
index 0000000..91a88a2
--- /dev/null
+++ b/src/shared/bus-polkit.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "hashmap.h"
+
+int bus_test_polkit(sd_bus_message *call, int capability, const char *action, const char **details, uid_t good_user, bool *_challenge, sd_bus_error *e);
+
+int bus_verify_polkit_async(sd_bus_message *call, int capability, const char *action, const char **details, bool interactive, uid_t good_user, Hashmap **registry, sd_bus_error *error);
+void bus_verify_polkit_async_registry_free(Hashmap *registry);
diff --git a/src/shared/bus-print-properties.c b/src/shared/bus-print-properties.c
new file mode 100644
index 0000000..4cea250
--- /dev/null
+++ b/src/shared/bus-print-properties.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-print-properties.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "escape.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "parse-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "user-util.h"
+
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *value) {
+ assert(name);
+
+ if (expected_value && !streq_ptr(expected_value, value))
+ return 0;
+
+ if (only_value)
+ puts(value);
+ else
+ printf("%s=%s\n", name, value);
+
+ return 0;
+}
+
+int bus_print_property_valuef(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) {
+ va_list ap;
+ int r;
+
+ assert(name);
+ assert(fmt);
+
+ if (expected_value) {
+ _cleanup_free_ char *s = NULL;
+
+ va_start(ap, fmt);
+ r = vasprintf(&s, fmt, ap);
+ va_end(ap);
+ if (r < 0)
+ return -ENOMEM;
+
+ if (streq_ptr(expected_value, s)) {
+ if (only_value)
+ puts(s);
+ else
+ printf("%s=%s\n", name, s);
+ }
+
+ return 0;
+ }
+
+ if (!only_value)
+ printf("%s=", name);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ puts("");
+
+ return 0;
+}
+
+static int bus_print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) {
+ char type;
+ const char *contents;
+ int r;
+
+ assert(name);
+ assert(m);
+
+ r = sd_bus_message_peek_type(m, &type, &contents);
+ if (r < 0)
+ return r;
+
+ switch (type) {
+
+ case SD_BUS_TYPE_STRING: {
+ const char *s;
+
+ r = sd_bus_message_read_basic(m, type, &s);
+ if (r < 0)
+ return r;
+
+ if (all || !isempty(s)) {
+ bool good;
+
+ /* This property has a single value, so we need to take
+ * care not to print a new line, everything else is OK. */
+ good = !strchr(s, '\n');
+ bus_print_property_value(name, expected_value, value, good ? s : "[unprintable]");
+ }
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_BOOLEAN: {
+ int b;
+
+ r = sd_bus_message_read_basic(m, type, &b);
+ if (r < 0)
+ return r;
+
+ if (expected_value && parse_boolean(expected_value) != b)
+ return 1;
+
+ bus_print_property_value(name, NULL, value, yes_no(b));
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT64: {
+ uint64_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ /* Yes, heuristics! But we can change this check
+ * should it turn out to not be sufficient */
+
+ if (endswith(name, "Timestamp") ||
+ STR_IN_SET(name, "NextElapseUSecRealtime", "LastTriggerUSec", "TimeUSec", "RTCTimeUSec")) {
+ char timestamp[FORMAT_TIMESTAMP_MAX];
+ const char *t;
+
+ t = format_timestamp(timestamp, sizeof(timestamp), u);
+ if (t || all)
+ bus_print_property_value(name, expected_value, value, strempty(t));
+
+ } else if (strstr(name, "USec")) {
+ char timespan[FORMAT_TIMESPAN_MAX];
+
+ (void) format_timespan(timespan, sizeof(timespan), u, 0);
+ bus_print_property_value(name, expected_value, value, timespan);
+
+ } else if (streq(name, "CoredumpFilter")) {
+ char buf[STRLEN("0xFFFFFFFF")];
+
+ xsprintf(buf, "0x%"PRIx64, u);
+ bus_print_property_value(name, expected_value, value, buf);
+
+ } else if (streq(name, "RestrictNamespaces")) {
+ _cleanup_free_ char *s = NULL;
+ const char *result;
+
+ if ((u & NAMESPACE_FLAGS_ALL) == 0)
+ result = "yes";
+ else if (FLAGS_SET(u, NAMESPACE_FLAGS_ALL))
+ result = "no";
+ else {
+ r = namespace_flags_to_string(u, &s);
+ if (r < 0)
+ return r;
+
+ result = strempty(s);
+ }
+
+ bus_print_property_value(name, expected_value, value, result);
+
+ } else if (streq(name, "MountFlags")) {
+ const char *result;
+
+ result = mount_propagation_flags_to_string(u);
+ if (!result)
+ return -EINVAL;
+
+ bus_print_property_value(name, expected_value, value, result);
+
+ } else if (STR_IN_SET(name, "CapabilityBoundingSet", "AmbientCapabilities")) {
+ _cleanup_free_ char *s = NULL;
+
+ r = capability_set_to_string_alloc(u, &s);
+ if (r < 0)
+ return r;
+
+ bus_print_property_value(name, expected_value, value, s);
+
+ } else if ((STR_IN_SET(name, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight") && u == CGROUP_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "CPUShares", "StartupCPUShares") && u == CGROUP_CPU_SHARES_INVALID) ||
+ (STR_IN_SET(name, "BlockIOWeight", "StartupBlockIOWeight") && u == CGROUP_BLKIO_WEIGHT_INVALID) ||
+ (STR_IN_SET(name, "MemoryCurrent", "TasksCurrent") && u == (uint64_t) -1) ||
+ (endswith(name, "NSec") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "[not set]");
+
+ else if ((STR_IN_SET(name, "DefaultMemoryLow", "DefaultMemoryMin", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) ||
+ (STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == (uint64_t) -1) ||
+ (startswith(name, "Limit") && u == (uint64_t) -1) ||
+ (startswith(name, "DefaultLimit") && u == (uint64_t) -1))
+
+ bus_print_property_value(name, expected_value, value, "infinity");
+ else if (STR_IN_SET(name, "IPIngressBytes", "IPIngressPackets", "IPEgressBytes", "IPEgressPackets") && u == (uint64_t) -1)
+ bus_print_property_value(name, expected_value, value, "[no data]");
+ else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu64, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT64: {
+ int64_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_valuef(name, expected_value, value, "%"PRIi64, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_UINT32: {
+ uint32_t u;
+
+ r = sd_bus_message_read_basic(m, type, &u);
+ if (r < 0)
+ return r;
+
+ if (strstr(name, "UMask") || strstr(name, "Mode"))
+ bus_print_property_valuef(name, expected_value, value, "%04o", u);
+
+ else if (streq(name, "UID")) {
+ if (u == UID_INVALID)
+ bus_print_property_value(name, expected_value, value, "[not set]");
+ else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u);
+ } else if (streq(name, "GID")) {
+ if (u == GID_INVALID)
+ bus_print_property_value(name, expected_value, value, "[not set]");
+ else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u);
+ } else
+ bus_print_property_valuef(name, expected_value, value, "%"PRIu32, u);
+
+ return 1;
+ }
+
+ case SD_BUS_TYPE_INT32: {
+ int32_t i;
+
+ r = sd_bus_message_read_basic(m, type, &i);
+ if (r < 0)
+ return r;
+
+ bus_print_property_valuef(name, expected_value, value, "%"PRIi32, i);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_DOUBLE: {
+ double d;
+
+ r = sd_bus_message_read_basic(m, type, &d);
+ if (r < 0)
+ return r;
+
+ bus_print_property_valuef(name, expected_value, value, "%g", d);
+ return 1;
+ }
+
+ case SD_BUS_TYPE_ARRAY:
+ if (streq(contents, "s")) {
+ bool first = true;
+ const char *str;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, contents);
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &str)) > 0) {
+ _cleanup_free_ char *e = NULL;
+
+ e = shell_maybe_quote(str, ESCAPE_BACKSLASH_ONELINE);
+ if (!e)
+ return -ENOMEM;
+
+ if (first) {
+ if (!value)
+ printf("%s=", name);
+ first = false;
+ } else
+ fputs(" ", stdout);
+
+ fputs(e, stdout);
+ }
+ if (r < 0)
+ return r;
+
+ if (first && all && !value)
+ printf("%s=", name);
+ if (!first || all)
+ puts("");
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 1;
+
+ } else if (streq(contents, "y")) {
+ const uint8_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ for (i = 0; i < n; i++)
+ printf("%02x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+
+ } else if (streq(contents, "u")) {
+ uint32_t *u;
+ size_t n;
+
+ r = sd_bus_message_read_array(m, SD_BUS_TYPE_UINT32, (const void**) &u, &n);
+ if (r < 0)
+ return r;
+
+ if (all || n > 0) {
+ unsigned i;
+
+ if (!value)
+ printf("%s=", name);
+
+ for (i = 0; i < n; i++)
+ printf("%08x", u[i]);
+
+ puts("");
+ }
+
+ return 1;
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int bus_message_print_all_properties(
+ sd_bus_message *m,
+ bus_message_print_t func,
+ char **filter,
+ bool value,
+ bool all,
+ Set **found_properties) {
+
+ int r;
+
+ assert(m);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+ if (r < 0)
+ return r;
+
+ while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+ _cleanup_free_ char *name_with_equal = NULL;
+ const char *name, *contents, *expected_value = NULL;
+
+ r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &name);
+ if (r < 0)
+ return r;
+
+ if (found_properties) {
+ r = set_ensure_put(found_properties, &string_hash_ops, name);
+ if (r < 0)
+ return log_oom();
+ }
+
+ name_with_equal = strjoin(name, "=");
+ if (!name_with_equal)
+ return log_oom();
+
+ if (!filter || strv_find(filter, name) ||
+ (expected_value = strv_find_startswith(filter, name_with_equal))) {
+ r = sd_bus_message_peek_type(m, NULL, &contents);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+ if (r < 0)
+ return r;
+
+ if (func)
+ r = func(name, expected_value, m, value, all);
+ if (!func || r == 0)
+ r = bus_print_property(name, expected_value, m, value, all);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (all && !expected_value)
+ printf("%s=[unprintable]\n", name);
+ /* skip what we didn't read */
+ r = sd_bus_message_skip(m, contents);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ } else {
+ r = sd_bus_message_skip(m, "v");
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bus_print_all_properties(
+ sd_bus *bus,
+ const char *dest,
+ const char *path,
+ bus_message_print_t func,
+ char **filter,
+ bool value,
+ bool all,
+ Set **found_properties) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ int r;
+
+ assert(bus);
+ assert(path);
+
+ r = sd_bus_call_method(bus,
+ dest,
+ path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ &error,
+ &reply,
+ "s", "");
+ if (r < 0)
+ return r;
+
+ return bus_message_print_all_properties(reply, func, filter, value, all, found_properties);
+}
diff --git a/src/shared/bus-print-properties.h b/src/shared/bus-print-properties.h
new file mode 100644
index 0000000..a457475
--- /dev/null
+++ b/src/shared/bus-print-properties.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-bus.h"
+
+#include "macro.h"
+#include "set.h"
+
+typedef int (*bus_message_print_t) (const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all);
+
+int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *value);
+int bus_print_property_valuef(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) _printf_(4,5);
+int bus_message_print_all_properties(sd_bus_message *m, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
+int bus_print_all_properties(sd_bus *bus, const char *dest, const char *path, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties);
diff --git a/src/shared/bus-unit-procs.c b/src/shared/bus-unit-procs.c
new file mode 100644
index 0000000..3e97be9
--- /dev/null
+++ b/src/shared/bus-unit-procs.c
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-unit-procs.h"
+#include "hashmap.h"
+#include "list.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+
+struct CGroupInfo {
+ char *cgroup_path;
+ bool is_const; /* If false, cgroup_path should be free()'d */
+
+ Hashmap *pids; /* PID → process name */
+ bool done;
+
+ struct CGroupInfo *parent;
+ LIST_FIELDS(struct CGroupInfo, siblings);
+ LIST_HEAD(struct CGroupInfo, children);
+ size_t n_children;
+};
+
+static int add_cgroup(Hashmap *cgroups, const char *path, bool is_const, struct CGroupInfo **ret) {
+ struct CGroupInfo *parent = NULL, *cg;
+ int r;
+
+ assert(cgroups);
+ assert(ret);
+
+ path = empty_to_root(path);
+
+ cg = hashmap_get(cgroups, path);
+ if (cg) {
+ *ret = cg;
+ return 0;
+ }
+
+ if (!empty_or_root(path)) {
+ const char *e, *pp;
+
+ e = strrchr(path, '/');
+ if (!e)
+ return -EINVAL;
+
+ pp = strndupa(path, e - path);
+
+ r = add_cgroup(cgroups, pp, false, &parent);
+ if (r < 0)
+ return r;
+ }
+
+ cg = new0(struct CGroupInfo, 1);
+ if (!cg)
+ return -ENOMEM;
+
+ if (is_const)
+ cg->cgroup_path = (char*) path;
+ else {
+ cg->cgroup_path = strdup(path);
+ if (!cg->cgroup_path) {
+ free(cg);
+ return -ENOMEM;
+ }
+ }
+
+ cg->is_const = is_const;
+ cg->parent = parent;
+
+ r = hashmap_put(cgroups, cg->cgroup_path, cg);
+ if (r < 0) {
+ if (!is_const)
+ free(cg->cgroup_path);
+ free(cg);
+ return r;
+ }
+
+ if (parent) {
+ LIST_PREPEND(siblings, parent->children, cg);
+ parent->n_children++;
+ }
+
+ *ret = cg;
+ return 1;
+}
+
+static int add_process(
+ Hashmap *cgroups,
+ const char *path,
+ pid_t pid,
+ const char *name) {
+
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(cgroups);
+ assert(name);
+ assert(pid > 0);
+
+ r = add_cgroup(cgroups, path, true, &cg);
+ if (r < 0)
+ return r;
+
+ r = hashmap_ensure_allocated(&cg->pids, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ return hashmap_put(cg->pids, PID_TO_PTR(pid), (void*) name);
+}
+
+static void remove_cgroup(Hashmap *cgroups, struct CGroupInfo *cg) {
+ assert(cgroups);
+ assert(cg);
+
+ while (cg->children)
+ remove_cgroup(cgroups, cg->children);
+
+ hashmap_remove(cgroups, cg->cgroup_path);
+
+ if (!cg->is_const)
+ free(cg->cgroup_path);
+
+ hashmap_free(cg->pids);
+
+ if (cg->parent)
+ LIST_REMOVE(siblings, cg->parent->children, cg);
+
+ free(cg);
+}
+
+static int cgroup_info_compare_func(struct CGroupInfo * const *a, struct CGroupInfo * const *b) {
+ return strcmp((*a)->cgroup_path, (*b)->cgroup_path);
+}
+
+static int dump_processes(
+ Hashmap *cgroups,
+ const char *cgroup_path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(prefix);
+
+ cgroup_path = empty_to_root(cgroup_path);
+
+ cg = hashmap_get(cgroups, cgroup_path);
+ if (!cg)
+ return 0;
+
+ if (!hashmap_isempty(cg->pids)) {
+ const char *name;
+ size_t n = 0, i;
+ pid_t *pids;
+ void *pidp;
+ int width;
+
+ /* Order processes by their PID */
+ pids = newa(pid_t, hashmap_size(cg->pids));
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids)
+ pids[n++] = PTR_TO_PID(pidp);
+
+ assert(n == hashmap_size(cg->pids));
+ typesafe_qsort(pids, n, pid_compare_func);
+
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *e = NULL;
+ const char *special;
+ bool more;
+
+ name = hashmap_get(cg->pids, PID_TO_PTR(pids[i]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned k;
+
+ k = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, k, 100);
+ if (e)
+ name = e;
+ }
+
+ more = i+1 < n || cg->children;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fprintf(stdout, "%s%s%*"PID_PRI" %s\n",
+ prefix,
+ special,
+ width, pids[i],
+ name);
+ }
+ }
+
+ if (cg->children) {
+ struct CGroupInfo **children, *child;
+ size_t n = 0, i;
+
+ /* Order subcgroups by their name */
+ children = newa(struct CGroupInfo*, cg->n_children);
+ LIST_FOREACH(siblings, child, cg->children)
+ children[n++] = child;
+ assert(n == cg->n_children);
+ typesafe_qsort(children, n, cgroup_info_compare_func);
+
+ if (n_columns != 0)
+ n_columns = MAX(LESS_BY(n_columns, 2U), 20U);
+
+ for (i = 0; i < n; i++) {
+ _cleanup_free_ char *pp = NULL;
+ const char *name, *special;
+ bool more;
+
+ child = children[i];
+
+ name = strrchr(child->cgroup_path, '/');
+ if (!name)
+ return -EINVAL;
+ name++;
+
+ more = i+1 < n;
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT);
+
+ fputs(prefix, stdout);
+ fputs(special, stdout);
+ fputs(name, stdout);
+ fputc('\n', stdout);
+
+ special = special_glyph(more ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE);
+
+ pp = strjoin(prefix, special);
+ if (!pp)
+ return -ENOMEM;
+
+ r = dump_processes(cgroups, child->cgroup_path, pp, n_columns, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ cg->done = true;
+ return 0;
+}
+
+static int dump_extra_processes(
+ Hashmap *cgroups,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags) {
+
+ _cleanup_free_ pid_t *pids = NULL;
+ _cleanup_hashmap_free_ Hashmap *names = NULL;
+ struct CGroupInfo *cg;
+ size_t n_allocated = 0, n = 0, k;
+ int width, r;
+
+ /* Prints the extra processes, i.e. those that are in cgroups we haven't displayed yet. We show them as
+ * combined, sorted, linear list. */
+
+ HASHMAP_FOREACH(cg, cgroups) {
+ const char *name;
+ void *pidp;
+
+ if (cg->done)
+ continue;
+
+ if (hashmap_isempty(cg->pids))
+ continue;
+
+ r = hashmap_ensure_allocated(&names, &trivial_hash_ops);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC(pids, n_allocated, n + hashmap_size(cg->pids)))
+ return -ENOMEM;
+
+ HASHMAP_FOREACH_KEY(name, pidp, cg->pids) {
+ pids[n++] = PTR_TO_PID(pidp);
+
+ r = hashmap_put(names, pidp, (void*) name);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (n == 0)
+ return 0;
+
+ typesafe_qsort(pids, n, pid_compare_func);
+ width = DECIMAL_STR_WIDTH(pids[n-1]);
+
+ for (k = 0; k < n; k++) {
+ _cleanup_free_ char *e = NULL;
+ const char *name;
+
+ name = hashmap_get(names, PID_TO_PTR(pids[k]));
+ assert(name);
+
+ if (n_columns != 0) {
+ unsigned z;
+
+ z = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U);
+
+ e = ellipsize(name, z, 100);
+ if (e)
+ name = e;
+ }
+
+ fprintf(stdout, "%s%s %*" PID_PRI " %s\n",
+ prefix,
+ special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET),
+ width, pids[k],
+ name);
+ }
+
+ return 0;
+}
+
+int unit_show_processes(
+ sd_bus *bus,
+ const char *unit,
+ const char *cgroup_path,
+ const char *prefix,
+ unsigned n_columns,
+ OutputFlags flags,
+ sd_bus_error *error) {
+
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ Hashmap *cgroups = NULL;
+ struct CGroupInfo *cg;
+ int r;
+
+ assert(bus);
+ assert(unit);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ n_columns = 0;
+ else if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "GetUnitProcesses",
+ error,
+ &reply,
+ "s",
+ unit);
+ if (r < 0)
+ return r;
+
+ cgroups = hashmap_new(&path_hash_ops);
+ if (!cgroups)
+ return -ENOMEM;
+
+ r = sd_bus_message_enter_container(reply, 'a', "(sus)");
+ if (r < 0)
+ goto finish;
+
+ for (;;) {
+ const char *path = NULL, *name = NULL;
+ uint32_t pid;
+
+ r = sd_bus_message_read(reply, "(sus)", &path, &pid, &name);
+ if (r < 0)
+ goto finish;
+ if (r == 0)
+ break;
+
+ r = add_process(cgroups, path, pid, name);
+ if (r == -ENOMEM)
+ goto finish;
+ if (r < 0)
+ log_warning_errno(r, "Invalid process description in GetUnitProcesses reply: cgroup=\"%s\" pid="PID_FMT" command=\"%s\", ignoring: %m",
+ path, pid, name);
+ }
+
+ r = sd_bus_message_exit_container(reply);
+ if (r < 0)
+ goto finish;
+
+ r = dump_processes(cgroups, cgroup_path, prefix, n_columns, flags);
+ if (r < 0)
+ goto finish;
+
+ r = dump_extra_processes(cgroups, prefix, n_columns, flags);
+
+finish:
+ while ((cg = hashmap_first(cgroups)))
+ remove_cgroup(cgroups, cg);
+
+ hashmap_free(cgroups);
+
+ return r;
+}
diff --git a/src/shared/bus-unit-procs.h b/src/shared/bus-unit-procs.h
new file mode 100644
index 0000000..78c5569
--- /dev/null
+++ b/src/shared/bus-unit-procs.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "output-mode.h"
+
+int unit_show_processes(sd_bus *bus, const char *unit, const char *cgroup_path, const char *prefix, unsigned n_columns, OutputFlags flags, sd_bus_error *error);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
new file mode 100644
index 0000000..2bab229
--- /dev/null
+++ b/src/shared/bus-unit-util.c
@@ -0,0 +1,2432 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-unit-util.h"
+#include "bus-util.h"
+#include "cap-list.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "coredump-util.h"
+#include "cpu-set-util.h"
+#include "dissect-image.h"
+#include "escape.h"
+#include "exec-util.h"
+#include "exit-status.h"
+#include "fileio.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
+#include "ip-protocol-list.h"
+#include "libmount-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "missing_fs.h"
+#include "mountpoint-util.h"
+#include "nsflags.h"
+#include "numa-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#if HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+#include "securebits-util.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "sort-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "syslog-util.h"
+#include "terminal-util.h"
+#include "unit-def.h"
+#include "user-util.h"
+#include "utf8.h"
+
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
+ assert(message);
+ assert(u);
+
+ u->machine = NULL;
+
+ return sd_bus_message_read(
+ message,
+ "(ssssssouso)",
+ &u->id,
+ &u->description,
+ &u->load_state,
+ &u->active_state,
+ &u->sub_state,
+ &u->following,
+ &u->unit_path,
+ &u->job_id,
+ &u->job_type,
+ &u->job_path);
+}
+
+#define DEFINE_BUS_APPEND_PARSE_PTR(bus_type, cast_type, type, parse_func) \
+ static int bus_append_##parse_func( \
+ sd_bus_message *m, \
+ const char *field, \
+ const char *eq) { \
+ type val; \
+ int r; \
+ \
+ r = parse_func(eq, &val); \
+ if (r < 0) \
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); \
+ \
+ r = sd_bus_message_append(m, "(sv)", field, \
+ bus_type, (cast_type) val); \
+ if (r < 0) \
+ return bus_log_create_error(r); \
+ \
+ return 1; \
+ }
+
+#define DEFINE_BUS_APPEND_PARSE(bus_type, parse_func) \
+ static int bus_append_##parse_func( \
+ sd_bus_message *m, \
+ const char *field, \
+ const char *eq) { \
+ int r; \
+ \
+ r = parse_func(eq); \
+ if (r < 0) \
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s: %s", field, eq); \
+ \
+ r = sd_bus_message_append(m, "(sv)", field, \
+ bus_type, (int32_t) r); \
+ if (r < 0) \
+ return bus_log_create_error(r); \
+ \
+ return 1; \
+ }
+
+DEFINE_BUS_APPEND_PARSE("b", parse_boolean);
+DEFINE_BUS_APPEND_PARSE("i", ioprio_class_from_string);
+DEFINE_BUS_APPEND_PARSE("i", ip_tos_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_facility_unshifted_from_string);
+DEFINE_BUS_APPEND_PARSE("i", log_level_from_string);
+#if !HAVE_SECCOMP
+static inline int seccomp_parse_errno_or_action(const char *eq) { return -EINVAL; }
+#endif
+DEFINE_BUS_APPEND_PARSE("i", seccomp_parse_errno_or_action);
+DEFINE_BUS_APPEND_PARSE("i", sched_policy_from_string);
+DEFINE_BUS_APPEND_PARSE("i", secure_bits_from_string);
+DEFINE_BUS_APPEND_PARSE("i", signal_from_string);
+DEFINE_BUS_APPEND_PARSE("i", parse_ip_protocol);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, ioprio_parse_priority);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, parse_nice);
+DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, safe_atoi);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, nsec_t, parse_nsec);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_blkio_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_cpu_shares_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_weight_parse);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, unsigned long, mount_propagation_flags_from_string);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, safe_atou64);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, mode_t, parse_mode);
+DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, unsigned, safe_atou);
+DEFINE_BUS_APPEND_PARSE_PTR("x", int64_t, int64_t, safe_atoi64);
+DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, coredump_filter_mask_from_string);
+
+static int bus_append_string(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ r = sd_bus_message_append(m, "(sv)", field, "s", eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_strv(sd_bus_message *m, const char *field, const char *eq, ExtractFlags flags) {
+ const char *p;
+ int r;
+
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "as");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (p = eq;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, flags);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax: %s", eq);
+
+ r = sd_bus_message_append_basic(m, 's', word);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_byte_array(sd_bus_message *m, const char *field, const void *buf, size_t n) {
+ int r;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "ay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', buf, n);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_parse_sec_rename(sd_bus_message *m, const char *field, const char *eq) {
+ char *n;
+ usec_t t;
+ size_t l;
+ int r;
+
+ r = parse_sec(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ l = strlen(field);
+ n = newa(char, l + 2);
+ /* Change suffix Sec → USec */
+ strcpy(mempcpy(n, field, l - 3), "USec");
+
+ r = sd_bus_message_append(m, "(sv)", n, "t", t);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_parse_size(sd_bus_message *m, const char *field, const char *eq, uint64_t base) {
+ uint64_t v;
+ int r;
+
+ r = parse_size(eq, base, &v);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", v);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_exec_command(sd_bus_message *m, const char *field, const char *eq) {
+ bool explicit_path = false, done = false;
+ _cleanup_strv_free_ char **l = NULL, **ex_opts = NULL;
+ _cleanup_free_ char *path = NULL, *upgraded_name = NULL;
+ ExecCommandFlags flags = 0;
+ bool is_ex_prop = endswith(field, "Ex");
+ int r;
+
+ do {
+ switch (*eq) {
+
+ case '-':
+ if (FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE))
+ done = true;
+ else {
+ flags |= EXEC_COMMAND_IGNORE_FAILURE;
+ eq++;
+ }
+ break;
+
+ case '@':
+ if (explicit_path)
+ done = true;
+ else {
+ explicit_path = true;
+ eq++;
+ }
+ break;
+
+ case ':':
+ if (FLAGS_SET(flags, EXEC_COMMAND_NO_ENV_EXPAND))
+ done = true;
+ else {
+ flags |= EXEC_COMMAND_NO_ENV_EXPAND;
+ eq++;
+ }
+ break;
+
+ case '+':
+ if (flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC))
+ done = true;
+ else {
+ flags |= EXEC_COMMAND_FULLY_PRIVILEGED;
+ eq++;
+ }
+ break;
+
+ case '!':
+ if (flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_AMBIENT_MAGIC))
+ done = true;
+ else if (FLAGS_SET(flags, EXEC_COMMAND_NO_SETUID)) {
+ flags &= ~EXEC_COMMAND_NO_SETUID;
+ flags |= EXEC_COMMAND_AMBIENT_MAGIC;
+ eq++;
+ } else {
+ flags |= EXEC_COMMAND_NO_SETUID;
+ eq++;
+ }
+ break;
+
+ default:
+ done = true;
+ break;
+ }
+ } while (!done);
+
+ if (!is_ex_prop && (flags & (EXEC_COMMAND_NO_ENV_EXPAND|EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID|EXEC_COMMAND_AMBIENT_MAGIC))) {
+ /* Upgrade the ExecXYZ= property to ExecXYZEx= for convenience */
+ is_ex_prop = true;
+ upgraded_name = strjoin(field, "Ex");
+ if (!upgraded_name)
+ return log_oom();
+ }
+
+ if (is_ex_prop) {
+ r = exec_command_flags_to_strv(flags, &ex_opts);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert ExecCommandFlags to strv: %m");
+ }
+
+ if (explicit_path) {
+ r = extract_first_word(&eq, &path, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse path: %m");
+ }
+
+ r = strv_split_full(&l, eq, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse command line: %m");
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, upgraded_name ?: field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', is_ex_prop ? "a(sasas)" : "a(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', is_ex_prop ? "(sasas)" : "(sasb)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (!strv_isempty(l)) {
+
+ r = sd_bus_message_open_container(m, 'r', is_ex_prop ? "sasas" : "sasb");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", path ?: l[0]);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_strv(m, l);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = is_ex_prop ? sd_bus_message_append_strv(m, ex_opts) : sd_bus_message_append(m, "b", FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+}
+
+static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
+ int r;
+
+ assert(m);
+ assert(prefix);
+
+ r = sd_bus_message_open_container(m, 'r', "iayu");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "i", family);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(m, "u", prefixlen);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(m);
+}
+
+static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "DevicePolicy",
+ "Slice",
+ "ManagedOOMSwap",
+ "ManagedOOMMemoryPressure",
+ "ManagedOOMMemoryPressureLimitPercent"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUAccounting",
+ "MemoryAccounting",
+ "IOAccounting",
+ "BlockIOAccounting",
+ "TasksAccounting",
+ "IPAccounting"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUWeight",
+ "StartupCPUWeight",
+ "IOWeight",
+ "StartupIOWeight"))
+ return bus_append_cg_weight_parse(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUShares",
+ "StartupCPUShares"))
+ return bus_append_cg_cpu_shares_parse(m, field, eq);
+
+ if (STR_IN_SET(field, "AllowedCPUs",
+ "AllowedMemoryNodes")) {
+ _cleanup_(cpu_set_reset) CPUSet cpuset = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ r = parse_cpu_set(eq, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = cpu_set_to_dbus(&cpuset, &array, &allocated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to serialize CPUSet: %m");
+
+ return bus_append_byte_array(m, field, array, allocated);
+ }
+
+ if (STR_IN_SET(field, "BlockIOWeight",
+ "StartupBlockIOWeight"))
+ return bus_append_cg_blkio_weight_parse(m, field, eq);
+
+ if (streq(field, "DisableControllers"))
+ return bus_append_strv(m, "DisableControllers", eq, EXTRACT_UNQUOTE);
+
+ if (streq(field, "Delegate")) {
+ r = parse_boolean(eq);
+ if (r < 0)
+ return bus_append_strv(m, "DelegateControllers", eq, EXTRACT_UNQUOTE);
+
+ r = sd_bus_message_append(m, "(sv)", "Delegate", "b", r);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "MemoryMin",
+ "DefaultMemoryLow",
+ "DefaultMemoryMin",
+ "MemoryLow",
+ "MemoryHigh",
+ "MemoryMax",
+ "MemorySwapMax",
+ "MemoryLimit",
+ "TasksMax")) {
+
+ if (streq(eq, "infinity")) {
+ r = sd_bus_message_append(m, "(sv)", field, "t", CGROUP_LIMIT_MAX);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return 1;
+ } else if (isempty(eq)) {
+ uint64_t empty_value = STR_IN_SET(field,
+ "DefaultMemoryLow",
+ "DefaultMemoryMin",
+ "MemoryLow",
+ "MemoryMin") ?
+ CGROUP_LIMIT_MIN :
+ CGROUP_LIMIT_MAX;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", empty_value);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return 1;
+ }
+
+ r = parse_permille(eq);
+ if (r >= 0) {
+ char *n;
+
+ /* When this is a percentage we'll convert this into a relative value in the range 0…UINT32_MAX
+ * and pass it in the MemoryLowScale property (and related ones). This way the physical memory
+ * size can be determined server-side. */
+
+ n = strjoina(field, "Scale");
+ r = sd_bus_message_append(m, "(sv)", n, "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "TasksMax"))
+ return bus_append_safe_atou64(m, field, eq);
+
+ return bus_append_parse_size(m, field, eq, 1024);
+ }
+
+ if (streq(field, "CPUQuota")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", USEC_INFINITY);
+ else {
+ r = parse_permille_unbounded(eq);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
+ "CPU quota too small.");
+ if (r < 0)
+ return log_error_errno(r, "CPU quota '%s' invalid.", eq);
+
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", (((uint64_t) r * USEC_PER_SEC) / 1000U));
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "CPUQuotaPeriodSec")) {
+ usec_t u = USEC_INFINITY;
+
+ r = parse_sec_def_infinity(eq, &u);
+ if (r < 0)
+ return log_error_errno(r, "CPU quota period '%s' invalid.", eq);
+
+ r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "DeviceAllow")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0);
+ else {
+ const char *path = eq, *rwm = NULL, *e;
+
+ e = strchr(eq, ' ');
+ if (e) {
+ path = strndupa(eq, e - eq);
+ rwm = e+1;
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, path, strempty(rwm));
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (cgroup_io_limit_type_from_string(field) >= 0 || STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+ else {
+ const char *path, *bandwidth, *e;
+ uint64_t bytes;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ bandwidth = e+1;
+
+ if (streq(bandwidth, "infinity"))
+ bytes = CGROUP_LIMIT_MAX;
+ else {
+ r = parse_size(bandwidth, 1000, &bytes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse byte value %s: %m", bandwidth);
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, bytes);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IODeviceWeight",
+ "BlockIODeviceWeight")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0);
+ else {
+ const char *path, *weight, *e;
+ uint64_t u;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ weight = e+1;
+
+ r = safe_atou64(weight, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, weight);
+
+ r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, u);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "IODeviceLatencyTargetSec")) {
+ const char *field_usec = "IODeviceLatencyTargetUSec";
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", USEC_INFINITY);
+ else {
+ const char *path, *target, *e;
+ usec_t usec;
+
+ e = strchr(eq, ' ');
+ if (!e)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse %s value %s.",
+ field, eq);
+
+ path = strndupa(eq, e - eq);
+ target = e+1;
+
+ r = parse_sec(target, &usec);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, target);
+
+ r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec);
+ }
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IPAddressAllow",
+ "IPAddressDeny")) {
+ unsigned char prefixlen;
+ union in_addr_union prefix = {};
+ int family;
+
+ if (isempty(eq)) {
+ r = sd_bus_message_append(m, "(sv)", field, "a(iayu)", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(iayu)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (streq(eq, "any")) {
+ /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (is_localhost(eq)) {
+ /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+ prefix.in.s_addr = htobe32(0x7f000000);
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
+ if (r < 0)
+ return r;
+
+ } else if (streq(eq, "link-local")) {
+ /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xfe800000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else if (streq(eq, "multicast")) {
+ /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+ prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
+ r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ prefix.in6 = (struct in6_addr) {
+ .s6_addr32[0] = htobe32(0xff000000)
+ };
+ r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ } else {
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&eq, &word, NULL, 0);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s: %s", field, eq);
+
+ r = in_addr_prefix_from_string_auto(word, &family, &prefix, &prefixlen);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse IP address prefix: %s", word);
+
+ r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "IPIngressFilterPath",
+ "IPEgressFilterPath")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "as", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", field, "as", 1, eq);
+
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_automount_property(sd_bus_message *m, const char *field, const char *eq) {
+ if (streq(field, "Where"))
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (streq(field, "TimeoutIdleSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_execute_property(sd_bus_message *m, const char *field, const char *eq) {
+ const char *suffix;
+ int r;
+
+ if (STR_IN_SET(field, "User",
+ "Group",
+ "UtmpIdentifier",
+ "UtmpMode",
+ "PAMName",
+ "TTYPath",
+ "WorkingDirectory",
+ "RootDirectory",
+ "SyslogIdentifier",
+ "ProtectSystem",
+ "ProtectHome",
+ "SELinuxContext",
+ "RootImage",
+ "RootVerity",
+ "RuntimeDirectoryPreserve",
+ "Personality",
+ "KeyringMode",
+ "ProtectProc",
+ "ProcSubset",
+ "NetworkNamespacePath",
+ "LogNamespace"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "IgnoreSIGPIPE",
+ "TTYVHangup",
+ "TTYReset",
+ "TTYVTDisallocate",
+ "PrivateTmp",
+ "PrivateDevices",
+ "PrivateNetwork",
+ "PrivateUsers",
+ "PrivateMounts",
+ "NoNewPrivileges",
+ "SyslogLevelPrefix",
+ "MemoryDenyWriteExecute",
+ "RestrictRealtime",
+ "DynamicUser",
+ "RemoveIPC",
+ "ProtectKernelTunables",
+ "ProtectKernelModules",
+ "ProtectKernelLogs",
+ "ProtectClock",
+ "ProtectControlGroups",
+ "MountAPIVFS",
+ "CPUSchedulingResetOnFork",
+ "LockPersonality",
+ "ProtectHostname",
+ "RestrictSUIDSGID"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "ReadWriteDirectories",
+ "ReadOnlyDirectories",
+ "InaccessibleDirectories",
+ "ReadWritePaths",
+ "ReadOnlyPaths",
+ "InaccessiblePaths",
+ "RuntimeDirectory",
+ "StateDirectory",
+ "CacheDirectory",
+ "LogsDirectory",
+ "ConfigurationDirectory",
+ "SupplementaryGroups",
+ "SystemCallArchitectures"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE);
+
+ if (STR_IN_SET(field, "SyslogLevel",
+ "LogLevelMax"))
+ return bus_append_log_level_from_string(m, field, eq);
+
+ if (streq(field, "SyslogFacility"))
+ return bus_append_log_facility_unshifted_from_string(m, field, eq);
+
+ if (streq(field, "SecureBits"))
+ return bus_append_secure_bits_from_string(m, field, eq);
+
+ if (streq(field, "CPUSchedulingPolicy"))
+ return bus_append_sched_policy_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "CPUSchedulingPriority",
+ "OOMScoreAdjust"))
+ return bus_append_safe_atoi(m, field, eq);
+
+ if (streq(field, "CoredumpFilter"))
+ return bus_append_coredump_filter_mask_from_string(m, field, eq);
+
+ if (streq(field, "Nice"))
+ return bus_append_parse_nice(m, field, eq);
+
+ if (streq(field, "SystemCallErrorNumber"))
+ return bus_append_seccomp_parse_errno_or_action(m, field, eq);
+
+ if (streq(field, "IOSchedulingClass"))
+ return bus_append_ioprio_class_from_string(m, field, eq);
+
+ if (streq(field, "IOSchedulingPriority"))
+ return bus_append_ioprio_parse_priority(m, field, eq);
+
+ if (STR_IN_SET(field, "RuntimeDirectoryMode",
+ "StateDirectoryMode",
+ "CacheDirectoryMode",
+ "LogsDirectoryMode",
+ "ConfigurationDirectoryMode",
+ "UMask"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (streq(field, "TimerSlackNSec"))
+ return bus_append_parse_nsec(m, field, eq);
+
+ if (streq(field, "LogRateLimitIntervalSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "LogRateLimitBurst"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (streq(field, "MountFlags"))
+ return bus_append_mount_propagation_flags_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "Environment",
+ "UnsetEnvironment",
+ "PassEnvironment"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE);
+
+ if (streq(field, "EnvironmentFile")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 1,
+ eq[0] == '-' ? eq + 1 : eq,
+ eq[0] == '-');
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "SetCredential")) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', "SetCredential");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(say)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "a(say)", 0);
+ else {
+ _cleanup_free_ char *word = NULL, *unescaped = NULL;
+ const char *p = eq;
+ int l;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse SetCredential= parameter: %s", eq);
+ if (r == 0 || !p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing argument to SetCredential=.");
+
+ l = cunescape(p, UNESCAPE_ACCEPT_NUL, &unescaped);
+ if (l < 0)
+ return log_error_errno(l, "Failed to unescape SetCredential= value: %s", p);
+
+ r = sd_bus_message_open_container(m, 'a', "(say)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "say");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "s", word);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', unescaped, l);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "LoadCredential")) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', "LoadCredential");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "a(ss)", 0);
+ else {
+ _cleanup_free_ char *word = NULL;
+ const char *p = eq;
+
+ r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse LoadCredential= parameter: %s", eq);
+ if (r == 0 || !p)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing argument to LoadCredential=.");
+
+ r = sd_bus_message_append(m, "a(ss)", 1, word, p);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "LogExtraFields")) {
+ r = sd_bus_message_open_container(m, 'r', "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 's', "LogExtraFields");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "aay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "ay");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'y', eq, strlen(eq));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "StandardInput",
+ "StandardOutput",
+ "StandardError")) {
+ const char *n, *appended;
+
+ if ((n = startswith(eq, "fd:"))) {
+ appended = strjoina(field, "FileDescriptorName");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else if ((n = startswith(eq, "file:"))) {
+ appended = strjoina(field, "File");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else if ((n = startswith(eq, "append:"))) {
+ appended = strjoina(field, "FileToAppend");
+ r = sd_bus_message_append(m, "(sv)", appended, "s", n);
+ } else
+ r = sd_bus_message_append(m, "(sv)", field, "s", eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "StandardInputText")) {
+ _cleanup_free_ char *unescaped = NULL;
+
+ r = cunescape(eq, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape text '%s': %m", eq);
+
+ if (!strextend(&unescaped, "\n", NULL))
+ return log_oom();
+
+ /* Note that we don't expand specifiers here, but that should be OK, as this is a programmatic
+ * interface anyway */
+
+ return bus_append_byte_array(m, field, unescaped, strlen(unescaped));
+ }
+
+ if (streq(field, "StandardInputData")) {
+ _cleanup_free_ void *decoded = NULL;
+ size_t sz;
+
+ r = unbase64mem(eq, (size_t) -1, &decoded, &sz);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode base64 data '%s': %m", eq);
+
+ return bus_append_byte_array(m, field, decoded, sz);
+ }
+
+ if ((suffix = startswith(field, "Limit"))) {
+ int rl;
+
+ rl = rlimit_from_string(suffix);
+ if (rl >= 0) {
+ const char *sn;
+ struct rlimit l;
+
+ r = rlimit_parse(rl, eq, &l);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse resource limit: %s", eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", l.rlim_max);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ sn = strjoina(field, "Soft");
+ r = sd_bus_message_append(m, "(sv)", sn, "t", l.rlim_cur);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+ }
+
+ if (STR_IN_SET(field, "AppArmorProfile",
+ "SmackProcessLabel")) {
+ int ignore = 0;
+ const char *s = eq;
+
+ if (eq[0] == '-') {
+ ignore = 1;
+ s = eq + 1;
+ }
+
+ r = sd_bus_message_append(m, "(sv)", field, "(bs)", ignore, s);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "CapabilityBoundingSet",
+ "AmbientCapabilities")) {
+ uint64_t sum = 0;
+ bool invert = false;
+ const char *p = eq;
+
+ if (*p == '~') {
+ invert = true;
+ p++;
+ }
+
+ r = capability_set_from_string(p, &sum);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s: %m", field, eq);
+
+ sum = invert ? ~sum : sum;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", sum);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "CPUAffinity")) {
+ _cleanup_(cpu_set_reset) CPUSet cpuset = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ if (eq && streq(eq, "numa")) {
+ r = sd_bus_message_append(m, "(sv)", "CPUAffinityFromNUMA", "b", true);
+ if (r < 0)
+ return bus_log_create_error(r);
+ return r;
+ }
+
+ r = parse_cpu_set(eq, &cpuset);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = cpu_set_to_dbus(&cpuset, &array, &allocated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to serialize CPUAffinity: %m");
+
+ return bus_append_byte_array(m, field, array, allocated);
+ }
+
+ if (streq(field, "NUMAPolicy")) {
+ r = mpol_from_string(eq);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "i", (int32_t) r);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "NUMAMask")) {
+ _cleanup_(cpu_set_reset) CPUSet nodes = {};
+ _cleanup_free_ uint8_t *array = NULL;
+ size_t allocated;
+
+ if (eq && streq(eq, "all")) {
+ r = numa_mask_add_all(&nodes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create NUMA mask representing \"all\" NUMA nodes: %m");
+ } else {
+ r = parse_cpu_set(eq, &nodes);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value: %s", field, eq);
+ }
+
+ r = cpu_set_to_dbus(&nodes, &array, &allocated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to serialize NUMAMask: %m");
+
+ return bus_append_byte_array(m, field, array, allocated);
+ }
+
+ if (STR_IN_SET(field, "RestrictAddressFamilies",
+ "SystemCallFilter",
+ "SystemCallLog")) {
+ int allow_list = 1;
+ const char *p = eq;
+
+ if (*p == '~') {
+ allow_list = 0;
+ p++;
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "(bas)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "bas");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, 'b', &allow_list);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "s");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax: %s", eq);
+
+ r = sd_bus_message_append_basic(m, 's', word);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "RestrictNamespaces")) {
+ bool invert = false;
+ unsigned long flags;
+
+ r = parse_boolean(eq);
+ if (r > 0)
+ flags = 0;
+ else if (r == 0)
+ flags = NAMESPACE_FLAGS_ALL;
+ else {
+ if (eq[0] == '~') {
+ invert = true;
+ eq++;
+ }
+
+ r = namespace_flags_from_string(eq, &flags);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s value %s.", field, eq);
+ }
+
+ if (invert)
+ flags = (~flags) & NAMESPACE_FLAGS_ALL;
+
+ r = sd_bus_message_append(m, "(sv)", field, "t", (uint64_t) flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (STR_IN_SET(field, "BindPaths",
+ "BindReadOnlyPaths")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ssbt)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ssbt)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *source = NULL, *destination = NULL;
+ char *s = NULL, *d = NULL;
+ bool ignore_enoent = false;
+ uint64_t flags = MS_REC;
+
+ r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ break;
+
+ s = source;
+ if (s[0] == '-') {
+ ignore_enoent = true;
+ s++;
+ }
+
+ if (p && p[-1] == ':') {
+ r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_UNQUOTE|EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing argument after ':': %s",
+ eq);
+
+ d = destination;
+
+ if (p && p[-1] == ':') {
+ _cleanup_free_ char *options = NULL;
+
+ r = extract_first_word(&p, &options, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+
+ if (isempty(options) || streq(options, "rbind"))
+ flags = MS_REC;
+ else if (streq(options, "norbind"))
+ flags = 0;
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown options: %s",
+ eq);
+ }
+ } else
+ d = s;
+
+ r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "TemporaryFileSystem")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *path = NULL;
+ const char *w;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ break;
+
+ w = word;
+ r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse argument: %s",
+ p);
+
+ r = sd_bus_message_append(m, "(ss)", path, w);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "RootHash")) {
+ _cleanup_free_ void *roothash_decoded = NULL;
+ size_t roothash_decoded_size = 0;
+
+ /* We have the path to a roothash to load and decode, eg: RootHash=/foo/bar.roothash */
+ if (path_is_absolute(eq))
+ return bus_append_string(m, "RootHashPath", eq);
+
+ /* We have a roothash to decode, eg: RootHash=012345789abcdef */
+ r = unhexmem(eq, strlen(eq), &roothash_decoded, &roothash_decoded_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode RootHash= '%s': %m", eq);
+ if (roothash_decoded_size < sizeof(sd_id128_t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "RootHash= '%s' is too short: %m", eq);
+
+ return bus_append_byte_array(m, field, roothash_decoded, roothash_decoded_size);
+ }
+
+ if (streq(field, "RootHashSignature")) {
+ _cleanup_free_ void *roothash_sig_decoded = NULL;
+ char *value;
+ size_t roothash_sig_decoded_size = 0;
+
+ /* We have the path to a roothash signature to load and decode, eg: RootHash=/foo/bar.roothash.p7s */
+ if (path_is_absolute(eq))
+ return bus_append_string(m, "RootHashSignaturePath", eq);
+
+ if (!(value = startswith(eq, "base64:")))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decode RootHashSignature= '%s', not a path but doesn't start with 'base64:': %m", eq);
+
+ /* We have a roothash signature to decode, eg: RootHashSignature=base64:012345789abcdef */
+ r = unbase64mem(value, strlen(value), &roothash_sig_decoded, &roothash_sig_decoded_size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to decode RootHashSignature= '%s': %m", eq);
+
+ return bus_append_byte_array(m, field, roothash_sig_decoded, roothash_sig_decoded_size);
+ }
+
+ if (streq(field, "RootImageOptions")) {
+ _cleanup_strv_free_ char **l = NULL;
+ char **first = NULL, **second = NULL;
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = strv_split_colon_pairs(&l, p);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse argument: %m");
+
+ STRV_FOREACH_PAIR(first, second, l) {
+ /* Format is either 'root:foo' or 'foo' (root is implied) */
+ if (!isempty(*second) && partition_designator_from_string(*first) < 0)
+ return bus_log_create_error(-EINVAL);
+
+ r = sd_bus_message_append(m, "(ss)",
+ !isempty(*second) ? *first : "root",
+ !isempty(*second) ? *second : *first);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "MountImages")) {
+ const char *p = eq;
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "a(ssba(ss))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ssba(ss))");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *first = NULL, *second = NULL, *tuple = NULL;
+ const char *q = NULL, *source = NULL;
+ bool permissive = false;
+
+ r = extract_first_word(&p, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ q = tuple;
+ r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &first, &second, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ source = first;
+ if (source[0] == '-') {
+ permissive = true;
+ source++;
+ }
+
+ if (isempty(second))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Missing argument after ':': %s",
+ eq);
+
+ r = sd_bus_message_open_container(m, 'r', "ssba(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "ssb", source, second, permissive);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'a', "(ss)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ for (;;) {
+ _cleanup_free_ char *partition = NULL, *mount_options = NULL;
+
+ r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+ /* Single set of options, applying to the root partition/single filesystem */
+ if (r == 1) {
+ r = sd_bus_message_append(m, "(ss)", "root", partition);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ break;
+ }
+
+ if (partition_designator_from_string(partition) < 0)
+ return bus_log_create_error(-EINVAL);
+
+ r = sd_bus_message_append(m, "(ss)", partition, mount_options);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_kill_property(sd_bus_message *m, const char *field, const char *eq) {
+ if (streq(field, "KillMode"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "SendSIGHUP",
+ "SendSIGKILL"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "KillSignal",
+ "RestartKillSignal",
+ "FinalKillSignal",
+ "WatchdogSignal"))
+ return bus_append_signal_from_string(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_mount_property(sd_bus_message *m, const char *field, const char *eq) {
+
+ if (STR_IN_SET(field, "What",
+ "Where",
+ "Options",
+ "Type"))
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "TimeoutSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "SloppyOptions",
+ "LazyUnmount",
+ "ForceUnmount",
+ "ReadwriteOnly"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_path_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (streq(field, "MakeDirectory"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (streq(field, "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "PathExists",
+ "PathExistsGlob",
+ "PathChanged",
+ "PathModified",
+ "DirectoryNotEmpty")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 1, field, eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_scope_property(sd_bus_message *m, const char *field, const char *eq) {
+ if (streq(field, "RuntimeMaxSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "TimeoutStopSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ return 0;
+}
+
+static int bus_append_service_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "PIDFile",
+ "Type",
+ "Restart",
+ "BusName",
+ "NotifyAccess",
+ "USBFunctionDescriptors",
+ "USBFunctionStrings",
+ "OOMPolicy",
+ "TimeoutStartFailureMode",
+ "TimeoutStopFailureMode"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "PermissionsStartOnly",
+ "RootDirectoryStartOnly",
+ "RemainAfterExit",
+ "GuessMainPID"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "RestartSec",
+ "TimeoutStartSec",
+ "TimeoutStopSec",
+ "TimeoutAbortSec",
+ "RuntimeMaxSec",
+ "WatchdogSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "TimeoutSec")) {
+ r = bus_append_parse_sec_rename(m, "TimeoutStartSec", eq);
+ if (r < 0)
+ return r;
+
+ return bus_append_parse_sec_rename(m, "TimeoutStopSec", eq);
+ }
+
+ if (streq(field, "FileDescriptorStoreMax"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "ExecCondition",
+ "ExecStartPre",
+ "ExecStart",
+ "ExecStartPost",
+ "ExecConditionEx",
+ "ExecStartPreEx",
+ "ExecStartEx",
+ "ExecStartPostEx",
+ "ExecReload",
+ "ExecStop",
+ "ExecStopPost",
+ "ExecReloadEx",
+ "ExecStopEx",
+ "ExecStopPostEx"))
+ return bus_append_exec_command(m, field, eq);
+
+ if (STR_IN_SET(field, "RestartPreventExitStatus",
+ "RestartForceExitStatus",
+ "SuccessExitStatus")) {
+ _cleanup_free_ int *status = NULL, *signal = NULL;
+ size_t n_status = 0, n_signal = 0;
+ const char *p;
+
+ for (p = eq;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == 0)
+ break;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0)
+ return log_error_errno(r, "Invalid syntax in %s: %s", field, eq);
+
+ /* We need to call exit_status_from_string() first, because we want
+ * to parse numbers as exit statuses, not signals. */
+
+ r = exit_status_from_string(word);
+ if (r >= 0) {
+ assert(r >= 0 && r < 256);
+
+ status = reallocarray(status, n_status + 1, sizeof(int));
+ if (!status)
+ return log_oom();
+
+ status[n_status++] = r;
+
+ } else if ((r = signal_from_string(word)) >= 0) {
+ signal = reallocarray(signal, n_signal + 1, sizeof(int));
+ if (!signal)
+ return log_oom();
+
+ signal[n_signal++] = r;
+
+ } else
+ /* original r from exit_status_to_string() */
+ return log_error_errno(r, "Invalid status or signal %s in %s: %m",
+ word, field);
+ }
+
+ r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'v', "(aiai)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_open_container(m, 'r', "aiai");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'i', status, n_status * sizeof(int));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append_array(m, 'i', signal, n_signal * sizeof(int));
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_socket_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "Accept",
+ "FlushPending",
+ "Writable",
+ "KeepAlive",
+ "NoDelay",
+ "FreeBind",
+ "Transparent",
+ "Broadcast",
+ "PassCredentials",
+ "PassSecurity",
+ "PassPacketInfo",
+ "ReusePort",
+ "RemoveOnStop",
+ "SELinuxContextFromNet"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "Priority",
+ "IPTTL",
+ "Mark"))
+ return bus_append_safe_atoi(m, field, eq);
+
+ if (streq(field, "IPTOS"))
+ return bus_append_ip_tos_from_string(m, field, eq);
+
+ if (STR_IN_SET(field, "Backlog",
+ "MaxConnections",
+ "MaxConnectionsPerSource",
+ "KeepAliveProbes",
+ "TriggerLimitBurst"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "SocketMode",
+ "DirectoryMode"))
+ return bus_append_parse_mode(m, field, eq);
+
+ if (STR_IN_SET(field, "MessageQueueMaxMessages",
+ "MessageQueueMessageSize"))
+ return bus_append_safe_atoi64(m, field, eq);
+
+ if (STR_IN_SET(field, "TimeoutSec",
+ "KeepAliveTimeSec",
+ "KeepAliveIntervalSec",
+ "DeferAcceptSec",
+ "TriggerLimitIntervalSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (STR_IN_SET(field, "ReceiveBuffer",
+ "SendBuffer",
+ "PipeSize"))
+ return bus_append_parse_size(m, field, eq, 1024);
+
+ if (STR_IN_SET(field, "ExecStartPre",
+ "ExecStartPost",
+ "ExecReload",
+ "ExecStopPost"))
+ return bus_append_exec_command(m, field, eq);
+
+ if (STR_IN_SET(field, "SmackLabel",
+ "SmackLabelIPIn",
+ "SmackLabelIPOut",
+ "TCPCongestion",
+ "BindToDevice",
+ "BindIPv6Only",
+ "FileDescriptorName",
+ "SocketUser",
+ "SocketGroup",
+ "Timestamping"))
+ return bus_append_string(m, field, eq);
+
+ if (streq(field, "Symlinks"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE);
+
+ if (streq(field, "SocketProtocol"))
+ return bus_append_parse_ip_protocol(m, field, eq);
+
+ if (STR_IN_SET(field, "ListenStream",
+ "ListenDatagram",
+ "ListenSequentialPacket",
+ "ListenNetlink",
+ "ListenSpecial",
+ "ListenMessageQueue",
+ "ListenFIFO",
+ "ListenUSBFunction")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 1, field + STRLEN("Listen"), eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+static int bus_append_timer_property(sd_bus_message *m, const char *field, const char *eq) {
+ int r;
+
+ if (STR_IN_SET(field, "WakeSystem",
+ "RemainAfterElapse",
+ "Persistent",
+ "OnTimezoneChange",
+ "OnClockChange",
+ "FixedRandomDelay"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "AccuracySec",
+ "RandomizedDelaySec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (STR_IN_SET(field, "OnActiveSec",
+ "OnBootSec",
+ "OnStartupSec",
+ "OnUnitActiveSec",
+ "OnUnitInactiveSec")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 0);
+ else {
+ usec_t t;
+ r = parse_sec(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 1, field, t);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (streq(field, "OnCalendar")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 0);
+ else
+ r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 1, field, eq);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bus_append_unit_property(sd_bus_message *m, const char *field, const char *eq) {
+ ConditionType t = _CONDITION_TYPE_INVALID;
+ bool is_condition = false;
+ int r;
+
+ if (STR_IN_SET(field, "Description",
+ "SourcePath",
+ "OnFailureJobMode",
+ "JobTimeoutAction",
+ "JobTimeoutRebootArgument",
+ "StartLimitAction",
+ "FailureAction",
+ "SuccessAction",
+ "RebootArgument",
+ "CollectMode"))
+ return bus_append_string(m, field, eq);
+
+ if (STR_IN_SET(field, "StopWhenUnneeded",
+ "RefuseManualStart",
+ "RefuseManualStop",
+ "AllowIsolate",
+ "IgnoreOnIsolate",
+ "DefaultDependencies"))
+ return bus_append_parse_boolean(m, field, eq);
+
+ if (STR_IN_SET(field, "JobTimeoutSec",
+ "JobRunningTimeoutSec",
+ "StartLimitIntervalSec"))
+ return bus_append_parse_sec_rename(m, field, eq);
+
+ if (streq(field, "StartLimitBurst"))
+ return bus_append_safe_atou(m, field, eq);
+
+ if (STR_IN_SET(field, "SuccessActionExitStatus",
+ "FailureActionExitStatus")) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", field, "i", -1);
+ else {
+ uint8_t u;
+
+ r = safe_atou8(eq, &u);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse %s=%s", field, eq);
+
+ r = sd_bus_message_append(m, "(sv)", field, "i", (int) u);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ if (unit_dependency_from_string(field) >= 0 ||
+ STR_IN_SET(field, "Documentation",
+ "RequiresMountsFor"))
+ return bus_append_strv(m, field, eq, EXTRACT_UNQUOTE);
+
+ t = condition_type_from_string(field);
+ if (t >= 0)
+ is_condition = true;
+ else
+ t = assert_type_from_string(field);
+ if (t >= 0) {
+ if (isempty(eq))
+ r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 0);
+ else {
+ const char *p = eq;
+ int trigger, negate;
+
+ trigger = *p == '|';
+ if (trigger)
+ p++;
+
+ negate = *p == '!';
+ if (negate)
+ p++;
+
+ r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 1,
+ field, trigger, negate, p);
+ }
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment) {
+ const char *eq, *field;
+ int r;
+
+ assert(m);
+ assert(assignment);
+
+ eq = strchr(assignment, '=');
+ if (!eq)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not an assignment: %s", assignment);
+
+ field = strndupa(assignment, eq - assignment);
+ eq++;
+
+ switch (t) {
+ case UNIT_SERVICE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_service_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SOCKET:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_socket_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_TIMER:
+ r = bus_append_timer_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_PATH:
+ r = bus_append_path_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SLICE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_SCOPE:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_scope_property(m, field, eq);
+ if (r != 0)
+ return r;
+ break;
+
+ case UNIT_MOUNT:
+ r = bus_append_cgroup_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_execute_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_kill_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ r = bus_append_mount_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ break;
+
+ case UNIT_AUTOMOUNT:
+ r = bus_append_automount_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ break;
+
+ case UNIT_TARGET:
+ case UNIT_DEVICE:
+ case UNIT_SWAP:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Not supported unit type");
+
+ default:
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid unit type");
+ }
+
+ r = bus_append_unit_property(m, field, eq);
+ if (r != 0)
+ return r;
+
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown assignment: %s", assignment);
+}
+
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l) {
+ char **i;
+ int r;
+
+ assert(m);
+
+ STRV_FOREACH(i, l) {
+ r = bus_append_unit_property_assignment(m, t, *i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes) {
+ const char *type, *path, *source;
+ int r;
+
+ /* changes is dereferenced when calling unit_file_dump_changes() later,
+ * so we have to make sure this is not NULL. */
+ assert(changes);
+ assert(n_changes);
+
+ r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sss)");
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ while ((r = sd_bus_message_read(m, "(sss)", &type, &path, &source)) > 0) {
+ /* We expect only "success" changes to be sent over the bus.
+ Hence, reject anything negative. */
+ UnitFileChangeType ch = unit_file_change_type_from_string(type);
+
+ if (ch < 0) {
+ log_notice("Manager reported unknown change type \"%s\" for path \"%s\", ignoring.", type, path);
+ continue;
+ }
+
+ r = unit_file_changes_add(changes, n_changes, ch, path, source);
+ if (r < 0)
+ return r;
+ }
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_message_exit_container(m);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ unit_file_dump_changes(0, NULL, *changes, *n_changes, quiet);
+ return 0;
+}
+
+int unit_load_state(sd_bus *bus, const char *name, char **load_state) {
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ path = unit_dbus_path_from_name(name);
+ if (!path)
+ return log_oom();
+
+ /* This function warns on it's own, because otherwise it'd be awkward to pass
+ * the dbus error message around. */
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ "org.freedesktop.systemd1.Unit",
+ "LoadState",
+ &error,
+ load_state);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get load state of %s: %s", name, bus_error_message(&error, r));
+
+ return 0;
+}
+
+int unit_info_compare(const UnitInfo *a, const UnitInfo *b) {
+ int r;
+
+ /* First, order by machine */
+ r = strcasecmp_ptr(a->machine, b->machine);
+ if (r != 0)
+ return r;
+
+ /* Second, order by unit type */
+ r = strcasecmp_ptr(strrchr(a->id, '.'), strrchr(b->id, '.'));
+ if (r != 0)
+ return r;
+
+ /* Third, order by name */
+ return strcasecmp(a->id, b->id);
+}
diff --git a/src/shared/bus-unit-util.h b/src/shared/bus-unit-util.h
new file mode 100644
index 0000000..999caf6
--- /dev/null
+++ b/src/shared/bus-unit-util.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "install.h"
+#include "unit-def.h"
+
+typedef struct UnitInfo {
+ const char *machine;
+ const char *id;
+ const char *description;
+ const char *load_state;
+ const char *active_state;
+ const char *sub_state;
+ const char *following;
+ const char *unit_path;
+ uint32_t job_id;
+ const char *job_type;
+ const char *job_path;
+} UnitInfo;
+
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u);
+
+int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment);
+int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l);
+
+int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes);
+
+int unit_load_state(sd_bus *bus, const char *name, char **load_state);
+
+int unit_info_compare(const UnitInfo *a, const UnitInfo *b);
diff --git a/src/shared/bus-util.c b/src/shared/bus-util.c
new file mode 100644
index 0000000..fbda218
--- /dev/null
+++ b/src/shared/bus-util.c
@@ -0,0 +1,577 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "sd-bus.h"
+#include "sd-daemon.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "bus-common-errors.h"
+#include "bus-internal.h"
+#include "bus-label.h"
+#include "bus-util.h"
+#include "path-util.h"
+#include "socket-util.h"
+#include "stdio-util.h"
+
+static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) {
+ sd_event *e = userdata;
+
+ assert(m);
+ assert(e);
+
+ sd_bus_close(sd_bus_message_get_bus(m));
+ sd_event_exit(e, 0);
+
+ return 1;
+}
+
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name) {
+ const char *match;
+ const char *unique;
+ int r;
+
+ assert(e);
+ assert(bus);
+ assert(name);
+
+ /* We unregister the name here and then wait for the
+ * NameOwnerChanged signal for this event to arrive before we
+ * quit. We do this in order to make sure that any queued
+ * requests are still processed before we really exit. */
+
+ r = sd_bus_get_unique_name(bus, &unique);
+ if (r < 0)
+ return r;
+
+ match = strjoina(
+ "sender='org.freedesktop.DBus',"
+ "type='signal',"
+ "interface='org.freedesktop.DBus',"
+ "member='NameOwnerChanged',"
+ "path='/org/freedesktop/DBus',"
+ "arg0='", name, "',",
+ "arg1='", unique, "',",
+ "arg2=''");
+
+ r = sd_bus_add_match_async(bus, NULL, match, name_owner_change_callback, NULL, e);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_release_name_async(bus, NULL, name, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bus_event_loop_with_idle(
+ sd_event *e,
+ sd_bus *bus,
+ const char *name,
+ usec_t timeout,
+ check_idle_t check_idle,
+ void *userdata) {
+ bool exiting = false;
+ int r, code;
+
+ assert(e);
+ assert(bus);
+ assert(name);
+
+ for (;;) {
+ bool idle;
+
+ r = sd_event_get_state(e);
+ if (r < 0)
+ return r;
+ if (r == SD_EVENT_FINISHED)
+ break;
+
+ if (check_idle)
+ idle = check_idle(userdata);
+ else
+ idle = true;
+
+ r = sd_event_run(e, exiting || !idle ? (uint64_t) -1 : timeout);
+ if (r < 0)
+ return r;
+
+ if (r == 0 && !exiting && idle) {
+ /* Inform the service manager that we are going down, so that it will queue all
+ * further start requests, instead of assuming we are already running. */
+ sd_notify(false, "STOPPING=1");
+
+ r = bus_async_unregister_and_exit(e, bus, name);
+ if (r < 0)
+ return r;
+
+ exiting = true;
+ continue;
+ }
+ }
+
+ r = sd_event_get_exit_code(e, &code);
+ if (r < 0)
+ return r;
+
+ return code;
+}
+
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *rep = NULL;
+ int r, has_owner = 0;
+
+ assert(c);
+ assert(name);
+
+ r = sd_bus_call_method(c,
+ "org.freedesktop.DBus",
+ "/org/freedesktop/dbus",
+ "org.freedesktop.DBus",
+ "NameHasOwner",
+ error,
+ &rep,
+ "s",
+ name);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_basic(rep, 'b', &has_owner);
+ if (r < 0)
+ return sd_bus_error_set_errno(error, r);
+
+ return has_owner;
+}
+
+bool bus_error_is_unknown_service(const sd_bus_error *error) {
+ return sd_bus_error_has_names(error,
+ SD_BUS_ERROR_SERVICE_UNKNOWN,
+ SD_BUS_ERROR_NAME_HAS_NO_OWNER,
+ BUS_ERROR_NO_SUCH_UNIT);
+}
+
+int bus_check_peercred(sd_bus *c) {
+ struct ucred ucred;
+ int fd, r;
+
+ assert(c);
+
+ fd = sd_bus_get_fd(c);
+ if (fd < 0)
+ return fd;
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ if (ucred.uid != 0 && ucred.uid != geteuid())
+ return -EPERM;
+
+ return 1;
+}
+
+int bus_connect_system_systemd(sd_bus **_bus) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(_bus);
+
+ if (geteuid() != 0)
+ return sd_bus_default_system(_bus);
+
+ /* If we are root then let's talk directly to the system
+ * instance, instead of going via the bus */
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_address(bus, "unix:path=/run/systemd/private");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return sd_bus_default_system(_bus);
+
+ r = bus_check_peercred(bus);
+ if (r < 0)
+ return r;
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_user_systemd(sd_bus **_bus) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *ee = NULL;
+ const char *e;
+ int r;
+
+ assert(_bus);
+
+ e = secure_getenv("XDG_RUNTIME_DIR");
+ if (!e)
+ return sd_bus_default_user(_bus);
+
+ ee = bus_address_escape(e);
+ if (!ee)
+ return -ENOMEM;
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ bus->address = strjoin("unix:path=", ee, "/systemd/private");
+ if (!bus->address)
+ return -ENOMEM;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return sd_bus_default_user(_bus);
+
+ r = bus_check_peercred(bus);
+ if (r < 0)
+ return r;
+
+ *_bus = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **ret) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(transport >= 0);
+ assert(transport < _BUS_TRANSPORT_MAX);
+ assert(ret);
+
+ assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+ assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+ switch (transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (user)
+ r = sd_bus_default_user(&bus);
+ else {
+ if (sd_booted() <= 0)
+ /* Print a friendly message when the local system is actually not running systemd as PID 1. */
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "System has not been booted with systemd as init system (PID 1). Can't operate.");
+ r = sd_bus_default_system(&bus);
+ }
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = sd_bus_open_system_remote(&bus, host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = sd_bus_open_system_machine(&bus, host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_exit_on_disconnect(bus, true);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus) {
+ int r;
+
+ assert(transport >= 0);
+ assert(transport < _BUS_TRANSPORT_MAX);
+ assert(bus);
+
+ assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL);
+ assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP);
+
+ switch (transport) {
+
+ case BUS_TRANSPORT_LOCAL:
+ if (user)
+ r = bus_connect_user_systemd(bus);
+ else {
+ if (sd_booted() <= 0)
+ /* Print a friendly message when the local system is actually not running systemd as PID 1. */
+ return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
+ "System has not been booted with systemd as init system (PID 1). Can't operate.");
+ r = bus_connect_system_systemd(bus);
+ }
+ break;
+
+ case BUS_TRANSPORT_REMOTE:
+ r = sd_bus_open_system_remote(bus, host);
+ break;
+
+ case BUS_TRANSPORT_MACHINE:
+ r = sd_bus_open_system_machine(bus, host);
+ break;
+
+ default:
+ assert_not_reached("Hmm, unknown transport type.");
+ }
+
+ return r;
+}
+
+/**
+ * bus_path_encode_unique() - encode unique object path
+ * @b: bus connection or NULL
+ * @prefix: object path prefix
+ * @sender_id: unique-name of client, or NULL
+ * @external_id: external ID to be chosen by client, or NULL
+ * @ret_path: storage for encoded object path pointer
+ *
+ * Whenever we provide a bus API that allows clients to create and manage
+ * server-side objects, we need to provide a unique name for these objects. If
+ * we let the server choose the name, we suffer from a race condition: If a
+ * client creates an object asynchronously, it cannot destroy that object until
+ * it received the method reply. It cannot know the name of the new object,
+ * thus, it cannot destroy it. Furthermore, it enforces a round-trip.
+ *
+ * Therefore, many APIs allow the client to choose the unique name for newly
+ * created objects. There're two problems to solve, though:
+ * 1) Object names are usually defined via dbus object paths, which are
+ * usually globally namespaced. Therefore, multiple clients must be able
+ * to choose unique object names without interference.
+ * 2) If multiple libraries share the same bus connection, they must be
+ * able to choose unique object names without interference.
+ * The first problem is solved easily by prefixing a name with the
+ * unique-bus-name of a connection. The server side must enforce this and
+ * reject any other name. The second problem is solved by providing unique
+ * suffixes from within sd-bus.
+ *
+ * This helper allows clients to create unique object-paths. It uses the
+ * template '/prefix/sender_id/external_id' and returns the new path in
+ * @ret_path (must be freed by the caller).
+ * If @sender_id is NULL, the unique-name of @b is used. If @external_id is
+ * NULL, this function allocates a unique suffix via @b (by requesting a new
+ * cookie). If both @sender_id and @external_id are given, @b can be passed as
+ * NULL.
+ *
+ * Returns: 0 on success, negative error code on failure.
+ */
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path) {
+ _cleanup_free_ char *sender_label = NULL, *external_label = NULL;
+ char external_buf[DECIMAL_STR_MAX(uint64_t)], *p;
+ int r;
+
+ assert_return(b || (sender_id && external_id), -EINVAL);
+ assert_return(sd_bus_object_path_is_valid(prefix), -EINVAL);
+ assert_return(ret_path, -EINVAL);
+
+ if (!sender_id) {
+ r = sd_bus_get_unique_name(b, &sender_id);
+ if (r < 0)
+ return r;
+ }
+
+ if (!external_id) {
+ xsprintf(external_buf, "%"PRIu64, ++b->cookie);
+ external_id = external_buf;
+ }
+
+ sender_label = bus_label_escape(sender_id);
+ if (!sender_label)
+ return -ENOMEM;
+
+ external_label = bus_label_escape(external_id);
+ if (!external_label)
+ return -ENOMEM;
+
+ p = path_join(prefix, sender_label, external_label);
+ if (!p)
+ return -ENOMEM;
+
+ *ret_path = p;
+ return 0;
+}
+
+/**
+ * bus_path_decode_unique() - decode unique object path
+ * @path: object path to decode
+ * @prefix: object path prefix
+ * @ret_sender: output parameter for sender-id label
+ * @ret_external: output parameter for external-id label
+ *
+ * This does the reverse of bus_path_encode_unique() (see its description for
+ * details). Both trailing labels, sender-id and external-id, are unescaped and
+ * returned in the given output parameters (the caller must free them).
+ *
+ * Note that this function returns 0 if the path does not match the template
+ * (see bus_path_encode_unique()), 1 if it matched.
+ *
+ * Returns: Negative error code on failure, 0 if the given object path does not
+ * match the template (return parameters are set to NULL), 1 if it was
+ * parsed successfully (return parameters contain allocated labels).
+ */
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external) {
+ const char *p, *q;
+ char *sender, *external;
+
+ assert(sd_bus_object_path_is_valid(path));
+ assert(sd_bus_object_path_is_valid(prefix));
+ assert(ret_sender);
+ assert(ret_external);
+
+ p = object_path_startswith(path, prefix);
+ if (!p) {
+ *ret_sender = NULL;
+ *ret_external = NULL;
+ return 0;
+ }
+
+ q = strchr(p, '/');
+ if (!q) {
+ *ret_sender = NULL;
+ *ret_external = NULL;
+ return 0;
+ }
+
+ sender = bus_label_unescape_n(p, q - p);
+ external = bus_label_unescape(q + 1);
+ if (!sender || !external) {
+ free(sender);
+ free(external);
+ return -ENOMEM;
+ }
+
+ *ret_sender = sender;
+ *ret_external = external;
+ return 1;
+}
+
+int bus_track_add_name_many(sd_bus_track *t, char **l) {
+ int r = 0;
+ char **i;
+
+ assert(t);
+
+ /* Continues adding after failure, and returns the first failure. */
+
+ STRV_FOREACH(i, l) {
+ int k;
+
+ k = sd_bus_track_add_name(t, *i);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
+
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description) {
+ _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL;
+ const char *e;
+ int r;
+
+ assert(ret);
+
+ /* Match like sd_bus_open_system(), but with the "watch_bind" feature and the Connected() signal
+ * turned on. */
+
+ r = sd_bus_new(&bus);
+ if (r < 0)
+ return r;
+
+ if (description) {
+ r = sd_bus_set_description(bus, description);
+ if (r < 0)
+ return r;
+ }
+
+ e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS");
+ if (!e)
+ e = DEFAULT_SYSTEM_BUS_ADDRESS;
+
+ r = sd_bus_set_address(bus, e);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_bus_client(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_negotiate_creds(bus, true, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_watch_bind(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_set_connected_signal(bus, true);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_start(bus);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(bus);
+
+ return 0;
+}
+
+int bus_reply_pair_array(sd_bus_message *m, char **l) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+ char **k, **v;
+ int r;
+
+ assert(m);
+
+ /* Reply to the specified message with a message containing a dictionary put together from the
+ * specified strv */
+
+ r = sd_bus_message_new_method_return(m, &reply);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_open_container(reply, 'a', "{ss}");
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(k, v, l) {
+ r = sd_bus_message_append(reply, "{ss}", *k, *v);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_bus_message_close_container(reply);
+ if (r < 0)
+ return r;
+
+ return sd_bus_send(NULL, reply, NULL);
+}
+
+static void bus_message_unref_wrapper(void *m) {
+ sd_bus_message_unref(m);
+}
+
+const struct hash_ops bus_message_hash_ops = {
+ .hash = trivial_hash_func,
+ .compare = trivial_compare_func,
+ .free_value = bus_message_unref_wrapper,
+};
diff --git a/src/shared/bus-util.h b/src/shared/bus-util.h
new file mode 100644
index 0000000..27dd6c1
--- /dev/null
+++ b/src/shared/bus-util.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+#include "sd-event.h"
+
+#include "errno-util.h"
+#include "macro.h"
+#include "string-util.h"
+#include "time-util.h"
+
+typedef enum BusTransport {
+ BUS_TRANSPORT_LOCAL,
+ BUS_TRANSPORT_REMOTE,
+ BUS_TRANSPORT_MACHINE,
+ _BUS_TRANSPORT_MAX,
+ _BUS_TRANSPORT_INVALID = -1
+} BusTransport;
+
+int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name);
+
+typedef bool (*check_idle_t)(void *userdata);
+
+int bus_event_loop_with_idle(sd_event *e, sd_bus *bus, const char *name, usec_t timeout, check_idle_t check_idle, void *userdata);
+
+int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error);
+bool bus_error_is_unknown_service(const sd_bus_error *error);
+
+int bus_check_peercred(sd_bus *c);
+
+int bus_connect_system_systemd(sd_bus **_bus);
+int bus_connect_user_systemd(sd_bus **_bus);
+
+int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **bus);
+int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus);
+
+#define bus_log_address_error(r) \
+ ({ \
+ int _k = (r); \
+ log_error_errno(_k, \
+ _k == -ENOMEDIUM ? "Failed to set bus address: $DBUS_SESSION_BUS_ADDRESS and $XDG_RUNTIME_DIR not defined (consider using --machine=<user>@.host --user to connect to bus of other user)" : \
+ "Failed to set bus address: %m"); \
+ })
+
+#define bus_log_connect_error(r) \
+ ({ \
+ int _k = (r); \
+ log_error_errno(_k, \
+ _k == -ENOMEDIUM ? "Failed to connect to bus: $DBUS_SESSION_BUS_ADDRESS and $XDG_RUNTIME_DIR not defined (consider using --machine=<user>@.host --user to connect to bus of other user)" : \
+ ERRNO_IS_PRIVILEGE(_k) ? "Failed to connect to bus: Operation not permitted (consider using --machine=<user>@.host --user to connect to bus of other user)" : \
+ "Failed to connect to bus: %m"); \
+ })
+
+#define bus_log_parse_error(r) \
+ log_error_errno(r, "Failed to parse bus message: %m")
+
+#define bus_log_create_error(r) \
+ log_error_errno(r, "Failed to create bus message: %m")
+
+int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path);
+int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external);
+
+int bus_track_add_name_many(sd_bus_track *t, char **l);
+
+int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description);
+static inline int bus_open_system_watch_bind(sd_bus **ret) {
+ return bus_open_system_watch_bind_with_description(ret, NULL);
+}
+
+int bus_reply_pair_array(sd_bus_message *m, char **l);
+
+extern const struct hash_ops bus_message_hash_ops;
diff --git a/src/shared/bus-wait-for-jobs.c b/src/shared/bus-wait-for-jobs.c
new file mode 100644
index 0000000..b2a9e03
--- /dev/null
+++ b/src/shared/bus-wait-for-jobs.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "bus-wait-for-jobs.h"
+#include "set.h"
+#include "bus-util.h"
+#include "bus-internal.h"
+#include "unit-def.h"
+#include "escape.h"
+#include "strv.h"
+
+typedef struct BusWaitForJobs {
+ sd_bus *bus;
+
+ /* The set of jobs to wait for, as bus object paths */
+ Set *jobs;
+
+ /* The unit name and job result of the last Job message */
+ char *name;
+ char *result;
+
+ sd_bus_slot *slot_job_removed;
+ sd_bus_slot *slot_disconnected;
+} BusWaitForJobs;
+
+static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ assert(m);
+
+ log_error("Warning! D-Bus connection terminated.");
+ sd_bus_close(sd_bus_message_get_bus(m));
+
+ return 0;
+}
+
+static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ const char *path, *unit, *result;
+ BusWaitForJobs *d = userdata;
+ uint32_t id;
+ char *found;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result);
+ if (r < 0) {
+ bus_log_parse_error(r);
+ return 0;
+ }
+
+ found = set_remove(d->jobs, (char*) path);
+ if (!found)
+ return 0;
+
+ free(found);
+
+ (void) free_and_strdup(&d->result, empty_to_null(result));
+
+ (void) free_and_strdup(&d->name, empty_to_null(unit));
+
+ return 0;
+}
+
+void bus_wait_for_jobs_free(BusWaitForJobs *d) {
+ if (!d)
+ return;
+
+ set_free(d->jobs);
+
+ sd_bus_slot_unref(d->slot_disconnected);
+ sd_bus_slot_unref(d->slot_job_removed);
+
+ sd_bus_unref(d->bus);
+
+ free(d->name);
+ free(d->result);
+
+ free(d);
+}
+
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret) {
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *d = NULL;
+ int r;
+
+ assert(bus);
+ assert(ret);
+
+ d = new(BusWaitForJobs, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (BusWaitForJobs) {
+ .bus = sd_bus_ref(bus),
+ };
+
+ /* When we are a bus client we match by sender. Direct
+ * connections OTOH have no initialized sender field, and
+ * hence we ignore the sender then */
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_job_removed,
+ bus->bus_client ? "org.freedesktop.systemd1" : NULL,
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved",
+ match_job_removed, NULL, d);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_disconnected,
+ "org.freedesktop.DBus.Local",
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "Disconnected",
+ match_disconnected, NULL, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(d);
+
+ return 0;
+}
+
+static int bus_process_wait(sd_bus *bus) {
+ int r;
+
+ for (;;) {
+ r = sd_bus_process(bus, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ r = sd_bus_wait(bus, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int bus_job_get_service_result(BusWaitForJobs *d, char **result) {
+ _cleanup_free_ char *dbus_path = NULL;
+
+ assert(d);
+ assert(d->name);
+ assert(result);
+
+ if (!endswith(d->name, ".service"))
+ return -EINVAL;
+
+ dbus_path = unit_dbus_path_from_name(d->name);
+ if (!dbus_path)
+ return -ENOMEM;
+
+ return sd_bus_get_property_string(d->bus,
+ "org.freedesktop.systemd1",
+ dbus_path,
+ "org.freedesktop.systemd1.Service",
+ "Result",
+ NULL,
+ result);
+}
+
+static void log_job_error_with_service_result(const char* service, const char *result, const char* const* extra_args) {
+ _cleanup_free_ char *service_shell_quoted = NULL;
+ const char *systemctl = "systemctl", *journalctl = "journalctl";
+
+ static const struct {
+ const char *result, *explanation;
+ } explanations[] = {
+ { "resources", "of unavailable resources or another system error" },
+ { "protocol", "the service did not take the steps required by its unit configuration" },
+ { "timeout", "a timeout was exceeded" },
+ { "exit-code", "the control process exited with error code" },
+ { "signal", "a fatal signal was delivered to the control process" },
+ { "core-dump", "a fatal signal was delivered causing the control process to dump core" },
+ { "watchdog", "the service failed to send watchdog ping" },
+ { "start-limit", "start of the service was attempted too often" }
+ };
+
+ assert(service);
+
+ service_shell_quoted = shell_maybe_quote(service, ESCAPE_BACKSLASH);
+
+ if (!strv_isempty((char**) extra_args)) {
+ _cleanup_free_ char *t;
+
+ t = strv_join((char**) extra_args, " ");
+ systemctl = strjoina("systemctl ", t ? : "<args>");
+ journalctl = strjoina("journalctl ", t ? : "<args>");
+ }
+
+ if (!isempty(result)) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(explanations); ++i)
+ if (streq(result, explanations[i].result))
+ break;
+
+ if (i < ELEMENTSOF(explanations)) {
+ log_error("Job for %s failed because %s.\n"
+ "See \"%s status %s\" and \"%s -xe\" for details.\n",
+ service,
+ explanations[i].explanation,
+ systemctl,
+ service_shell_quoted ?: "<service>",
+ journalctl);
+ goto finish;
+ }
+ }
+
+ log_error("Job for %s failed.\n"
+ "See \"%s status %s\" and \"%s -xe\" for details.\n",
+ service,
+ systemctl,
+ service_shell_quoted ?: "<service>",
+ journalctl);
+
+finish:
+ /* For some results maybe additional explanation is required */
+ if (streq_ptr(result, "start-limit"))
+ log_info("To force a start use \"%1$s reset-failed %2$s\"\n"
+ "followed by \"%1$s start %2$s\" again.",
+ systemctl,
+ service_shell_quoted ?: "<service>");
+}
+
+static int check_wait_response(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+ assert(d);
+ assert(d->name);
+ assert(d->result);
+
+ if (!quiet) {
+ if (streq(d->result, "canceled"))
+ log_error("Job for %s canceled.", strna(d->name));
+ else if (streq(d->result, "timeout"))
+ log_error("Job for %s timed out.", strna(d->name));
+ else if (streq(d->result, "dependency"))
+ log_error("A dependency job for %s failed. See 'journalctl -xe' for details.", strna(d->name));
+ else if (streq(d->result, "invalid"))
+ log_error("%s is not active, cannot reload.", strna(d->name));
+ else if (streq(d->result, "assert"))
+ log_error("Assertion failed on job for %s.", strna(d->name));
+ else if (streq(d->result, "unsupported"))
+ log_error("Operation on or unit type of %s not supported on this system.", strna(d->name));
+ else if (streq(d->result, "collected"))
+ log_error("Queued job for %s was garbage collected.", strna(d->name));
+ else if (streq(d->result, "once"))
+ log_error("Unit %s was started already once and can't be started again.", strna(d->name));
+ else if (!STR_IN_SET(d->result, "done", "skipped")) {
+
+ if (d->name && endswith(d->name, ".service")) {
+ _cleanup_free_ char *result = NULL;
+ int q;
+
+ q = bus_job_get_service_result(d, &result);
+ if (q < 0)
+ log_debug_errno(q, "Failed to get Result property of unit %s: %m", d->name);
+
+ log_job_error_with_service_result(d->name, result, extra_args);
+ } else
+ log_error("Job failed. See \"journalctl -xe\" for details.");
+ }
+ }
+
+ if (STR_IN_SET(d->result, "canceled", "collected"))
+ return -ECANCELED;
+ else if (streq(d->result, "timeout"))
+ return -ETIME;
+ else if (streq(d->result, "dependency"))
+ return -EIO;
+ else if (streq(d->result, "invalid"))
+ return -ENOEXEC;
+ else if (streq(d->result, "assert"))
+ return -EPROTO;
+ else if (streq(d->result, "unsupported"))
+ return -EOPNOTSUPP;
+ else if (streq(d->result, "once"))
+ return -ESTALE;
+ else if (STR_IN_SET(d->result, "done", "skipped"))
+ return 0;
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Unexpected job result, assuming server side newer than us: %s", d->result);
+}
+
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args) {
+ int r = 0;
+
+ assert(d);
+
+ while (!set_isempty(d->jobs)) {
+ int q;
+
+ q = bus_process_wait(d->bus);
+ if (q < 0)
+ return log_error_errno(q, "Failed to wait for response: %m");
+
+ if (d->name && d->result) {
+ q = check_wait_response(d, quiet, extra_args);
+ /* Return the first error as it is most likely to be
+ * meaningful. */
+ if (q < 0 && r == 0)
+ r = q;
+
+ log_debug_errno(q, "Got result %s/%m for job %s", d->result, d->name);
+ }
+
+ d->name = mfree(d->name);
+ d->result = mfree(d->result);
+ }
+
+ return r;
+}
+
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path) {
+ assert(d);
+
+ return set_put_strdup(&d->jobs, path);
+}
+
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet) {
+ int r;
+
+ r = bus_wait_for_jobs_add(d, path);
+ if (r < 0)
+ return log_oom();
+
+ return bus_wait_for_jobs(d, quiet, NULL);
+}
diff --git a/src/shared/bus-wait-for-jobs.h b/src/shared/bus-wait-for-jobs.h
new file mode 100644
index 0000000..0155887
--- /dev/null
+++ b/src/shared/bus-wait-for-jobs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-bus.h"
+
+#include "macro.h"
+
+typedef struct BusWaitForJobs BusWaitForJobs;
+
+int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret);
+void bus_wait_for_jobs_free(BusWaitForJobs *d);
+int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path);
+int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args);
+int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForJobs*, bus_wait_for_jobs_free);
diff --git a/src/shared/bus-wait-for-units.c b/src/shared/bus-wait-for-units.c
new file mode 100644
index 0000000..4f1c505
--- /dev/null
+++ b/src/shared/bus-wait-for-units.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bus-map-properties.h"
+#include "bus-wait-for-units.h"
+#include "hashmap.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-def.h"
+
+typedef struct WaitForItem {
+ BusWaitForUnits *parent;
+
+ BusWaitForUnitsFlags flags;
+
+ char *bus_path;
+
+ sd_bus_slot *slot_get_all;
+ sd_bus_slot *slot_properties_changed;
+
+ bus_wait_for_units_unit_callback unit_callback;
+ void *userdata;
+
+ char *active_state;
+ uint32_t job_id;
+ char *clean_result;
+} WaitForItem;
+
+typedef struct BusWaitForUnits {
+ sd_bus *bus;
+ sd_bus_slot *slot_disconnected;
+
+ Hashmap *items;
+
+ bus_wait_for_units_ready_callback ready_callback;
+ void *userdata;
+
+ WaitForItem *current;
+
+ BusWaitForUnitsState state;
+ bool has_failed:1;
+} BusWaitForUnits;
+
+static WaitForItem *wait_for_item_free(WaitForItem *item) {
+ int r;
+
+ if (!item)
+ return NULL;
+
+ if (item->parent) {
+ if (FLAGS_SET(item->flags, BUS_WAIT_REFFED) && item->bus_path && item->parent->bus) {
+ r = sd_bus_call_method_async(
+ item->parent->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.systemd1.Unit",
+ "Unref",
+ NULL,
+ NULL,
+ NULL);
+ if (r < 0)
+ log_debug_errno(r, "Failed to drop reference to unit %s, ignoring: %m", item->bus_path);
+ }
+
+ assert_se(hashmap_remove(item->parent->items, item->bus_path) == item);
+
+ if (item->parent->current == item)
+ item->parent->current = NULL;
+ }
+
+ sd_bus_slot_unref(item->slot_properties_changed);
+ sd_bus_slot_unref(item->slot_get_all);
+
+ free(item->bus_path);
+ free(item->active_state);
+ free(item->clean_result);
+
+ return mfree(item);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(WaitForItem*, wait_for_item_free);
+
+static void call_unit_callback_and_wait(BusWaitForUnits *d, WaitForItem *item, bool good) {
+ d->current = item;
+
+ if (item->unit_callback)
+ item->unit_callback(d, item->bus_path, good, item->userdata);
+
+ wait_for_item_free(item);
+}
+
+static void bus_wait_for_units_clear(BusWaitForUnits *d) {
+ WaitForItem *item;
+
+ assert(d);
+
+ d->slot_disconnected = sd_bus_slot_unref(d->slot_disconnected);
+ d->bus = sd_bus_unref(d->bus);
+
+ while ((item = hashmap_first(d->items)))
+ call_unit_callback_and_wait(d, item, false);
+
+ d->items = hashmap_free(d->items);
+}
+
+static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ BusWaitForUnits *d = userdata;
+
+ assert(m);
+ assert(d);
+
+ log_error("Warning! D-Bus connection terminated.");
+
+ bus_wait_for_units_clear(d);
+
+ if (d->ready_callback)
+ d->ready_callback(d, false, d->userdata);
+ else /* If no ready callback is specified close the connection so that the event loop exits */
+ sd_bus_close(sd_bus_message_get_bus(m));
+
+ return 0;
+}
+
+int bus_wait_for_units_new(sd_bus *bus, BusWaitForUnits **ret) {
+ _cleanup_(bus_wait_for_units_freep) BusWaitForUnits *d = NULL;
+ int r;
+
+ assert(bus);
+ assert(ret);
+
+ d = new(BusWaitForUnits, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (BusWaitForUnits) {
+ .state = BUS_WAIT_SUCCESS,
+ .bus = sd_bus_ref(bus),
+ };
+
+ r = sd_bus_match_signal_async(
+ bus,
+ &d->slot_disconnected,
+ "org.freedesktop.DBus.Local",
+ NULL,
+ "org.freedesktop.DBus.Local",
+ "Disconnected",
+ match_disconnected, NULL, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(d);
+ return 0;
+}
+
+BusWaitForUnits* bus_wait_for_units_free(BusWaitForUnits *d) {
+ if (!d)
+ return NULL;
+
+ bus_wait_for_units_clear(d);
+ sd_bus_slot_unref(d->slot_disconnected);
+ sd_bus_unref(d->bus);
+
+ return mfree(d);
+}
+
+static bool bus_wait_for_units_is_ready(BusWaitForUnits *d) {
+ assert(d);
+
+ if (!d->bus) /* Disconnected? */
+ return true;
+
+ return hashmap_isempty(d->items);
+}
+
+void bus_wait_for_units_set_ready_callback(BusWaitForUnits *d, bus_wait_for_units_ready_callback callback, void *userdata) {
+ assert(d);
+
+ d->ready_callback = callback;
+ d->userdata = userdata;
+}
+
+static void bus_wait_for_units_check_ready(BusWaitForUnits *d) {
+ assert(d);
+
+ if (!bus_wait_for_units_is_ready(d))
+ return;
+
+ d->state = d->has_failed ? BUS_WAIT_FAILURE : BUS_WAIT_SUCCESS;
+
+ if (d->ready_callback)
+ d->ready_callback(d, d->state, d->userdata);
+}
+
+static void wait_for_item_check_ready(WaitForItem *item) {
+ BusWaitForUnits *d;
+
+ assert(item);
+ assert_se(d = item->parent);
+
+ if (FLAGS_SET(item->flags, BUS_WAIT_FOR_MAINTENANCE_END)) {
+
+ if (item->clean_result && !streq(item->clean_result, "success"))
+ d->has_failed = true;
+
+ if (!item->active_state || streq(item->active_state, "maintenance"))
+ return;
+ }
+
+ if (FLAGS_SET(item->flags, BUS_WAIT_NO_JOB) && item->job_id != 0)
+ return;
+
+ if (FLAGS_SET(item->flags, BUS_WAIT_FOR_INACTIVE)) {
+
+ if (streq_ptr(item->active_state, "failed"))
+ d->has_failed = true;
+ else if (!streq_ptr(item->active_state, "inactive"))
+ return;
+ }
+
+ call_unit_callback_and_wait(d, item, true);
+ bus_wait_for_units_check_ready(d);
+}
+
+static int property_map_job(
+ sd_bus *bus,
+ const char *member,
+ sd_bus_message *m,
+ sd_bus_error *error,
+ void *userdata) {
+
+ WaitForItem *item = userdata;
+ const char *path;
+ uint32_t id;
+ int r;
+
+ assert(item);
+
+ r = sd_bus_message_read(m, "(uo)", &id, &path);
+ if (r < 0)
+ return r;
+
+ item->job_id = id;
+ return 0;
+}
+
+static int wait_for_item_parse_properties(WaitForItem *item, sd_bus_message *m) {
+
+ static const struct bus_properties_map map[] = {
+ { "ActiveState", "s", NULL, offsetof(WaitForItem, active_state) },
+ { "Job", "(uo)", property_map_job, 0 },
+ { "CleanResult", "s", NULL, offsetof(WaitForItem, clean_result) },
+ {}
+ };
+
+ int r;
+
+ assert(item);
+ assert(m);
+
+ r = bus_message_map_all_properties(m, map, BUS_MAP_STRDUP, NULL, item);
+ if (r < 0)
+ return r;
+
+ wait_for_item_check_ready(item);
+ return 0;
+}
+
+static int on_properties_changed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ WaitForItem *item = userdata;
+ const char *interface;
+ int r;
+
+ assert(item);
+
+ r = sd_bus_message_read(m, "s", &interface);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse PropertiesChanged signal: %m");
+ return 0;
+ }
+
+ if (!streq(interface, "org.freedesktop.systemd1.Unit"))
+ return 0;
+
+ r = wait_for_item_parse_properties(item, m);
+ if (r < 0)
+ log_debug_errno(r, "Failed to process PropertiesChanged signal: %m");
+
+ return 0;
+}
+
+static int on_get_all_properties(sd_bus_message *m, void *userdata, sd_bus_error *error) {
+ WaitForItem *item = userdata;
+ int r;
+
+ assert(item);
+
+ if (sd_bus_error_is_set(error)) {
+ BusWaitForUnits *d = item->parent;
+
+ d->has_failed = true;
+
+ log_debug_errno(sd_bus_error_get_errno(error), "GetAll() failed for %s: %s",
+ item->bus_path, error->message);
+
+ call_unit_callback_and_wait(d, item, false);
+ bus_wait_for_units_check_ready(d);
+ return 0;
+ }
+
+ r = wait_for_item_parse_properties(item, m);
+ if (r < 0)
+ log_debug_errno(r, "Failed to process GetAll method reply: %m");
+
+ return 0;
+}
+
+int bus_wait_for_units_add_unit(
+ BusWaitForUnits *d,
+ const char *unit,
+ BusWaitForUnitsFlags flags,
+ bus_wait_for_units_unit_callback callback,
+ void *userdata) {
+
+ _cleanup_(wait_for_item_freep) WaitForItem *item = NULL;
+ int r;
+
+ assert(d);
+ assert(unit);
+
+ assert(flags != 0);
+
+ r = hashmap_ensure_allocated(&d->items, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ item = new(WaitForItem, 1);
+ if (!item)
+ return -ENOMEM;
+
+ *item = (WaitForItem) {
+ .flags = flags,
+ .bus_path = unit_dbus_path_from_name(unit),
+ .unit_callback = callback,
+ .userdata = userdata,
+ .job_id = UINT32_MAX,
+ };
+
+ if (!item->bus_path)
+ return -ENOMEM;
+
+ if (!FLAGS_SET(item->flags, BUS_WAIT_REFFED)) {
+ r = sd_bus_call_method_async(
+ d->bus,
+ NULL,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.systemd1.Unit",
+ "Ref",
+ NULL,
+ NULL,
+ NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add reference to unit %s: %m", unit);
+
+ item->flags |= BUS_WAIT_REFFED;
+ }
+
+ r = sd_bus_match_signal_async(
+ d->bus,
+ &item->slot_properties_changed,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.DBus.Properties",
+ "PropertiesChanged",
+ on_properties_changed,
+ NULL,
+ item);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request match for PropertiesChanged signal: %m");
+
+ r = sd_bus_call_method_async(
+ d->bus,
+ &item->slot_get_all,
+ "org.freedesktop.systemd1",
+ item->bus_path,
+ "org.freedesktop.DBus.Properties",
+ "GetAll",
+ on_get_all_properties,
+ item,
+ "s", FLAGS_SET(item->flags, BUS_WAIT_FOR_MAINTENANCE_END) ? NULL : "org.freedesktop.systemd1.Unit");
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request properties of unit %s: %m", unit);
+
+ r = hashmap_put(d->items, item->bus_path, item);
+ if (r < 0)
+ return r;
+
+ d->state = BUS_WAIT_RUNNING;
+ item->parent = d;
+ TAKE_PTR(item);
+ return 0;
+}
+
+int bus_wait_for_units_run(BusWaitForUnits *d) {
+ int r;
+
+ assert(d);
+
+ while (d->state == BUS_WAIT_RUNNING) {
+
+ r = sd_bus_process(d->bus, NULL);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = sd_bus_wait(d->bus, (uint64_t) -1);
+ if (r < 0)
+ return r;
+ }
+
+ return d->state;
+}
+
+BusWaitForUnitsState bus_wait_for_units_state(BusWaitForUnits *d) {
+ assert(d);
+
+ return d->state;
+}
diff --git a/src/shared/bus-wait-for-units.h b/src/shared/bus-wait-for-units.h
new file mode 100644
index 0000000..f7ab666
--- /dev/null
+++ b/src/shared/bus-wait-for-units.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+#include "sd-bus.h"
+
+typedef struct BusWaitForUnits BusWaitForUnits;
+
+typedef enum BusWaitForUnitsState {
+ BUS_WAIT_SUCCESS, /* Nothing to wait for anymore and nothing failed */
+ BUS_WAIT_FAILURE, /* dito, but something failed */
+ BUS_WAIT_RUNNING, /* Still something to wait for */
+ _BUS_WAIT_FOR_UNITS_STATE_MAX,
+ _BUS_WAIT_FOR_UNITS_STATE_INVALID = -1,
+} BusWaitForUnitsState;
+
+typedef enum BusWaitForUnitsFlags {
+ BUS_WAIT_FOR_MAINTENANCE_END = 1 << 0, /* Wait until the unit is no longer in maintenance state */
+ BUS_WAIT_FOR_INACTIVE = 1 << 1, /* Wait until the unit is back in inactive or dead state */
+ BUS_WAIT_NO_JOB = 1 << 2, /* Wait until there's no more job pending */
+ BUS_WAIT_REFFED = 1 << 3, /* The unit is already reffed with RefUnit() */
+} BusWaitForUnitsFlags;
+
+typedef void (*bus_wait_for_units_ready_callback)(BusWaitForUnits *d, BusWaitForUnitsState state, void *userdata);
+typedef void (*bus_wait_for_units_unit_callback)(BusWaitForUnits *d, const char *unit_path, bool good, void *userdata);
+
+int bus_wait_for_units_new(sd_bus *bus, BusWaitForUnits **ret);
+BusWaitForUnits* bus_wait_for_units_free(BusWaitForUnits *d);
+
+BusWaitForUnitsState bus_wait_for_units_state(BusWaitForUnits *d);
+void bus_wait_for_units_set_ready_callback(BusWaitForUnits *d, bus_wait_for_units_ready_callback callback, void *userdata);
+int bus_wait_for_units_add_unit(BusWaitForUnits *d, const char *unit, BusWaitForUnitsFlags flags, bus_wait_for_units_unit_callback callback, void *userdata);
+int bus_wait_for_units_run(BusWaitForUnits *d);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForUnits*, bus_wait_for_units_free);
diff --git a/src/shared/calendarspec.c b/src/shared/calendarspec.c
new file mode 100644
index 0000000..7162592
--- /dev/null
+++ b/src/shared/calendarspec.c
@@ -0,0 +1,1405 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "alloc-util.h"
+#include "calendarspec.h"
+#include "errno-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define BITS_WEEKDAYS 127
+#define MIN_YEAR 1970
+#define MAX_YEAR 2199
+
+/* An arbitrary limit on the length of the chains of components. We don't want to
+ * build a very long linked list, which would be slow to iterate over and might cause
+ * our stack to overflow. It's unlikely that legitimate uses require more than a few
+ * linked compenents anyway. */
+#define CALENDARSPEC_COMPONENTS_MAX 240
+
+/* Let's make sure that the microsecond component is safe to be stored in an 'int' */
+assert_cc(INT_MAX >= USEC_PER_SEC);
+
+static void chain_free(CalendarComponent *c) {
+ CalendarComponent *n;
+
+ while (c) {
+ n = c->next;
+ free(c);
+ c = n;
+ }
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarComponent*, chain_free);
+
+CalendarSpec* calendar_spec_free(CalendarSpec *c) {
+
+ if (!c)
+ return NULL;
+
+ chain_free(c->year);
+ chain_free(c->month);
+ chain_free(c->day);
+ chain_free(c->hour);
+ chain_free(c->minute);
+ chain_free(c->microsecond);
+ free(c->timezone);
+
+ return mfree(c);
+}
+
+static int component_compare(CalendarComponent * const *a, CalendarComponent * const *b) {
+ int r;
+
+ r = CMP((*a)->start, (*b)->start);
+ if (r != 0)
+ return r;
+
+ r = CMP((*a)->stop, (*b)->stop);
+ if (r != 0)
+ return r;
+
+ return CMP((*a)->repeat, (*b)->repeat);
+}
+
+static void normalize_chain(CalendarComponent **c) {
+ CalendarComponent **b, *i, **j, *next;
+ size_t n = 0, k;
+
+ assert(c);
+
+ for (i = *c; i; i = i->next) {
+ n++;
+
+ /*
+ * While we're counting the chain, also normalize `stop`
+ * so the length of the range is a multiple of `repeat`
+ */
+ if (i->stop > i->start && i->repeat > 0)
+ i->stop -= (i->stop - i->start) % i->repeat;
+
+ /* If a repeat value is specified, but it cannot even be triggered once, let's suppress
+ * it.
+ *
+ * Similar, if the stop value is the same as the start value, then let's just make this a
+ * non-repeating chain element */
+ if ((i->stop > i->start && i->repeat > 0 && i->start + i->repeat > i->stop) ||
+ i->start == i->stop) {
+ i->repeat = 0;
+ i->stop = -1;
+ }
+ }
+
+ if (n <= 1)
+ return;
+
+ j = b = newa(CalendarComponent*, n);
+ for (i = *c; i; i = i->next)
+ *(j++) = i;
+
+ typesafe_qsort(b, n, component_compare);
+
+ b[n-1]->next = NULL;
+ next = b[n-1];
+
+ /* Drop non-unique entries */
+ for (k = n-1; k > 0; k--) {
+ if (component_compare(&b[k-1], &next) == 0) {
+ free(b[k-1]);
+ continue;
+ }
+
+ b[k-1]->next = next;
+ next = b[k-1];
+ }
+
+ *c = next;
+}
+
+static void fix_year(CalendarComponent *c) {
+ /* Turns 12 → 2012, 89 → 1989 */
+
+ while (c) {
+ if (c->start >= 0 && c->start < 70)
+ c->start += 2000;
+
+ if (c->stop >= 0 && c->stop < 70)
+ c->stop += 2000;
+
+ if (c->start >= 70 && c->start < 100)
+ c->start += 1900;
+
+ if (c->stop >= 70 && c->stop < 100)
+ c->stop += 1900;
+
+ c = c->next;
+ }
+}
+
+int calendar_spec_normalize(CalendarSpec *c) {
+ assert(c);
+
+ if (streq_ptr(c->timezone, "UTC")) {
+ c->utc = true;
+ c->timezone = mfree(c->timezone);
+ }
+
+ if (c->weekdays_bits <= 0 || c->weekdays_bits >= BITS_WEEKDAYS)
+ c->weekdays_bits = -1;
+
+ if (c->end_of_month && !c->day)
+ c->end_of_month = false;
+
+ fix_year(c->year);
+
+ normalize_chain(&c->year);
+ normalize_chain(&c->month);
+ normalize_chain(&c->day);
+ normalize_chain(&c->hour);
+ normalize_chain(&c->minute);
+ normalize_chain(&c->microsecond);
+
+ return 0;
+}
+
+static bool chain_valid(CalendarComponent *c, int from, int to, bool end_of_month) {
+ assert(to >= from);
+
+ if (!c)
+ return true;
+
+ /* Forbid dates more than 28 days from the end of the month */
+ if (end_of_month)
+ to -= 3;
+
+ if (c->start < from || c->start > to)
+ return false;
+
+ /* Avoid overly large values that could cause overflow */
+ if (c->repeat > to - from)
+ return false;
+
+ /*
+ * c->repeat must be short enough so at least one repetition may
+ * occur before the end of the interval. For dates scheduled
+ * relative to the end of the month, c->start and c->stop
+ * correspond to the Nth last day of the month.
+ */
+ if (c->stop >= 0) {
+ if (c->stop < from || c ->stop > to)
+ return false;
+
+ if (c->start + c->repeat > c->stop)
+ return false;
+ } else {
+ if (end_of_month && c->start - c->repeat < from)
+ return false;
+
+ if (!end_of_month && c->start + c->repeat > to)
+ return false;
+ }
+
+ if (c->next)
+ return chain_valid(c->next, from, to, end_of_month);
+
+ return true;
+}
+
+_pure_ bool calendar_spec_valid(CalendarSpec *c) {
+ assert(c);
+
+ if (c->weekdays_bits > BITS_WEEKDAYS)
+ return false;
+
+ if (!chain_valid(c->year, MIN_YEAR, MAX_YEAR, false))
+ return false;
+
+ if (!chain_valid(c->month, 1, 12, false))
+ return false;
+
+ if (!chain_valid(c->day, 1, 31, c->end_of_month))
+ return false;
+
+ if (!chain_valid(c->hour, 0, 23, false))
+ return false;
+
+ if (!chain_valid(c->minute, 0, 59, false))
+ return false;
+
+ if (!chain_valid(c->microsecond, 0, 60*USEC_PER_SEC-1, false))
+ return false;
+
+ return true;
+}
+
+static void format_weekdays(FILE *f, const CalendarSpec *c) {
+ static const char *const days[] = {
+ "Mon",
+ "Tue",
+ "Wed",
+ "Thu",
+ "Fri",
+ "Sat",
+ "Sun"
+ };
+
+ int l, x;
+ bool need_comma = false;
+
+ assert(f);
+ assert(c);
+ assert(c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS);
+
+ for (x = 0, l = -1; x < (int) ELEMENTSOF(days); x++) {
+
+ if (c->weekdays_bits & (1 << x)) {
+
+ if (l < 0) {
+ if (need_comma)
+ fputc(',', f);
+ else
+ need_comma = true;
+
+ fputs(days[x], f);
+ l = x;
+ }
+
+ } else if (l >= 0) {
+
+ if (x > l + 1) {
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+
+ l = -1;
+ }
+ }
+
+ if (l >= 0 && x > l + 1) {
+ fputs(x > l + 2 ? ".." : ",", f);
+ fputs(days[x-1], f);
+ }
+}
+
+static void format_chain(FILE *f, int space, const CalendarComponent *c, bool usec) {
+ int d = usec ? (int) USEC_PER_SEC : 1;
+
+ assert(f);
+
+ if (!c) {
+ fputc('*', f);
+ return;
+ }
+
+ if (usec && c->start == 0 && c->repeat == USEC_PER_SEC && !c->next) {
+ fputc('*', f);
+ return;
+ }
+
+ assert(c->start >= 0);
+
+ fprintf(f, "%0*i", space, c->start / d);
+ if (c->start % d > 0)
+ fprintf(f, ".%06i", c->start % d);
+
+ if (c->stop > 0)
+ fprintf(f, "..%0*i", space, c->stop / d);
+ if (c->stop % d > 0)
+ fprintf(f, ".%06i", c->stop % d);
+
+ if (c->repeat > 0 && !(c->stop > 0 && c->repeat == d))
+ fprintf(f, "/%i", c->repeat / d);
+ if (c->repeat % d > 0)
+ fprintf(f, ".%06i", c->repeat % d);
+
+ if (c->next) {
+ fputc(',', f);
+ format_chain(f, space, c->next, usec);
+ }
+}
+
+int calendar_spec_to_string(const CalendarSpec *c, char **p) {
+ char *buf = NULL;
+ size_t sz = 0;
+ FILE *f;
+ int r;
+
+ assert(c);
+ assert(p);
+
+ f = open_memstream_unlocked(&buf, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ if (c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS) {
+ format_weekdays(f, c);
+ fputc(' ', f);
+ }
+
+ format_chain(f, 4, c->year, false);
+ fputc('-', f);
+ format_chain(f, 2, c->month, false);
+ fputc(c->end_of_month ? '~' : '-', f);
+ format_chain(f, 2, c->day, false);
+ fputc(' ', f);
+ format_chain(f, 2, c->hour, false);
+ fputc(':', f);
+ format_chain(f, 2, c->minute, false);
+ fputc(':', f);
+ format_chain(f, 2, c->microsecond, true);
+
+ if (c->utc)
+ fputs(" UTC", f);
+ else if (c->timezone) {
+ fputc(' ', f);
+ fputs(c->timezone, f);
+ } else if (IN_SET(c->dst, 0, 1)) {
+
+ /* If daylight saving is explicitly on or off, let's show the used timezone. */
+
+ tzset();
+
+ if (!isempty(tzname[c->dst])) {
+ fputc(' ', f);
+ fputs(tzname[c->dst], f);
+ }
+ }
+
+ r = fflush_and_check(f);
+ fclose(f);
+
+ if (r < 0) {
+ free(buf);
+ return r;
+ }
+
+ *p = buf;
+ return 0;
+}
+
+static int parse_weekdays(const char **p, CalendarSpec *c) {
+ static const struct {
+ const char *name;
+ const int nr;
+ } day_nr[] = {
+ { "Monday", 0 },
+ { "Mon", 0 },
+ { "Tuesday", 1 },
+ { "Tue", 1 },
+ { "Wednesday", 2 },
+ { "Wed", 2 },
+ { "Thursday", 3 },
+ { "Thu", 3 },
+ { "Friday", 4 },
+ { "Fri", 4 },
+ { "Saturday", 5 },
+ { "Sat", 5 },
+ { "Sunday", 6 },
+ { "Sun", 6 }
+ };
+
+ int l = -1;
+ bool first = true;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ for (;;) {
+ size_t i;
+
+ for (i = 0; i < ELEMENTSOF(day_nr); i++) {
+ size_t skip;
+
+ if (!startswith_no_case(*p, day_nr[i].name))
+ continue;
+
+ skip = strlen(day_nr[i].name);
+
+ if (!IN_SET((*p)[skip], 0, '-', '.', ',', ' '))
+ return -EINVAL;
+
+ c->weekdays_bits |= 1 << day_nr[i].nr;
+
+ if (l >= 0) {
+ int j;
+
+ if (l > day_nr[i].nr)
+ return -EINVAL;
+
+ for (j = l + 1; j < day_nr[i].nr; j++)
+ c->weekdays_bits |= 1 << j;
+ }
+
+ *p += skip;
+ break;
+ }
+
+ /* Couldn't find this prefix, so let's assume the
+ weekday was not specified and let's continue with
+ the date */
+ if (i >= ELEMENTSOF(day_nr))
+ return first ? 0 : -EINVAL;
+
+ /* We reached the end of the string */
+ if (**p == 0)
+ return 0;
+
+ /* We reached the end of the weekday spec part */
+ if (**p == ' ') {
+ *p += strspn(*p, " ");
+ return 0;
+ }
+
+ if (**p == '.') {
+ if (l >= 0)
+ return -EINVAL;
+
+ if ((*p)[1] != '.')
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 2;
+
+ /* Support ranges with "-" for backwards compatibility */
+ } else if (**p == '-') {
+ if (l >= 0)
+ return -EINVAL;
+
+ l = day_nr[i].nr;
+ *p += 1;
+
+ } else if (**p == ',') {
+ l = -1;
+ *p += 1;
+ }
+
+ /* Allow a trailing comma but not an open range */
+ if (IN_SET(**p, 0, ' ')) {
+ *p += strspn(*p, " ");
+ return l < 0 ? 0 : -EINVAL;
+ }
+
+ first = false;
+ }
+}
+
+static int parse_one_number(const char *p, const char **e, unsigned long *ret) {
+ char *ee = NULL;
+ unsigned long value;
+
+ errno = 0;
+ value = strtoul(p, &ee, 10);
+ if (errno > 0)
+ return -errno;
+ if (ee == p)
+ return -EINVAL;
+
+ *ret = value;
+ *e = ee;
+ return 0;
+}
+
+static int parse_component_decimal(const char **p, bool usec, int *res) {
+ unsigned long value;
+ const char *e = NULL;
+ int r;
+
+ if (!isdigit(**p))
+ return -EINVAL;
+
+ r = parse_one_number(*p, &e, &value);
+ if (r < 0)
+ return r;
+
+ if (usec) {
+ if (value * USEC_PER_SEC / USEC_PER_SEC != value)
+ return -ERANGE;
+
+ value *= USEC_PER_SEC;
+
+ /* One "." is a decimal point, but ".." is a range separator */
+ if (e[0] == '.' && e[1] != '.') {
+ unsigned add;
+
+ e++;
+ r = parse_fractional_part_u(&e, 6, &add);
+ if (r < 0)
+ return r;
+
+ if (add + value < value)
+ return -ERANGE;
+ value += add;
+ }
+ }
+
+ if (value > INT_MAX)
+ return -ERANGE;
+
+ *p = e;
+ *res = value;
+
+ return 0;
+}
+
+static int const_chain(int value, CalendarComponent **c) {
+ CalendarComponent *cc = NULL;
+
+ assert(c);
+
+ cc = new(CalendarComponent, 1);
+ if (!cc)
+ return -ENOMEM;
+
+ *cc = (CalendarComponent) {
+ .start = value,
+ .stop = -1,
+ .repeat = 0,
+ .next = *c,
+ };
+
+ *c = cc;
+
+ return 0;
+}
+
+static int calendarspec_from_time_t(CalendarSpec *c, time_t time) {
+ _cleanup_(chain_freep) CalendarComponent
+ *year = NULL, *month = NULL, *day = NULL,
+ *hour = NULL, *minute = NULL, *us = NULL;
+ struct tm tm;
+ int r;
+
+ if (!gmtime_r(&time, &tm))
+ return -ERANGE;
+
+ if (tm.tm_year > INT_MAX - 1900)
+ return -ERANGE;
+
+ r = const_chain(tm.tm_year + 1900, &year);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_mon + 1, &month);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_mday, &day);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_hour, &hour);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_min, &minute);
+ if (r < 0)
+ return r;
+
+ r = const_chain(tm.tm_sec * USEC_PER_SEC, &us);
+ if (r < 0)
+ return r;
+
+ c->utc = true;
+ c->year = TAKE_PTR(year);
+ c->month = TAKE_PTR(month);
+ c->day = TAKE_PTR(day);
+ c->hour = TAKE_PTR(hour);
+ c->minute = TAKE_PTR(minute);
+ c->microsecond = TAKE_PTR(us);
+ return 0;
+}
+
+static int prepend_component(const char **p, bool usec, unsigned nesting, CalendarComponent **c) {
+ int r, start, stop = -1, repeat = 0;
+ CalendarComponent *cc;
+ const char *e = *p;
+
+ assert(p);
+ assert(c);
+
+ if (nesting > CALENDARSPEC_COMPONENTS_MAX)
+ return -ENOBUFS;
+
+ r = parse_component_decimal(&e, usec, &start);
+ if (r < 0)
+ return r;
+
+ if (e[0] == '.' && e[1] == '.') {
+ e += 2;
+ r = parse_component_decimal(&e, usec, &stop);
+ if (r < 0)
+ return r;
+
+ repeat = usec ? USEC_PER_SEC : 1;
+ }
+
+ if (*e == '/') {
+ e++;
+ r = parse_component_decimal(&e, usec, &repeat);
+ if (r < 0)
+ return r;
+
+ if (repeat == 0)
+ return -ERANGE;
+ } else {
+ /* If no repeat value is specified for the µs component, then let's explicitly refuse ranges
+ * below 1s because our default repeat granularity is beyond that. */
+
+ /* Overflow check */
+ if (start > INT_MAX - repeat)
+ return -ERANGE;
+
+ if (usec && stop >= 0 && start + repeat > stop)
+ return -EINVAL;
+ }
+
+ if (!IN_SET(*e, 0, ' ', ',', '-', '~', ':'))
+ return -EINVAL;
+
+ cc = new(CalendarComponent, 1);
+ if (!cc)
+ return -ENOMEM;
+
+ *cc = (CalendarComponent) {
+ .start = start,
+ .stop = stop,
+ .repeat = repeat,
+ .next = *c,
+ };
+
+ *p = e;
+ *c = cc;
+
+ if (*e ==',') {
+ *p += 1;
+ return prepend_component(p, usec, nesting + 1, c);
+ }
+
+ return 0;
+}
+
+static int parse_chain(const char **p, bool usec, CalendarComponent **c) {
+ _cleanup_(chain_freep) CalendarComponent *cc = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(c);
+
+ t = *p;
+
+ if (t[0] == '*') {
+ if (usec) {
+ r = const_chain(0, c);
+ if (r < 0)
+ return r;
+ (*c)->repeat = USEC_PER_SEC;
+ } else
+ *c = NULL;
+
+ *p = t + 1;
+ return 0;
+ }
+
+ r = prepend_component(&t, usec, 0, &cc);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ *c = TAKE_PTR(cc);
+ return 0;
+}
+
+static int parse_date(const char **p, CalendarSpec *c) {
+ _cleanup_(chain_freep) CalendarComponent *first = NULL, *second = NULL, *third = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ t = *p;
+
+ if (*t == 0)
+ return 0;
+
+ /* @TIMESTAMP — UNIX time in seconds since the epoch */
+ if (*t == '@') {
+ unsigned long value;
+ time_t time;
+
+ r = parse_one_number(t + 1, &t, &value);
+ if (r < 0)
+ return r;
+
+ time = value;
+ if ((unsigned long) time != value)
+ return -ERANGE;
+
+ r = calendarspec_from_time_t(c, time);
+ if (r < 0)
+ return r;
+
+ *p = t;
+ return 1; /* finito, don't parse H:M:S after that */
+ }
+
+ r = parse_chain(&t, false, &first);
+ if (r < 0)
+ return r;
+
+ /* Already the end? A ':' as separator? In that case this was a time, not a date */
+ if (IN_SET(*t, 0, ':'))
+ return 0;
+
+ if (*t == '~')
+ c->end_of_month = true;
+ else if (*t != '-')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, false, &second);
+ if (r < 0)
+ return r;
+
+ /* Got two parts, hence it's month and day */
+ if (IN_SET(*t, 0, ' ')) {
+ *p = t + strspn(t, " ");
+ c->month = TAKE_PTR(first);
+ c->day = TAKE_PTR(second);
+ return 0;
+ } else if (c->end_of_month)
+ return -EINVAL;
+
+ if (*t == '~')
+ c->end_of_month = true;
+ else if (*t != '-')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, false, &third);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(*t, 0, ' '))
+ return -EINVAL;
+
+ /* Got three parts, hence it is year, month and day */
+ *p = t + strspn(t, " ");
+ c->year = TAKE_PTR(first);
+ c->month = TAKE_PTR(second);
+ c->day = TAKE_PTR(third);
+ return 0;
+}
+
+static int parse_calendar_time(const char **p, CalendarSpec *c) {
+ _cleanup_(chain_freep) CalendarComponent *h = NULL, *m = NULL, *s = NULL;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(*p);
+ assert(c);
+
+ t = *p;
+
+ /* If no time is specified at all, then this means 00:00:00 */
+ if (*t == 0)
+ goto null_hour;
+
+ r = parse_chain(&t, false, &h);
+ if (r < 0)
+ return r;
+
+ if (*t != ':')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, false, &m);
+ if (r < 0)
+ return r;
+
+ /* Already at the end? Then it's hours and minutes, and seconds are 0 */
+ if (*t == 0)
+ goto null_second;
+
+ if (*t != ':')
+ return -EINVAL;
+
+ t++;
+ r = parse_chain(&t, true, &s);
+ if (r < 0)
+ return r;
+
+ /* At the end? Then it's hours, minutes and seconds */
+ if (*t == 0)
+ goto finish;
+
+ return -EINVAL;
+
+null_hour:
+ r = const_chain(0, &h);
+ if (r < 0)
+ return r;
+
+ r = const_chain(0, &m);
+ if (r < 0)
+ return r;
+
+null_second:
+ r = const_chain(0, &s);
+ if (r < 0)
+ return r;
+
+finish:
+ *p = t;
+ c->hour = TAKE_PTR(h);
+ c->minute = TAKE_PTR(m);
+ c->microsecond = TAKE_PTR(s);
+
+ return 0;
+}
+
+int calendar_spec_from_string(const char *p, CalendarSpec **spec) {
+ const char *utc;
+ _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL;
+ _cleanup_free_ char *p_tmp = NULL;
+ int r;
+
+ assert(p);
+
+ c = new(CalendarSpec, 1);
+ if (!c)
+ return -ENOMEM;
+
+ *c = (CalendarSpec) {
+ .dst = -1,
+ .timezone = NULL,
+ };
+
+ utc = endswith_no_case(p, " UTC");
+ if (utc) {
+ c->utc = true;
+ p = p_tmp = strndup(p, utc - p);
+ if (!p)
+ return -ENOMEM;
+ } else {
+ const char *e = NULL;
+ int j;
+
+ tzset();
+
+ /* Check if the local timezone was specified? */
+ for (j = 0; j <= 1; j++) {
+ if (isempty(tzname[j]))
+ continue;
+
+ e = endswith_no_case(p, tzname[j]);
+ if (!e)
+ continue;
+ if (e == p)
+ continue;
+ if (e[-1] != ' ')
+ continue;
+
+ break;
+ }
+
+ /* Found one of the two timezones specified? */
+ if (IN_SET(j, 0, 1)) {
+ p = p_tmp = strndup(p, e - p - 1);
+ if (!p)
+ return -ENOMEM;
+
+ c->dst = j;
+ } else {
+ const char *last_space;
+
+ last_space = strrchr(p, ' ');
+ if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) {
+ c->timezone = strdup(last_space + 1);
+ if (!c->timezone)
+ return -ENOMEM;
+
+ p = p_tmp = strndup(p, last_space - p);
+ if (!p)
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (isempty(p))
+ return -EINVAL;
+
+ if (strcaseeq(p, "minutely")) {
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "hourly")) {
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "daily")) {
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "monthly")) {
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (STRCASE_IN_SET(p,
+ "annually",
+ "yearly",
+ "anually") /* backwards compatibility */ ) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "weekly")) {
+
+ c->weekdays_bits = 1;
+
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (strcaseeq(p, "quarterly")) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(4, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(10, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else if (STRCASE_IN_SET(p,
+ "biannually",
+ "bi-annually",
+ "semiannually",
+ "semi-annually")) {
+
+ r = const_chain(1, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(7, &c->month);
+ if (r < 0)
+ return r;
+ r = const_chain(1, &c->day);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->hour);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->minute);
+ if (r < 0)
+ return r;
+ r = const_chain(0, &c->microsecond);
+ if (r < 0)
+ return r;
+
+ } else {
+ r = parse_weekdays(&p, c);
+ if (r < 0)
+ return r;
+
+ r = parse_date(&p, c);
+ if (r < 0)
+ return r;
+
+ if (r == 0) {
+ r = parse_calendar_time(&p, c);
+ if (r < 0)
+ return r;
+ }
+
+ if (*p != 0)
+ return -EINVAL;
+ }
+
+ r = calendar_spec_normalize(c);
+ if (r < 0)
+ return r;
+
+ if (!calendar_spec_valid(c))
+ return -EINVAL;
+
+ if (spec)
+ *spec = TAKE_PTR(c);
+ return 0;
+}
+
+static int find_end_of_month(struct tm *tm, bool utc, int day) {
+ struct tm t = *tm;
+
+ t.tm_mon++;
+ t.tm_mday = 1 - day;
+
+ if (mktime_or_timegm(&t, utc) < 0 ||
+ t.tm_mon != tm->tm_mon)
+ return -1;
+
+ return t.tm_mday;
+}
+
+static int find_matching_component(const CalendarSpec *spec, const CalendarComponent *c,
+ struct tm *tm, int *val) {
+ const CalendarComponent *p = c;
+ int start, stop, d = -1;
+ bool d_set = false;
+ int r;
+
+ assert(val);
+
+ if (!c)
+ return 0;
+
+ while (c) {
+ start = c->start;
+ stop = c->stop;
+
+ if (spec->end_of_month && p == spec->day) {
+ start = find_end_of_month(tm, spec->utc, start);
+ stop = find_end_of_month(tm, spec->utc, stop);
+
+ if (stop > 0)
+ SWAP_TWO(start, stop);
+ }
+
+ if (start >= *val) {
+
+ if (!d_set || start < d) {
+ d = start;
+ d_set = true;
+ }
+
+ } else if (c->repeat > 0) {
+ int k;
+
+ k = start + c->repeat * DIV_ROUND_UP(*val - start, c->repeat);
+
+ if ((!d_set || k < d) && (stop < 0 || k <= stop)) {
+ d = k;
+ d_set = true;
+ }
+ }
+
+ c = c->next;
+ }
+
+ if (!d_set)
+ return -ENOENT;
+
+ r = *val != d;
+ *val = d;
+ return r;
+}
+
+static int tm_within_bounds(struct tm *tm, bool utc) {
+ struct tm t;
+ assert(tm);
+
+ /*
+ * Set an upper bound on the year so impossible dates like "*-02-31"
+ * don't cause find_next() to loop forever. tm_year contains years
+ * since 1900, so adjust it accordingly.
+ */
+ if (tm->tm_year + 1900 > MAX_YEAR)
+ return -ERANGE;
+
+ t = *tm;
+ if (mktime_or_timegm(&t, utc) < 0)
+ return negative_errno();
+
+ /* Did any normalization take place? If so, it was out of bounds before */
+ bool good = t.tm_year == tm->tm_year &&
+ t.tm_mon == tm->tm_mon &&
+ t.tm_mday == tm->tm_mday &&
+ t.tm_hour == tm->tm_hour &&
+ t.tm_min == tm->tm_min &&
+ t.tm_sec == tm->tm_sec;
+ if (!good)
+ *tm = t;
+ return good;
+}
+
+static bool matches_weekday(int weekdays_bits, const struct tm *tm, bool utc) {
+ struct tm t;
+ int k;
+
+ if (weekdays_bits < 0 || weekdays_bits >= BITS_WEEKDAYS)
+ return true;
+
+ t = *tm;
+ if (mktime_or_timegm(&t, utc) < 0)
+ return false;
+
+ k = t.tm_wday == 0 ? 6 : t.tm_wday - 1;
+ return (weekdays_bits & (1 << k));
+}
+
+static int find_next(const CalendarSpec *spec, struct tm *tm, usec_t *usec) {
+ struct tm c;
+ int tm_usec;
+ int r;
+
+ /* Returns -ENOENT if the expression is not going to elapse anymore */
+
+ assert(spec);
+ assert(tm);
+
+ c = *tm;
+ tm_usec = *usec;
+
+ for (;;) {
+ /* Normalize the current date */
+ (void) mktime_or_timegm(&c, spec->utc);
+ c.tm_isdst = spec->dst;
+
+ c.tm_year += 1900;
+ r = find_matching_component(spec, spec->year, &c, &c.tm_year);
+ c.tm_year -= 1900;
+
+ if (r > 0) {
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ if (r < 0)
+ return r;
+ if (tm_within_bounds(&c, spec->utc) <= 0)
+ return -ENOENT;
+
+ c.tm_mon += 1;
+ r = find_matching_component(spec, spec->month, &c, &c.tm_mon);
+ c.tm_mon -= 1;
+
+ if (r > 0) {
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ }
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_year++;
+ c.tm_mon = 0;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ r = find_matching_component(spec, spec->day, &c, &c.tm_mday);
+ if (r > 0)
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_mon++;
+ c.tm_mday = 1;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ if (!matches_weekday(spec->weekdays_bits, &c, spec->utc)) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+
+ r = find_matching_component(spec, spec->hour, &c, &c.tm_hour);
+ if (r > 0)
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_mday++;
+ c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ /* The next hour we set might be missing if there
+ * are time zone changes. Let's try again starting at
+ * normalized time. */
+ continue;
+
+ r = find_matching_component(spec, spec->minute, &c, &c.tm_min);
+ if (r > 0)
+ c.tm_sec = tm_usec = 0;
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_hour++;
+ c.tm_min = c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ c.tm_sec = c.tm_sec * USEC_PER_SEC + tm_usec;
+ r = find_matching_component(spec, spec->microsecond, &c, &c.tm_sec);
+ tm_usec = c.tm_sec % USEC_PER_SEC;
+ c.tm_sec /= USEC_PER_SEC;
+
+ if (r < 0 || (r = tm_within_bounds(&c, spec->utc)) < 0) {
+ c.tm_min++;
+ c.tm_sec = tm_usec = 0;
+ continue;
+ }
+ if (r == 0)
+ continue;
+
+ *tm = c;
+ *usec = tm_usec;
+ return 0;
+ }
+}
+
+static int calendar_spec_next_usec_impl(const CalendarSpec *spec, usec_t usec, usec_t *ret_next) {
+ struct tm tm;
+ time_t t;
+ int r;
+ usec_t tm_usec;
+
+ assert(spec);
+
+ if (usec > USEC_TIMESTAMP_FORMATTABLE_MAX)
+ return -EINVAL;
+
+ usec++;
+ t = (time_t) (usec / USEC_PER_SEC);
+ assert_se(localtime_or_gmtime_r(&t, &tm, spec->utc));
+ tm_usec = usec % USEC_PER_SEC;
+
+ r = find_next(spec, &tm, &tm_usec);
+ if (r < 0)
+ return r;
+
+ t = mktime_or_timegm(&tm, spec->utc);
+ if (t < 0)
+ return -EINVAL;
+
+ if (ret_next)
+ *ret_next = (usec_t) t * USEC_PER_SEC + tm_usec;
+
+ return 0;
+}
+
+typedef struct SpecNextResult {
+ usec_t next;
+ int return_value;
+} SpecNextResult;
+
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *ret_next) {
+ SpecNextResult *shared, tmp;
+ int r;
+
+ assert(spec);
+
+ if (isempty(spec->timezone))
+ return calendar_spec_next_usec_impl(spec, usec, ret_next);
+
+ shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if (shared == MAP_FAILED)
+ return negative_errno();
+
+ r = safe_fork("(sd-calendar)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL);
+ if (r < 0) {
+ (void) munmap(shared, sizeof *shared);
+ return r;
+ }
+ if (r == 0) {
+ char *colon_tz;
+
+ /* tzset(3) says $TZ should be prefixed with ":" if we reference timezone files */
+ colon_tz = strjoina(":", spec->timezone);
+
+ if (setenv("TZ", colon_tz, 1) != 0) {
+ shared->return_value = negative_errno();
+ _exit(EXIT_FAILURE);
+ }
+
+ tzset();
+
+ shared->return_value = calendar_spec_next_usec_impl(spec, usec, &shared->next);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ tmp = *shared;
+ if (munmap(shared, sizeof *shared) < 0)
+ return negative_errno();
+
+ if (tmp.return_value == 0 && ret_next)
+ *ret_next = tmp.next;
+
+ return tmp.return_value;
+}
diff --git a/src/shared/calendarspec.h b/src/shared/calendarspec.h
new file mode 100644
index 0000000..3bfe82d
--- /dev/null
+++ b/src/shared/calendarspec.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* A structure for specifying (possibly repetitive) points in calendar
+ * time, a la cron */
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+typedef struct CalendarComponent {
+ int start;
+ int stop;
+ int repeat;
+
+ struct CalendarComponent *next;
+} CalendarComponent;
+
+typedef struct CalendarSpec {
+ int weekdays_bits;
+ bool end_of_month:1;
+ bool utc:1;
+ signed int dst:2;
+ char *timezone;
+
+ CalendarComponent *year;
+ CalendarComponent *month;
+ CalendarComponent *day;
+
+ CalendarComponent *hour;
+ CalendarComponent *minute;
+ CalendarComponent *microsecond;
+} CalendarSpec;
+
+CalendarSpec* calendar_spec_free(CalendarSpec *c);
+
+int calendar_spec_normalize(CalendarSpec *spec);
+bool calendar_spec_valid(CalendarSpec *spec);
+
+int calendar_spec_to_string(const CalendarSpec *spec, char **p);
+int calendar_spec_from_string(const char *p, CalendarSpec **spec);
+
+int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarSpec*, calendar_spec_free);
diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c
new file mode 100644
index 0000000..f197f71
--- /dev/null
+++ b/src/shared/cgroup-setup.c
@@ -0,0 +1,841 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ftw.h>
+#include <unistd.h>
+
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "errno-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "fileio.h"
+#include "user-util.h"
+#include "fd-util.h"
+
+bool cg_is_unified_wanted(void) {
+ static thread_local int wanted = -1;
+ bool b;
+ const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
+ _cleanup_free_ char *c = NULL;
+ int r;
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
+ r = cg_unified_cached(true);
+ if (r >= 0)
+ return (wanted = r >= CGROUP_UNIFIED_ALL);
+
+ /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
+ r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
+ if (r > 0)
+ return (wanted = b);
+
+ /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to
+ * use hybrid or legacy hierarchy. */
+ r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
+ if (r > 0 && streq_ptr(c, "all"))
+ return (wanted = true);
+
+ return (wanted = is_default);
+}
+
+bool cg_is_legacy_wanted(void) {
+ static thread_local int wanted = -1;
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* Check if we have cgroup v2 already mounted. */
+ if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+
+ /* Otherwise, assume that at least partial legacy is wanted,
+ * since cgroup v2 should already be mounted at this point. */
+ return (wanted = true);
+}
+
+bool cg_is_hybrid_wanted(void) {
+ static thread_local int wanted = -1;
+ int r;
+ bool b;
+ const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
+ /* We default to true if the default is "hybrid", obviously, but also when the default is "unified",
+ * because if we get called, it means that unified hierarchy was not mounted. */
+
+ /* If we have a cached value, return that. */
+ if (wanted >= 0)
+ return wanted;
+
+ /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
+ if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
+ return (wanted = false);
+
+ /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache
+ * a non-error result. */
+ r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
+
+ /* The meaning of the kernel option is reversed wrt. to the return value of this function, hence the
+ * negation. */
+ return (wanted = r > 0 ? !b : is_default);
+}
+
+int cg_weight_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_WEIGHT_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_CPU_SHARES_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
+ uint64_t u;
+ int r;
+
+ if (isempty(s)) {
+ *ret = CGROUP_BLKIO_WEIGHT_INVALID;
+ return 0;
+ }
+
+ r = safe_atou64(s, &u);
+ if (r < 0)
+ return r;
+
+ if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
+ return -ERANGE;
+
+ *ret = u;
+ return 0;
+}
+
+static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+ assert(path);
+ assert(sb);
+ assert(ftwbuf);
+
+ if (typeflag != FTW_DP)
+ return 0;
+
+ if (ftwbuf->level < 1)
+ return 0;
+
+ (void) rmdir(path);
+ return 0;
+}
+
+int cg_trim(const char *controller, const char *path, bool delete_root) {
+ _cleanup_free_ char *fs = NULL;
+ int r, q;
+
+ assert(path);
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
+ if (errno == ENOENT)
+ r = 0;
+ else
+ r = errno_or_else(EIO);
+ }
+
+ if (delete_root) {
+ if (rmdir(fs) < 0 && errno != ENOENT)
+ return -errno;
+ }
+
+ q = cg_hybrid_unified();
+ if (q < 0)
+ return q;
+ if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
+ if (q < 0)
+ log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
+ }
+
+ return r;
+}
+
+/* Create a cgroup in the hierarchy of controller.
+ * Returns 0 if the group already existed, 1 on success, negative otherwise.
+ */
+int cg_create(const char *controller, const char *path) {
+ _cleanup_free_ char *fs = NULL;
+ int r;
+
+ r = cg_get_path_and_check(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ r = mkdir_parents(fs, 0755);
+ if (r < 0)
+ return r;
+
+ r = mkdir_errno_wrapper(fs, 0755);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+
+ if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+ if (r < 0)
+ log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
+ }
+
+ return 1;
+}
+
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
+ int r, q;
+
+ assert(pid >= 0);
+
+ r = cg_create(controller, path);
+ if (r < 0)
+ return r;
+
+ q = cg_attach(controller, path, pid);
+ if (q < 0)
+ return q;
+
+ /* This does not remove the cgroup on failure */
+ return r;
+}
+
+int cg_attach(const char *controller, const char *path, pid_t pid) {
+ _cleanup_free_ char *fs = NULL;
+ char c[DECIMAL_STR_MAX(pid_t) + 2];
+ int r;
+
+ assert(path);
+ assert(pid >= 0);
+
+ r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
+ if (r < 0)
+ return r;
+
+ if (pid == 0)
+ pid = getpid_cached();
+
+ xsprintf(c, PID_FMT "\n", pid);
+
+ r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+
+ if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
+ if (r < 0)
+ log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
+ }
+
+ return 0;
+}
+
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
+ int r;
+
+ assert(controller);
+ assert(path);
+ assert(pid >= 0);
+
+ r = cg_attach(controller, path, pid);
+ if (r < 0) {
+ char prefix[strlen(path) + 1];
+
+ /* This didn't work? Then let's try all prefixes of
+ * the destination */
+
+ PATH_FOREACH_PREFIX(prefix, path) {
+ int q;
+
+ q = cg_attach(controller, prefix, pid);
+ if (q >= 0)
+ return q;
+ }
+ }
+
+ return r;
+}
+
+int cg_set_access(
+ const char *controller,
+ const char *path,
+ uid_t uid,
+ gid_t gid) {
+
+ struct Attribute {
+ const char *name;
+ bool fatal;
+ };
+
+ /* cgroup v1, aka legacy/non-unified */
+ static const struct Attribute legacy_attributes[] = {
+ { "cgroup.procs", true },
+ { "tasks", false },
+ { "cgroup.clone_children", false },
+ {},
+ };
+
+ /* cgroup v2, aka unified */
+ static const struct Attribute unified_attributes[] = {
+ { "cgroup.procs", true },
+ { "cgroup.subtree_control", true },
+ { "cgroup.threads", false },
+ {},
+ };
+
+ static const struct Attribute* const attributes[] = {
+ [false] = legacy_attributes,
+ [true] = unified_attributes,
+ };
+
+ _cleanup_free_ char *fs = NULL;
+ const struct Attribute *i;
+ int r, unified;
+
+ assert(path);
+
+ if (uid == UID_INVALID && gid == GID_INVALID)
+ return 0;
+
+ unified = cg_unified_controller(controller);
+ if (unified < 0)
+ return unified;
+
+ /* Configure access to the cgroup itself */
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(fs, 0755, uid, gid);
+ if (r < 0)
+ return r;
+
+ /* Configure access to the cgroup's attributes */
+ for (i = attributes[unified]; i->name; i++) {
+ fs = mfree(fs);
+
+ r = cg_get_path(controller, path, i->name, &fs);
+ if (r < 0)
+ return r;
+
+ r = chmod_and_chown(fs, 0644, uid, gid);
+ if (r < 0) {
+ if (i->fatal)
+ return r;
+
+ log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
+ }
+ }
+
+ if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+ r = cg_hybrid_unified();
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* Always propagate access mode from unified to legacy controller */
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
+ if (r < 0)
+ log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
+ }
+ }
+
+ return 0;
+}
+
+int cg_migrate(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ bool done = false;
+ _cleanup_set_free_ Set *s = NULL;
+ int r, ret = 0;
+ pid_t my_pid;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ s = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+
+ my_pid = getpid_cached();
+
+ do {
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ done = true;
+
+ r = cg_enumerate_processes(cfrom, pfrom, &f);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ /* This might do weird stuff if we aren't a
+ * single-threaded program. However, we
+ * luckily know we are not */
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
+ continue;
+
+ /* Ignore kernel threads. Since they can only
+ * exist in the root cgroup, we only check for
+ * them there. */
+ if (cfrom &&
+ empty_or_root(pfrom) &&
+ is_kernel_thread(pid) > 0)
+ continue;
+
+ r = cg_attach(cto, pto, pid);
+ if (r < 0) {
+ if (ret >= 0 && r != -ESRCH)
+ ret = r;
+ } else if (ret == 0)
+ ret = 1;
+
+ done = false;
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ } while (!done);
+
+ return ret;
+}
+
+int cg_migrate_recursive(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ int r, ret = 0;
+ char *fn;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
+
+ r = cg_enumerate_subgroups(cfrom, pfrom, &d);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_subgroup(d, &fn)) > 0) {
+ _cleanup_free_ char *p = NULL;
+
+ p = path_join(empty_to_root(pfrom), fn);
+ free(fn);
+ if (!p)
+ return -ENOMEM;
+
+ r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
+ if (r != 0 && ret >= 0)
+ ret = r;
+ }
+
+ if (r < 0 && ret >= 0)
+ ret = r;
+
+ if (flags & CGROUP_REMOVE) {
+ r = cg_rmdir(cfrom, pfrom);
+ if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
+ return r;
+ }
+
+ return ret;
+}
+
+int cg_migrate_recursive_fallback(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) {
+
+ int r;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
+ if (r < 0) {
+ char prefix[strlen(pto) + 1];
+
+ /* This didn't work? Then let's try all prefixes of the destination */
+
+ PATH_FOREACH_PREFIX(prefix, pto) {
+ int q;
+
+ q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
+ if (q >= 0)
+ return q;
+ }
+ }
+
+ return r;
+}
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
+ CGroupController c;
+ CGroupMask done;
+ bool created;
+ int r;
+
+ /* This one will create a cgroup in our private tree, but also
+ * duplicate it in the trees specified in mask, and remove it
+ * in all others.
+ *
+ * Returns 0 if the group already existed in the systemd hierarchy,
+ * 1 on success, negative otherwise.
+ */
+
+ /* First create the cgroup in our own hierarchy. */
+ r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+ if (r < 0)
+ return r;
+ created = r;
+
+ /* If we are in the unified hierarchy, we are done now */
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return created;
+
+ supported &= CGROUP_MASK_V1;
+ mask = CGROUP_MASK_EXTEND_JOINED(mask);
+ done = 0;
+
+ /* Otherwise, do the same in the other hierarchies */
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ if (FLAGS_SET(mask, bit))
+ (void) cg_create(n, path);
+
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return created;
+}
+
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
+ int r;
+
+ r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
+ if (r < 0)
+ return r;
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ supported &= CGROUP_MASK_V1;
+ CGroupMask done = 0;
+
+ for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *p = NULL;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (path_callback)
+ p = path_callback(bit, userdata);
+ if (!p)
+ p = path;
+
+ (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return 0;
+}
+
+int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
+ CGroupController c;
+ CGroupMask done;
+ int r = 0, q;
+
+ assert(to_callback);
+
+ supported &= CGROUP_MASK_V1;
+ mask = CGROUP_MASK_EXTEND_JOINED(mask);
+ done = 0;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *to = NULL;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (!FLAGS_SET(mask, bit))
+ continue;
+
+ to = to_callback(bit, userdata);
+
+ /* Remember first error and try continuing */
+ q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
+ r = (r < 0) ? r : q;
+ }
+
+ return r;
+}
+
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
+ int r, q;
+
+ r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
+ if (r < 0)
+ return r;
+
+ q = cg_all_unified();
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return r;
+
+ return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
+}
+
+int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
+ CGroupController c;
+ CGroupMask done;
+ int r = 0, q;
+
+ supported &= CGROUP_MASK_V1;
+ mask = CGROUP_MASK_EXTEND_JOINED(mask);
+ done = 0;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ if (FLAGS_SET(done, bit))
+ continue;
+
+ if (FLAGS_SET(mask, bit)) {
+ /* Remember first error and try continuing */
+ q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
+ r = (r < 0) ? r : q;
+ }
+ done |= CGROUP_MASK_EXTEND_JOINED(bit);
+ }
+
+ return r;
+}
+
+int cg_enable_everywhere(
+ CGroupMask supported,
+ CGroupMask mask,
+ const char *p,
+ CGroupMask *ret_result_mask) {
+
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *fs = NULL;
+ CGroupController c;
+ CGroupMask ret = 0;
+ int r;
+
+ assert(p);
+
+ if (supported == 0) {
+ if (ret_result_mask)
+ *ret_result_mask = 0;
+ return 0;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
+ * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
+ * caller tends to use the returned mask later on to compare if all controllers where properly joined,
+ * and if not requeues realization. This use is the primary purpose of the return value, hence let's
+ * minimize surprises here and reduce triggers for re-realization by always saying we fully
+ * succeeded.) */
+ if (ret_result_mask)
+ *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
+ * CGROUP_MASK_V2: The 'supported' mask
+ * might contain pure-V1 or BPF
+ * controllers, and we never want to
+ * claim that we could enable those with
+ * cgroup.subtree_control */
+ return 0;
+ }
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
+ if (r < 0)
+ return r;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(CGROUP_MASK_V2, bit))
+ continue;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ {
+ char s[1 + strlen(n) + 1];
+
+ s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
+ strcpy(s + 1, n);
+
+ if (!f) {
+ f = fopen(fs, "we");
+ if (!f)
+ return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
+ }
+
+ r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
+ FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
+ clearerr(f);
+
+ /* If we can't turn off a controller, leave it on in the reported resulting mask. This
+ * happens for example when we attempt to turn off a controller up in the tree that is
+ * used down in the tree. */
+ if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
+ * only here, and not follow the same logic
+ * for other errors such as EINVAL or
+ * EOPNOTSUPP or anything else. That's
+ * because EBUSY indicates that the
+ * controllers is currently enabled and
+ * cannot be disabled because something down
+ * the hierarchy is still using it. Any other
+ * error most likely means something like "I
+ * never heard of this controller" or
+ * similar. In the former case it's hence
+ * safe to assume the controller is still on
+ * after the failed operation, while in the
+ * latter case it's safer to assume the
+ * controller is unknown and hence certainly
+ * not enabled. */
+ ret |= bit;
+ } else {
+ /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
+ if (FLAGS_SET(mask, bit))
+ ret |= bit;
+ }
+ }
+ }
+
+ /* Let's return the precise set of controllers now enabled for the cgroup. */
+ if (ret_result_mask)
+ *ret_result_mask = ret;
+
+ return 0;
+}
diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h
new file mode 100644
index 0000000..7eabce2
--- /dev/null
+++ b/src/shared/cgroup-setup.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "cgroup-util.h"
+
+bool cg_is_unified_wanted(void);
+bool cg_is_legacy_wanted(void);
+bool cg_is_hybrid_wanted(void);
+
+int cg_weight_parse(const char *s, uint64_t *ret);
+int cg_cpu_shares_parse(const char *s, uint64_t *ret);
+int cg_blkio_weight_parse(const char *s, uint64_t *ret);
+
+int cg_trim(const char *controller, const char *path, bool delete_root);
+
+int cg_create(const char *controller, const char *path);
+int cg_attach(const char *controller, const char *path, pid_t pid);
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid);
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
+
+int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path);
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata);
+int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata);
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root);
+int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root);
+int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask);
diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c
new file mode 100644
index 0000000..f7c24f8
--- /dev/null
+++ b/src/shared/cgroup-show.c
@@ -0,0 +1,400 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-util.h"
+#include "cgroup-show.h"
+#include "cgroup-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "locale-util.h"
+#include "macro.h"
+#include "output-mode.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "unit-name.h"
+#include "xattr-util.h"
+
+static void show_pid_array(
+ pid_t pids[],
+ unsigned n_pids,
+ const char *prefix,
+ size_t n_columns,
+ bool extra,
+ bool more,
+ OutputFlags flags) {
+
+ unsigned i, j, pid_width;
+
+ if (n_pids == 0)
+ return;
+
+ typesafe_qsort(pids, n_pids, pid_compare_func);
+
+ /* Filter duplicates */
+ for (j = 0, i = 1; i < n_pids; i++) {
+ if (pids[i] == pids[j])
+ continue;
+ pids[++j] = pids[i];
+ }
+ n_pids = j + 1;
+ pid_width = DECIMAL_STR_WIDTH(pids[j]);
+
+ if (flags & OUTPUT_FULL_WIDTH)
+ n_columns = SIZE_MAX;
+ else {
+ if (n_columns > pid_width + 3) /* something like "├─1114784 " */
+ n_columns -= pid_width + 3;
+ else
+ n_columns = 20;
+ }
+ for (i = 0; i < n_pids; i++) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) get_process_cmdline(pids[i], n_columns,
+ PROCESS_CMDLINE_COMM_FALLBACK | PROCESS_CMDLINE_USE_LOCALE,
+ &t);
+
+ if (extra)
+ printf("%s%s ", prefix, special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET));
+ else
+ printf("%s%s", prefix, special_glyph(((more || i < n_pids-1) ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT)));
+
+ printf("%s%*"PID_PRI" %s%s\n", ansi_grey(), pid_width, pids[i], strna(t), ansi_normal());
+ }
+}
+
+static int show_cgroup_one_by_path(
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ bool more,
+ OutputFlags flags) {
+
+ char *fn;
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t n = 0, n_allocated = 0;
+ _cleanup_free_ pid_t *pids = NULL;
+ _cleanup_free_ char *p = NULL;
+ pid_t pid;
+ int r;
+
+ r = cg_mangle_path(path, &p);
+ if (r < 0)
+ return r;
+
+ fn = strjoina(p, "/cgroup.procs");
+ f = fopen(fn, "re");
+ if (!f)
+ return -errno;
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ if (!(flags & OUTPUT_KERNEL_THREADS) && is_kernel_thread(pid) > 0)
+ continue;
+
+ if (!GREEDY_REALLOC(pids, n_allocated, n + 1))
+ return -ENOMEM;
+
+ assert(n < n_allocated);
+ pids[n++] = pid;
+ }
+
+ if (r < 0)
+ return r;
+
+ show_pid_array(pids, n, prefix, n_columns, false, more, flags);
+
+ return 0;
+}
+
+static int show_cgroup_name(
+ const char *path,
+ const char *prefix,
+ const char *glyph) {
+
+ _cleanup_free_ char *b = NULL;
+ bool delegate = false;
+ int r;
+
+ r = getxattr_malloc(path, "trusted.delegate", &b, false);
+ if (r < 0) {
+ if (r != -ENODATA)
+ log_debug_errno(r, "Failed to read trusted.delegate extended attribute: %m");
+ } else {
+ r = parse_boolean(b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse trusted.delegate extended attribute boolean value: %m");
+ else
+ delegate = r > 0;
+
+ b = mfree(b);
+ }
+
+ b = strdup(basename(path));
+ if (!b)
+ return -ENOMEM;
+
+ printf("%s%s%s%s%s %s%s%s\n",
+ prefix, glyph,
+ delegate ? ansi_underline() : "",
+ cg_unescape(b),
+ delegate ? ansi_normal() : "",
+ delegate ? ansi_highlight() : "",
+ delegate ? special_glyph(SPECIAL_GLYPH_ELLIPSIS) : "",
+ delegate ? ansi_normal() : "");
+ return 0;
+}
+
+int show_cgroup_by_path(
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ OutputFlags flags) {
+
+ _cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL;
+ _cleanup_closedir_ DIR *d = NULL;
+ bool shown_pids = false;
+ char *gn = NULL;
+ int r;
+
+ assert(path);
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ r = cg_mangle_path(path, &fn);
+ if (r < 0)
+ return r;
+
+ d = opendir(fn);
+ if (!d)
+ return -errno;
+
+ while ((r = cg_read_subgroup(d, &gn)) > 0) {
+ _cleanup_free_ char *k = NULL;
+
+ k = path_join(fn, gn);
+ free(gn);
+ if (!k)
+ return -ENOMEM;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k) > 0)
+ continue;
+
+ if (!shown_pids) {
+ show_cgroup_one_by_path(path, prefix, n_columns, true, flags);
+ shown_pids = true;
+ }
+
+ if (last) {
+ r = show_cgroup_name(last, prefix, special_glyph(SPECIAL_GLYPH_TREE_BRANCH));
+ if (r < 0)
+ return r;
+
+ if (!p1) {
+ p1 = strjoin(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL));
+ if (!p1)
+ return -ENOMEM;
+ }
+
+ show_cgroup_by_path(last, p1, n_columns-2, flags);
+ free(last);
+ }
+
+ last = TAKE_PTR(k);
+ }
+
+ if (r < 0)
+ return r;
+
+ if (!shown_pids)
+ show_cgroup_one_by_path(path, prefix, n_columns, !!last, flags);
+
+ if (last) {
+ r = show_cgroup_name(last, prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT));
+ if (r < 0)
+ return r;
+
+ if (!p2) {
+ p2 = strjoin(prefix, " ");
+ if (!p2)
+ return -ENOMEM;
+ }
+
+ show_cgroup_by_path(last, p2, n_columns-2, flags);
+ }
+
+ return 0;
+}
+
+int show_cgroup(const char *controller,
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ OutputFlags flags) {
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ assert(path);
+
+ r = cg_get_path(controller, path, NULL, &p);
+ if (r < 0)
+ return r;
+
+ return show_cgroup_by_path(p, prefix, n_columns, flags);
+}
+
+static int show_extra_pids(
+ const char *controller,
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ const pid_t pids[],
+ unsigned n_pids,
+ OutputFlags flags) {
+
+ _cleanup_free_ pid_t *copy = NULL;
+ unsigned i, j;
+ int r;
+
+ assert(path);
+
+ if (n_pids <= 0)
+ return 0;
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ prefix = strempty(prefix);
+
+ copy = new(pid_t, n_pids);
+ if (!copy)
+ return -ENOMEM;
+
+ for (i = 0, j = 0; i < n_pids; i++) {
+ _cleanup_free_ char *k = NULL;
+
+ r = cg_pid_get_path(controller, pids[i], &k);
+ if (r < 0)
+ return r;
+
+ if (path_startswith(k, path))
+ continue;
+
+ copy[j++] = pids[i];
+ }
+
+ show_pid_array(copy, j, prefix, n_columns, true, false, flags);
+
+ return 0;
+}
+
+int show_cgroup_and_extra(
+ const char *controller,
+ const char *path,
+ const char *prefix,
+ size_t n_columns,
+ const pid_t extra_pids[],
+ unsigned n_extra_pids,
+ OutputFlags flags) {
+
+ int r;
+
+ assert(path);
+
+ r = show_cgroup(controller, path, prefix, n_columns, flags);
+ if (r < 0)
+ return r;
+
+ return show_extra_pids(controller, path, prefix, n_columns, extra_pids, n_extra_pids, flags);
+}
+
+int show_cgroup_get_unit_path_and_warn(
+ sd_bus *bus,
+ const char *unit,
+ char **ret) {
+
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ path = unit_dbus_path_from_name(unit);
+ if (!path)
+ return log_oom();
+
+ r = sd_bus_get_property_string(
+ bus,
+ "org.freedesktop.systemd1",
+ path,
+ unit_dbus_interface_from_name(unit),
+ "ControlGroup",
+ &error,
+ ret);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query unit control group path: %s",
+ bus_error_message(&error, r));
+
+ return 0;
+}
+
+int show_cgroup_get_path_and_warn(
+ const char *machine,
+ const char *prefix,
+ char **ret) {
+
+ int r;
+ _cleanup_free_ char *root = NULL;
+
+ if (machine) {
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *unit = NULL;
+ const char *m;
+
+ m = strjoina("/run/systemd/machines/", machine);
+ r = parse_env_file(NULL, m, "SCOPE", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to load machine data: %m");
+
+ r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL, false, &bus);
+ if (r < 0)
+ return bus_log_connect_error(r);
+
+ r = show_cgroup_get_unit_path_and_warn(bus, unit, &root);
+ if (r < 0)
+ return r;
+ } else {
+ r = cg_get_root_path(&root);
+ if (r == -ENOMEDIUM)
+ return log_error_errno(r, "Failed to get root control group path.\n"
+ "No cgroup filesystem mounted on /sys/fs/cgroup");
+ else if (r < 0)
+ return log_error_errno(r, "Failed to get root control group path: %m");
+ }
+
+ if (prefix) {
+ char *t;
+
+ t = strjoin(root, prefix);
+ if (!t)
+ return log_oom();
+
+ *ret = t;
+ } else
+ *ret = TAKE_PTR(root);
+
+ return 0;
+}
diff --git a/src/shared/cgroup-show.h b/src/shared/cgroup-show.h
new file mode 100644
index 0000000..fbbf766
--- /dev/null
+++ b/src/shared/cgroup-show.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-bus.h"
+
+#include "logs-show.h"
+#include "output-mode.h"
+
+int show_cgroup_by_path(const char *path, const char *prefix, size_t n_columns, OutputFlags flags);
+int show_cgroup(const char *controller, const char *path, const char *prefix, size_t n_columns, OutputFlags flags);
+
+int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, size_t n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags);
+
+int show_cgroup_get_unit_path_and_warn(
+ sd_bus *bus,
+ const char *unit,
+ char **ret);
+int show_cgroup_get_path_and_warn(
+ const char *machine,
+ const char *prefix,
+ char **ret);
diff --git a/src/shared/chown-recursive.c b/src/shared/chown-recursive.c
new file mode 100644
index 0000000..4563729
--- /dev/null
+++ b/src/shared/chown-recursive.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+#include "chown-recursive.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "stdio-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int chown_one(
+ int fd,
+ const struct stat *st,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
+ const char *n;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ /* We change ACLs through the /proc/self/fd/%i path, so that we have a stable reference that works
+ * with O_PATH. */
+ xsprintf(procfs_path, "/proc/self/fd/%i", fd);
+
+ /* Drop any ACL if there is one */
+ FOREACH_STRING(n, "system.posix_acl_access", "system.posix_acl_default")
+ if (removexattr(procfs_path, n) < 0)
+ if (!IN_SET(errno, ENODATA, EOPNOTSUPP, ENOSYS, ENOTTY))
+ return -errno;
+
+ r = fchmod_and_chown(fd, st->st_mode & mask, uid, gid);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int chown_recursive_internal(
+ int fd,
+ const struct stat *st,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ bool changed = false;
+ struct dirent *de;
+ int r;
+
+ assert(fd >= 0);
+ assert(st);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_close_ int path_fd = -1;
+ struct stat fst;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* Let's pin the child inode we want to fix now with an O_PATH fd, so that it cannot be swapped out
+ * while we manipulate it. */
+ path_fd = openat(dirfd(d), de->d_name, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (path_fd < 0)
+ return -errno;
+
+ if (fstat(path_fd, &fst) < 0)
+ return -errno;
+
+ if (S_ISDIR(fst.st_mode)) {
+ int subdir_fd;
+
+ /* Convert it to a "real" (i.e. non-O_PATH) fd now */
+ subdir_fd = fd_reopen(path_fd, O_RDONLY|O_CLOEXEC|O_NOATIME);
+ if (subdir_fd < 0)
+ return subdir_fd;
+
+ r = chown_recursive_internal(subdir_fd, &fst, uid, gid, mask); /* takes possession of subdir_fd even on failure */
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
+ } else {
+ r = chown_one(path_fd, &fst, uid, gid, mask);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ changed = true;
+ }
+ }
+
+ r = chown_one(dirfd(d), st, uid, gid, mask);
+ if (r < 0)
+ return r;
+
+ return r > 0 || changed;
+}
+
+int path_chown_recursive(
+ const char *path,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+
+ fd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
+ if (fd < 0)
+ return -errno;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid) && FLAGS_SET(mask, 07777))
+ return 0; /* nothing to do */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* Let's take a shortcut: if the top-level directory is properly owned, we don't descend into the
+ * whole tree, under the assumption that all is OK anyway. */
+ if ((!uid_is_valid(uid) || st.st_uid == uid) &&
+ (!gid_is_valid(gid) || st.st_gid == gid) &&
+ ((st.st_mode & ~mask & 07777) == 0))
+ return 0;
+
+ return chown_recursive_internal(TAKE_FD(fd), &st, uid, gid, mask); /* we donate the fd to the call, regardless if it succeeded or failed */
+}
+
+int fd_chown_recursive(
+ int fd,
+ uid_t uid,
+ gid_t gid,
+ mode_t mask) {
+
+ int duplicated_fd = -1;
+ struct stat st;
+
+ /* Note that the slightly different order of fstat() and the checks here and in
+ * path_chown_recursive(). That's because when we open the directory ourselves we can specify
+ * O_DIRECTORY and we always want to ensure we are operating on a directory before deciding whether
+ * the operation is otherwise redundant. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISDIR(st.st_mode))
+ return -ENOTDIR;
+
+ if (!uid_is_valid(uid) && !gid_is_valid(gid) && FLAGS_SET(mask, 07777))
+ return 0; /* nothing to do */
+
+ /* Shortcut, as above */
+ if ((!uid_is_valid(uid) || st.st_uid == uid) &&
+ (!gid_is_valid(gid) || st.st_gid == gid) &&
+ ((st.st_mode & ~mask & 07777) == 0))
+ return 0;
+
+ /* Let's duplicate the fd here, as opendir() wants to take possession of it and close it afterwards */
+ duplicated_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (duplicated_fd < 0)
+ return -errno;
+
+ return chown_recursive_internal(duplicated_fd, &st, uid, gid, mask); /* fd donated even on failure */
+}
diff --git a/src/shared/chown-recursive.h b/src/shared/chown-recursive.h
new file mode 100644
index 0000000..00038c3
--- /dev/null
+++ b/src/shared/chown-recursive.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int path_chown_recursive(const char *path, uid_t uid, gid_t gid, mode_t mask);
+
+int fd_chown_recursive(int fd, uid_t uid, gid_t gid, mode_t mask);
diff --git a/src/shared/clean-ipc.c b/src/shared/clean-ipc.c
new file mode 100644
index 0000000..77fe227
--- /dev/null
+++ b/src/shared/clean-ipc.c
@@ -0,0 +1,454 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <mqueue.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "clean-ipc.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-util.h"
+#include "log.h"
+#include "macro.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+static bool match_uid_gid(uid_t subject_uid, gid_t subject_gid, uid_t delete_uid, gid_t delete_gid) {
+
+ if (uid_is_valid(delete_uid) && subject_uid == delete_uid)
+ return true;
+
+ if (gid_is_valid(delete_gid) && subject_gid == delete_gid)
+ return true;
+
+ return false;
+}
+
+static int clean_sysvipc_shm(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/shm", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/shm: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ unsigned n_attached;
+ pid_t cpid, lpid;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ int shmid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(errno, "Failed to read /proc/sysvipc/shm: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u " PID_FMT " " PID_FMT " %u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &shmid, &cpid, &lpid, &n_attached, &uid, &gid, &cuid, &cgid) != 8)
+ continue;
+
+ if (n_attached > 0)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (shmctl(shmid, IPC_RMID, NULL) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV shared memory segment %i: %m",
+ shmid);
+ } else {
+ log_debug("Removed SysV shared memory segment %i.", shmid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_sysvipc_sem(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/sem", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/sem: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ int semid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/sysvipc/sem: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &semid, &uid, &gid, &cuid, &cgid) != 5)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (semctl(semid, 0, IPC_RMID) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV semaphores object %i: %m",
+ semid);
+ } else {
+ log_debug("Removed SysV semaphore %i.", semid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_sysvipc_msg(uid_t delete_uid, gid_t delete_gid, bool rm) {
+ _cleanup_fclose_ FILE *f = NULL;
+ bool first = true;
+ int ret = 0, r;
+
+ f = fopen("/proc/sysvipc/msg", "re");
+ if (!f) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /proc/sysvipc/msg: %m");
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ uid_t uid, cuid;
+ gid_t gid, cgid;
+ pid_t cpid, lpid;
+ int msgid;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/sysvipc/msg: %m");
+ if (r == 0)
+ break;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (sscanf(line, "%*i %i %*o %*u %*u " PID_FMT " " PID_FMT " " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT,
+ &msgid, &cpid, &lpid, &uid, &gid, &cuid, &cgid) != 7)
+ continue;
+
+ if (!match_uid_gid(uid, gid, delete_uid, delete_gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (msgctl(msgid, IPC_RMID, NULL) < 0) {
+
+ /* Ignore entries that are already deleted */
+ if (IN_SET(errno, EIDRM, EINVAL))
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to remove SysV message queue %i: %m",
+ msgid);
+ } else {
+ log_debug("Removed SysV message queue %i.", msgid);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int clean_posix_shm_internal(const char *dirname, DIR *dir, uid_t uid, gid_t gid, bool rm) {
+ struct dirent *de;
+ int ret = 0, r;
+
+ assert(dir);
+
+ FOREACH_DIRENT_ALL(de, dir, goto fail) {
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to stat() POSIX shared memory segment %s/%s: %m",
+ dirname, de->d_name);
+ continue;
+ }
+
+ if (S_ISDIR(st.st_mode)) {
+ _cleanup_closedir_ DIR *kid;
+
+ kid = xopendirat(dirfd(dir), de->d_name, O_NOFOLLOW|O_NOATIME);
+ if (!kid) {
+ if (errno != ENOENT)
+ ret = log_warning_errno(errno, "Failed to enter shared memory directory %s/%s: %m",
+ dirname, de->d_name);
+ } else {
+ r = clean_posix_shm_internal(de->d_name, kid, uid, gid, rm);
+ if (r < 0)
+ ret = r;
+ }
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (unlinkat(dirfd(dir), de->d_name, AT_REMOVEDIR) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to remove POSIX shared memory directory %s/%s: %m",
+ dirname, de->d_name);
+ } else {
+ log_debug("Removed POSIX shared memory directory %s", de->d_name);
+ if (ret == 0)
+ ret = 1;
+ }
+ } else {
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ if (unlinkat(dirfd(dir), de->d_name, 0) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno, "Failed to remove POSIX shared memory segment %s: %m", de->d_name);
+ } else {
+ log_debug("Removed POSIX shared memory segment %s", de->d_name);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+ }
+
+ return ret;
+
+fail:
+ return log_warning_errno(errno, "Failed to read /dev/shm: %m");
+}
+
+static int clean_posix_shm(uid_t uid, gid_t gid, bool rm) {
+ _cleanup_closedir_ DIR *dir = NULL;
+
+ dir = opendir("/dev/shm");
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /dev/shm: %m");
+ }
+
+ return clean_posix_shm_internal("/dev/shm", dir, uid, gid, rm);
+}
+
+static int clean_posix_mq(uid_t uid, gid_t gid, bool rm) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ struct dirent *de;
+ int ret = 0;
+
+ dir = opendir("/dev/mqueue");
+ if (!dir) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to open /dev/mqueue: %m");
+ }
+
+ FOREACH_DIRENT_ALL(de, dir, goto fail) {
+ struct stat st;
+ char fn[1+strlen(de->d_name)+1];
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to stat() MQ segment %s: %m",
+ de->d_name);
+ continue;
+ }
+
+ if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid))
+ continue;
+
+ if (!rm)
+ return 1;
+
+ fn[0] = '/';
+ strcpy(fn+1, de->d_name);
+
+ if (mq_unlink(fn) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ ret = log_warning_errno(errno,
+ "Failed to unlink POSIX message queue %s: %m",
+ fn);
+ } else {
+ log_debug("Removed POSIX message queue %s", fn);
+ if (ret == 0)
+ ret = 1;
+ }
+ }
+
+ return ret;
+
+fail:
+ return log_warning_errno(errno, "Failed to read /dev/mqueue: %m");
+}
+
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm) {
+ int ret = 0, r;
+
+ /* If 'rm' is true, clean all IPC objects owned by either the specified UID or the specified GID. Return the
+ * last error encountered or == 0 if no matching IPC objects have been found or > 0 if matching IPC objects
+ * have been found and have been removed.
+ *
+ * If 'rm' is false, just search for IPC objects owned by either the specified UID or the specified GID. In
+ * this case we return < 0 on error, > 0 if we found a matching object, == 0 if we didn't.
+ *
+ * As special rule: if UID/GID is specified as root we'll silently not clean up things, and always claim that
+ * there are IPC objects for it. */
+
+ if (uid == 0) {
+ if (!rm)
+ return 1;
+
+ uid = UID_INVALID;
+ }
+ if (gid == 0) {
+ if (!rm)
+ return 1;
+
+ gid = GID_INVALID;
+ }
+
+ /* Anything to do? */
+ if (!uid_is_valid(uid) && !gid_is_valid(gid))
+ return 0;
+
+ r = clean_sysvipc_shm(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_sysvipc_sem(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_sysvipc_msg(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_posix_shm(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ r = clean_posix_mq(uid, gid, rm);
+ if (r != 0) {
+ if (!rm)
+ return r;
+ if (ret == 0)
+ ret = r;
+ }
+
+ return ret;
+}
+
+int clean_ipc_by_uid(uid_t uid) {
+ return clean_ipc_internal(uid, GID_INVALID, true);
+}
+
+int clean_ipc_by_gid(gid_t gid) {
+ return clean_ipc_internal(UID_INVALID, gid, true);
+}
diff --git a/src/shared/clean-ipc.h b/src/shared/clean-ipc.h
new file mode 100644
index 0000000..ed348fb
--- /dev/null
+++ b/src/shared/clean-ipc.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "user-util.h"
+
+int clean_ipc_internal(uid_t uid, gid_t gid, bool rm);
+
+/* Remove all IPC objects owned by the specified UID or GID */
+int clean_ipc_by_uid(uid_t uid);
+int clean_ipc_by_gid(gid_t gid);
+
+/* Check if any IPC object owned by the specified UID or GID exists, returns > 0 if so, == 0 if not */
+static inline int search_ipc(uid_t uid, gid_t gid) {
+ return clean_ipc_internal(uid, gid, false);
+}
diff --git a/src/shared/clock-util.c b/src/shared/clock-util.c
new file mode 100644
index 0000000..2caa70f
--- /dev/null
+++ b/src/shared/clock-util.c
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <time.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "alloc-util.h"
+#include "clock-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "string-util.h"
+
+int clock_get_hwclock(struct tm *tm) {
+ _cleanup_close_ int fd = -1;
+
+ assert(tm);
+
+ fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ /* This leaves the timezone fields of struct tm
+ * uninitialized! */
+ if (ioctl(fd, RTC_RD_TIME, tm) < 0)
+ return -errno;
+
+ /* We don't know daylight saving, so we reset this in order not
+ * to confuse mktime(). */
+ tm->tm_isdst = -1;
+
+ return 0;
+}
+
+int clock_set_hwclock(const struct tm *tm) {
+ _cleanup_close_ int fd = -1;
+
+ assert(tm);
+
+ fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, RTC_SET_TIME, tm) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int clock_is_localtime(const char* adjtime_path) {
+ _cleanup_fclose_ FILE *f;
+ int r;
+
+ if (!adjtime_path)
+ adjtime_path = "/etc/adjtime";
+
+ /*
+ * The third line of adjtime is "UTC" or "LOCAL" or nothing.
+ * # /etc/adjtime
+ * 0.0 0 0
+ * 0
+ * UTC
+ */
+ f = fopen(adjtime_path, "re");
+ if (f) {
+ _cleanup_free_ char *line = NULL;
+ unsigned i;
+
+ for (i = 0; i < 2; i++) { /* skip the first two lines */
+ r = read_line(f, LONG_LINE_MAX, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* less than three lines → default to UTC */
+ }
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false; /* less than three lines → default to UTC */
+
+ return streq(line, "LOCAL");
+
+ } else if (errno != ENOENT)
+ return -errno;
+
+ /* adjtime not present → default to UTC */
+ return false;
+}
+
+int clock_set_timezone(int *min) {
+ const struct timeval *tv_null = NULL;
+ struct timespec ts;
+ struct tm tm;
+ int minutesdelta;
+ struct timezone tz;
+
+ assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0);
+ assert_se(localtime_r(&ts.tv_sec, &tm));
+ minutesdelta = tm.tm_gmtoff / 60;
+
+ tz.tz_minuteswest = -minutesdelta;
+ tz.tz_dsttime = 0; /* DST_NONE */
+
+ /*
+ * If the RTC does not run in UTC but in local time, the very first
+ * call to settimeofday() will set the kernel's timezone and will warp the
+ * system clock, so that it runs in UTC instead of the local time we
+ * have read from the RTC.
+ */
+ if (settimeofday(tv_null, &tz) < 0)
+ return negative_errno();
+
+ if (min)
+ *min = minutesdelta;
+ return 0;
+}
+
+int clock_reset_timewarp(void) {
+ const struct timeval *tv_null = NULL;
+ struct timezone tz;
+
+ tz.tz_minuteswest = 0;
+ tz.tz_dsttime = 0; /* DST_NONE */
+
+ /*
+ * The very first call to settimeofday() does time warp magic. Do a
+ * dummy call here, so the time warping is sealed and all later calls
+ * behave as expected.
+ */
+ if (settimeofday(tv_null, &tz) < 0)
+ return -errno;
+
+ return 0;
+}
+
+#define EPOCH_FILE "/usr/lib/clock-epoch"
+
+int clock_apply_epoch(void) {
+ struct stat st;
+ struct timespec ts;
+ usec_t epoch_usec;
+
+ if (stat(EPOCH_FILE, &st) < 0) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Cannot stat " EPOCH_FILE ": %m");
+
+ epoch_usec = (usec_t) TIME_EPOCH * USEC_PER_SEC;
+ } else
+ epoch_usec = timespec_load(&st.st_mtim);
+
+ if (now(CLOCK_REALTIME) >= epoch_usec)
+ return 0;
+
+ if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, epoch_usec)) < 0)
+ return -errno;
+
+ return 1;
+}
diff --git a/src/shared/clock-util.h b/src/shared/clock-util.h
new file mode 100644
index 0000000..3f1ae7a
--- /dev/null
+++ b/src/shared/clock-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <time.h>
+
+int clock_is_localtime(const char* adjtime_path);
+int clock_set_timezone(int *min);
+int clock_reset_timewarp(void);
+int clock_get_hwclock(struct tm *tm);
+int clock_set_hwclock(const struct tm *tm);
+int clock_apply_epoch(void);
diff --git a/src/shared/condition.c b/src/shared/condition.c
new file mode 100644
index 0000000..b2ec690
--- /dev/null
+++ b/src/shared/condition.c
@@ -0,0 +1,973 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "apparmor-util.h"
+#include "architecture.h"
+#include "audit-util.h"
+#include "cap-list.h"
+#include "cgroup-util.h"
+#include "condition.h"
+#include "cpu-set-util.h"
+#include "efi-loader.h"
+#include "env-file.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "glob-util.h"
+#include "hostname-util.h"
+#include "ima-util.h"
+#include "limits-util.h"
+#include "list.h"
+#include "macro.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "smack-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "tomoyo-util.h"
+#include "user-record.h"
+#include "user-util.h"
+#include "util.h"
+#include "virt.h"
+
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate) {
+ Condition *c;
+
+ assert(type >= 0);
+ assert(type < _CONDITION_TYPE_MAX);
+ assert(parameter);
+
+ c = new(Condition, 1);
+ if (!c)
+ return NULL;
+
+ *c = (Condition) {
+ .type = type,
+ .trigger = trigger,
+ .negate = negate,
+ };
+
+ if (parameter) {
+ c->parameter = strdup(parameter);
+ if (!c->parameter)
+ return mfree(c);
+ }
+
+ return c;
+}
+
+Condition* condition_free(Condition *c) {
+ assert(c);
+
+ free(c->parameter);
+ return mfree(c);
+}
+
+Condition* condition_free_list_type(Condition *head, ConditionType type) {
+ Condition *c, *n;
+
+ LIST_FOREACH_SAFE(conditions, c, n, head)
+ if (type < 0 || c->type == type) {
+ LIST_REMOVE(conditions, head, c);
+ condition_free(c);
+ }
+
+ assert(type >= 0 || !head);
+ return head;
+}
+
+static int condition_test_kernel_command_line(Condition *c, char **env) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+ bool equal;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_KERNEL_COMMAND_LINE);
+
+ r = proc_cmdline(&line);
+ if (r < 0)
+ return r;
+
+ equal = strchr(c->parameter, '=');
+
+ for (p = line;;) {
+ _cleanup_free_ char *word = NULL;
+ bool found;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RELAX);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (equal)
+ found = streq(word, c->parameter);
+ else {
+ const char *f;
+
+ f = startswith(word, c->parameter);
+ found = f && IN_SET(*f, 0, '=');
+ }
+
+ if (found)
+ return true;
+ }
+
+ return false;
+}
+
+typedef enum {
+ /* Listed in order of checking. Note that some comparators are prefixes of others, hence the longest
+ * should be listed first. */
+ ORDER_LOWER_OR_EQUAL,
+ ORDER_GREATER_OR_EQUAL,
+ ORDER_LOWER,
+ ORDER_GREATER,
+ ORDER_EQUAL,
+ ORDER_UNEQUAL,
+ _ORDER_MAX,
+ _ORDER_INVALID = -1
+} OrderOperator;
+
+static OrderOperator parse_order(const char **s) {
+
+ static const char *const prefix[_ORDER_MAX] = {
+ [ORDER_LOWER_OR_EQUAL] = "<=",
+ [ORDER_GREATER_OR_EQUAL] = ">=",
+ [ORDER_LOWER] = "<",
+ [ORDER_GREATER] = ">",
+ [ORDER_EQUAL] = "=",
+ [ORDER_UNEQUAL] = "!=",
+ };
+
+ OrderOperator i;
+
+ for (i = 0; i < _ORDER_MAX; i++) {
+ const char *e;
+
+ e = startswith(*s, prefix[i]);
+ if (e) {
+ *s = e;
+ return i;
+ }
+ }
+
+ return _ORDER_INVALID;
+}
+
+static bool test_order(int k, OrderOperator p) {
+
+ switch (p) {
+
+ case ORDER_LOWER:
+ return k < 0;
+
+ case ORDER_LOWER_OR_EQUAL:
+ return k <= 0;
+
+ case ORDER_EQUAL:
+ return k == 0;
+
+ case ORDER_UNEQUAL:
+ return k != 0;
+
+ case ORDER_GREATER_OR_EQUAL:
+ return k >= 0;
+
+ case ORDER_GREATER:
+ return k > 0;
+
+ default:
+ assert_not_reached("unknown order");
+
+ }
+}
+
+static int condition_test_kernel_version(Condition *c, char **env) {
+ OrderOperator order;
+ struct utsname u;
+ const char *p;
+ bool first = true;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_KERNEL_VERSION);
+
+ assert_se(uname(&u) >= 0);
+
+ p = c->parameter;
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ const char *s;
+ int r;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse condition string \"%s\": %m", p);
+ if (r == 0)
+ break;
+
+ s = strstrip(word);
+ order = parse_order(&s);
+ if (order >= 0) {
+ s += strspn(s, WHITESPACE);
+ if (isempty(s)) {
+ if (first) {
+ /* For backwards compatibility, allow whitespace between the operator and
+ * value, without quoting, but only in the first expression. */
+ word = mfree(word);
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse condition string \"%s\": %m", p);
+ if (r == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected end of expression: %s", p);
+ s = word;
+ } else
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected end of expression: %s", p);
+ }
+
+ r = test_order(str_verscmp(u.release, s), order);
+ } else
+ /* No prefix? Then treat as glob string */
+ r = fnmatch(s, u.release, 0) == 0;
+
+ if (r == 0)
+ return false;
+
+ first = false;
+ }
+
+ return true;
+}
+
+static int condition_test_memory(Condition *c, char **env) {
+ OrderOperator order;
+ uint64_t m, k;
+ const char *p;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_MEMORY);
+
+ m = physical_memory();
+
+ p = c->parameter;
+ order = parse_order(&p);
+ if (order < 0)
+ order = ORDER_GREATER_OR_EQUAL; /* default to >= check, if nothing is specified. */
+
+ r = safe_atou64(p, &k);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse size: %m");
+
+ return test_order(CMP(m, k), order);
+}
+
+static int condition_test_cpus(Condition *c, char **env) {
+ OrderOperator order;
+ const char *p;
+ unsigned k;
+ int r, n;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CPUS);
+
+ n = cpus_in_affinity_mask();
+ if (n < 0)
+ return log_debug_errno(n, "Failed to determine CPUs in affinity mask: %m");
+
+ p = c->parameter;
+ order = parse_order(&p);
+ if (order < 0)
+ order = ORDER_GREATER_OR_EQUAL; /* default to >= check, if nothing is specified. */
+
+ r = safe_atou(p, &k);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse number of CPUs: %m");
+
+ return test_order(CMP((unsigned) n, k), order);
+}
+
+static int condition_test_user(Condition *c, char **env) {
+ uid_t id;
+ int r;
+ _cleanup_free_ char *username = NULL;
+ const char *u;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_USER);
+
+ r = parse_uid(c->parameter, &id);
+ if (r >= 0)
+ return id == getuid() || id == geteuid();
+
+ if (streq("@system", c->parameter))
+ return uid_is_system(getuid()) || uid_is_system(geteuid());
+
+ username = getusername_malloc();
+ if (!username)
+ return -ENOMEM;
+
+ if (streq(username, c->parameter))
+ return 1;
+
+ if (getpid_cached() == 1)
+ return streq(c->parameter, "root");
+
+ u = c->parameter;
+ r = get_user_creds(&u, &id, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING);
+ if (r < 0)
+ return 0;
+
+ return id == getuid() || id == geteuid();
+}
+
+static int condition_test_control_group_controller(Condition *c, char **env) {
+ int r;
+ CGroupMask system_mask, wanted_mask = 0;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CONTROL_GROUP_CONTROLLER);
+
+ r = cg_mask_supported(&system_mask);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine supported controllers: %m");
+
+ r = cg_mask_from_string(c->parameter, &wanted_mask);
+ if (r < 0 || wanted_mask <= 0) {
+ /* This won't catch the case that we have an unknown controller
+ * mixed in with valid ones -- these are only assessed on the
+ * validity of the valid controllers found. */
+ log_debug("Failed to parse cgroup string: %s", c->parameter);
+ return 1;
+ }
+
+ return FLAGS_SET(system_mask, wanted_mask);
+}
+
+static int condition_test_group(Condition *c, char **env) {
+ gid_t id;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_GROUP);
+
+ r = parse_gid(c->parameter, &id);
+ if (r >= 0)
+ return in_gid(id);
+
+ /* Avoid any NSS lookups if we are PID1 */
+ if (getpid_cached() == 1)
+ return streq(c->parameter, "root");
+
+ return in_group(c->parameter) > 0;
+}
+
+static int condition_test_virtualization(Condition *c, char **env) {
+ int b, v;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_VIRTUALIZATION);
+
+ if (streq(c->parameter, "private-users"))
+ return running_in_userns();
+
+ v = detect_virtualization();
+ if (v < 0)
+ return v;
+
+ /* First, compare with yes/no */
+ b = parse_boolean(c->parameter);
+ if (b >= 0)
+ return b == !!v;
+
+ /* Then, compare categorization */
+ if (streq(c->parameter, "vm"))
+ return VIRTUALIZATION_IS_VM(v);
+
+ if (streq(c->parameter, "container"))
+ return VIRTUALIZATION_IS_CONTAINER(v);
+
+ /* Finally compare id */
+ return v != VIRTUALIZATION_NONE && streq(c->parameter, virtualization_to_string(v));
+}
+
+static int condition_test_architecture(Condition *c, char **env) {
+ int a, b;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_ARCHITECTURE);
+
+ a = uname_architecture();
+ if (a < 0)
+ return a;
+
+ if (streq(c->parameter, "native"))
+ b = native_architecture();
+ else {
+ b = architecture_from_string(c->parameter);
+ if (b < 0) /* unknown architecture? Then it's definitely not ours */
+ return false;
+ }
+
+ return a == b;
+}
+
+static int condition_test_host(Condition *c, char **env) {
+ _cleanup_free_ char *h = NULL;
+ sd_id128_t x, y;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_HOST);
+
+ if (sd_id128_from_string(c->parameter, &x) >= 0) {
+
+ r = sd_id128_get_machine(&y);
+ if (r < 0)
+ return r;
+
+ return sd_id128_equal(x, y);
+ }
+
+ h = gethostname_malloc();
+ if (!h)
+ return -ENOMEM;
+
+ return fnmatch(c->parameter, h, FNM_CASEFOLD) == 0;
+}
+
+static int condition_test_ac_power(Condition *c, char **env) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_AC_POWER);
+
+ r = parse_boolean(c->parameter);
+ if (r < 0)
+ return r;
+
+ return (on_ac_power() != 0) == !!r;
+}
+
+static int condition_test_security(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_SECURITY);
+
+ if (streq(c->parameter, "selinux"))
+ return mac_selinux_use();
+ if (streq(c->parameter, "smack"))
+ return mac_smack_use();
+ if (streq(c->parameter, "apparmor"))
+ return mac_apparmor_use();
+ if (streq(c->parameter, "audit"))
+ return use_audit();
+ if (streq(c->parameter, "ima"))
+ return use_ima();
+ if (streq(c->parameter, "tomoyo"))
+ return mac_tomoyo_use();
+ if (streq(c->parameter, "uefi-secureboot"))
+ return is_efi_secure_boot();
+
+ return false;
+}
+
+static int condition_test_capability(Condition *c, char **env) {
+ unsigned long long capabilities = (unsigned long long) -1;
+ _cleanup_fclose_ FILE *f = NULL;
+ int value, r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_CAPABILITY);
+
+ /* If it's an invalid capability, we don't have it */
+ value = capability_from_name(c->parameter);
+ if (value < 0)
+ return -EINVAL;
+
+ /* If it's a valid capability we default to assume
+ * that we have it */
+
+ f = fopen("/proc/self/status", "re");
+ if (!f)
+ return -errno;
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ p = startswith(line, "CapBnd:");
+ if (p) {
+ if (sscanf(line+7, "%llx", &capabilities) != 1)
+ return -EIO;
+
+ break;
+ }
+ }
+
+ return !!(capabilities & (1ULL << value));
+}
+
+static int condition_test_needs_update(Condition *c, char **env) {
+ struct stat usr, other;
+ const char *p;
+ bool b;
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_NEEDS_UPDATE);
+
+ r = proc_cmdline_get_bool("systemd.condition-needs-update", &b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse systemd.condition-needs-update= kernel command line argument, ignoring: %m");
+ if (r > 0)
+ return b;
+
+ if (!path_is_absolute(c->parameter)) {
+ log_debug("Specified condition parameter '%s' is not absolute, assuming an update is needed.", c->parameter);
+ return true;
+ }
+
+ /* If the file system is read-only we shouldn't suggest an update */
+ r = path_is_read_only_fs(c->parameter);
+ if (r < 0)
+ log_debug_errno(r, "Failed to determine if '%s' is read-only, ignoring: %m", c->parameter);
+ if (r > 0)
+ return false;
+
+ /* Any other failure means we should allow the condition to be true, so that we rather invoke too
+ * many update tools than too few. */
+
+ p = strjoina(c->parameter, "/.updated");
+ if (lstat(p, &other) < 0) {
+ if (errno != ENOENT)
+ log_debug_errno(errno, "Failed to stat() '%s', assuming an update is needed: %m", p);
+ return true;
+ }
+
+ if (lstat("/usr/", &usr) < 0) {
+ log_debug_errno(errno, "Failed to stat() /usr/, assuming an update is needed: %m");
+ return true;
+ }
+
+ /*
+ * First, compare seconds as they are always accurate...
+ */
+ if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec)
+ return usr.st_mtim.tv_sec > other.st_mtim.tv_sec;
+
+ /*
+ * ...then compare nanoseconds.
+ *
+ * A false positive is only possible when /usr's nanoseconds > 0
+ * (otherwise /usr cannot be strictly newer than the target file)
+ * AND the target file's nanoseconds == 0
+ * (otherwise the filesystem supports nsec timestamps, see stat(2)).
+ */
+ if (usr.st_mtim.tv_nsec == 0 || other.st_mtim.tv_nsec > 0)
+ return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
+
+ _cleanup_free_ char *timestamp_str = NULL;
+ r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", &timestamp_str);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p);
+ return true;
+ } else if (r == 0) {
+ log_debug("No data in timestamp file '%s', using mtime.", p);
+ return true;
+ }
+
+ uint64_t timestamp;
+ r = safe_atou64(timestamp_str, &timestamp);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p);
+ return true;
+ }
+
+ return timespec_load_nsec(&usr.st_mtim) > timestamp;
+}
+
+static int condition_test_first_boot(Condition *c, char **env) {
+ int r, q;
+ bool b;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FIRST_BOOT);
+
+ r = proc_cmdline_get_bool("systemd.condition-first-boot", &b);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse systemd.condition-first-boot= kernel command line argument, ignoring: %m");
+ if (r > 0)
+ return b == !!r;
+
+ r = parse_boolean(c->parameter);
+ if (r < 0)
+ return r;
+
+ q = access("/run/systemd/first-boot", F_OK);
+ if (q < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to check if /run/systemd/first-boot exists, ignoring: %m");
+
+ return (q >= 0) == !!r;
+}
+
+static int condition_test_environment(Condition *c, char **env) {
+ bool equal;
+ char **i;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_ENVIRONMENT);
+
+ equal = strchr(c->parameter, '=');
+
+ STRV_FOREACH(i, env) {
+ bool found;
+
+ if (equal)
+ found = streq(c->parameter, *i);
+ else {
+ const char *f;
+
+ f = startswith(*i, c->parameter);
+ found = f && IN_SET(*f, 0, '=');
+ }
+
+ if (found)
+ return true;
+ }
+
+ return false;
+}
+
+static int condition_test_path_exists(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_EXISTS);
+
+ return access(c->parameter, F_OK) >= 0;
+}
+
+static int condition_test_path_exists_glob(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_EXISTS_GLOB);
+
+ return glob_exists(c->parameter) > 0;
+}
+
+static int condition_test_path_is_directory(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_DIRECTORY);
+
+ return is_dir(c->parameter, true) > 0;
+}
+
+static int condition_test_path_is_symbolic_link(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_SYMBOLIC_LINK);
+
+ return is_symlink(c->parameter) > 0;
+}
+
+static int condition_test_path_is_mount_point(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_MOUNT_POINT);
+
+ return path_is_mount_point(c->parameter, NULL, AT_SYMLINK_FOLLOW) > 0;
+}
+
+static int condition_test_path_is_read_write(Condition *c, char **env) {
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_READ_WRITE);
+
+ return path_is_read_only_fs(c->parameter) <= 0;
+}
+
+static int condition_test_path_is_encrypted(Condition *c, char **env) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_PATH_IS_ENCRYPTED);
+
+ r = path_is_encrypted(c->parameter);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to determine if '%s' is encrypted: %m", c->parameter);
+
+ return r > 0;
+}
+
+static int condition_test_directory_not_empty(Condition *c, char **env) {
+ int r;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_DIRECTORY_NOT_EMPTY);
+
+ r = dir_is_empty(c->parameter);
+ return r <= 0 && r != -ENOENT;
+}
+
+static int condition_test_file_not_empty(Condition *c, char **env) {
+ struct stat st;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FILE_NOT_EMPTY);
+
+ return (stat(c->parameter, &st) >= 0 &&
+ S_ISREG(st.st_mode) &&
+ st.st_size > 0);
+}
+
+static int condition_test_file_is_executable(Condition *c, char **env) {
+ struct stat st;
+
+ assert(c);
+ assert(c->parameter);
+ assert(c->type == CONDITION_FILE_IS_EXECUTABLE);
+
+ return (stat(c->parameter, &st) >= 0 &&
+ S_ISREG(st.st_mode) &&
+ (st.st_mode & 0111));
+}
+
+int condition_test(Condition *c, char **env) {
+
+ static int (*const condition_tests[_CONDITION_TYPE_MAX])(Condition *c, char **env) = {
+ [CONDITION_PATH_EXISTS] = condition_test_path_exists,
+ [CONDITION_PATH_EXISTS_GLOB] = condition_test_path_exists_glob,
+ [CONDITION_PATH_IS_DIRECTORY] = condition_test_path_is_directory,
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = condition_test_path_is_symbolic_link,
+ [CONDITION_PATH_IS_MOUNT_POINT] = condition_test_path_is_mount_point,
+ [CONDITION_PATH_IS_READ_WRITE] = condition_test_path_is_read_write,
+ [CONDITION_PATH_IS_ENCRYPTED] = condition_test_path_is_encrypted,
+ [CONDITION_DIRECTORY_NOT_EMPTY] = condition_test_directory_not_empty,
+ [CONDITION_FILE_NOT_EMPTY] = condition_test_file_not_empty,
+ [CONDITION_FILE_IS_EXECUTABLE] = condition_test_file_is_executable,
+ [CONDITION_KERNEL_COMMAND_LINE] = condition_test_kernel_command_line,
+ [CONDITION_KERNEL_VERSION] = condition_test_kernel_version,
+ [CONDITION_VIRTUALIZATION] = condition_test_virtualization,
+ [CONDITION_SECURITY] = condition_test_security,
+ [CONDITION_CAPABILITY] = condition_test_capability,
+ [CONDITION_HOST] = condition_test_host,
+ [CONDITION_AC_POWER] = condition_test_ac_power,
+ [CONDITION_ARCHITECTURE] = condition_test_architecture,
+ [CONDITION_NEEDS_UPDATE] = condition_test_needs_update,
+ [CONDITION_FIRST_BOOT] = condition_test_first_boot,
+ [CONDITION_USER] = condition_test_user,
+ [CONDITION_GROUP] = condition_test_group,
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = condition_test_control_group_controller,
+ [CONDITION_CPUS] = condition_test_cpus,
+ [CONDITION_MEMORY] = condition_test_memory,
+ [CONDITION_ENVIRONMENT] = condition_test_environment,
+ };
+
+ int r, b;
+
+ assert(c);
+ assert(c->type >= 0);
+ assert(c->type < _CONDITION_TYPE_MAX);
+
+ r = condition_tests[c->type](c, env);
+ if (r < 0) {
+ c->result = CONDITION_ERROR;
+ return r;
+ }
+
+ b = (r > 0) == !c->negate;
+ c->result = b ? CONDITION_SUCCEEDED : CONDITION_FAILED;
+ return b;
+}
+
+bool condition_test_list(
+ Condition *first,
+ char **env,
+ condition_to_string_t to_string,
+ condition_test_logger_t logger,
+ void *userdata) {
+
+ Condition *c;
+ int triggered = -1;
+
+ assert(!!logger == !!to_string);
+
+ /* If the condition list is empty, then it is true */
+ if (!first)
+ return true;
+
+ /* Otherwise, if all of the non-trigger conditions apply and
+ * if any of the trigger conditions apply (unless there are
+ * none) we return true */
+ LIST_FOREACH(conditions, c, first) {
+ int r;
+
+ r = condition_test(c, env);
+
+ if (logger) {
+ if (r < 0)
+ logger(userdata, LOG_WARNING, r, PROJECT_FILE, __LINE__, __func__,
+ "Couldn't determine result for %s=%s%s%s, assuming failed: %m",
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter);
+ else
+ logger(userdata, LOG_DEBUG, 0, PROJECT_FILE, __LINE__, __func__,
+ "%s=%s%s%s %s.",
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter,
+ condition_result_to_string(c->result));
+ }
+
+ if (!c->trigger && r <= 0)
+ return false;
+
+ if (c->trigger && triggered <= 0)
+ triggered = r > 0;
+ }
+
+ return triggered != 0;
+}
+
+void condition_dump(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string) {
+ assert(c);
+ assert(f);
+ assert(to_string);
+
+ prefix = strempty(prefix);
+
+ fprintf(f,
+ "%s\t%s: %s%s%s %s\n",
+ prefix,
+ to_string(c->type),
+ c->trigger ? "|" : "",
+ c->negate ? "!" : "",
+ c->parameter,
+ condition_result_to_string(c->result));
+}
+
+void condition_dump_list(Condition *first, FILE *f, const char *prefix, condition_to_string_t to_string) {
+ Condition *c;
+
+ LIST_FOREACH(conditions, c, first)
+ condition_dump(c, f, prefix, to_string);
+}
+
+static const char* const condition_type_table[_CONDITION_TYPE_MAX] = {
+ [CONDITION_ARCHITECTURE] = "ConditionArchitecture",
+ [CONDITION_VIRTUALIZATION] = "ConditionVirtualization",
+ [CONDITION_HOST] = "ConditionHost",
+ [CONDITION_KERNEL_COMMAND_LINE] = "ConditionKernelCommandLine",
+ [CONDITION_KERNEL_VERSION] = "ConditionKernelVersion",
+ [CONDITION_SECURITY] = "ConditionSecurity",
+ [CONDITION_CAPABILITY] = "ConditionCapability",
+ [CONDITION_AC_POWER] = "ConditionACPower",
+ [CONDITION_NEEDS_UPDATE] = "ConditionNeedsUpdate",
+ [CONDITION_FIRST_BOOT] = "ConditionFirstBoot",
+ [CONDITION_PATH_EXISTS] = "ConditionPathExists",
+ [CONDITION_PATH_EXISTS_GLOB] = "ConditionPathExistsGlob",
+ [CONDITION_PATH_IS_DIRECTORY] = "ConditionPathIsDirectory",
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = "ConditionPathIsSymbolicLink",
+ [CONDITION_PATH_IS_MOUNT_POINT] = "ConditionPathIsMountPoint",
+ [CONDITION_PATH_IS_READ_WRITE] = "ConditionPathIsReadWrite",
+ [CONDITION_PATH_IS_ENCRYPTED] = "ConditionPathIsEncrypted",
+ [CONDITION_DIRECTORY_NOT_EMPTY] = "ConditionDirectoryNotEmpty",
+ [CONDITION_FILE_NOT_EMPTY] = "ConditionFileNotEmpty",
+ [CONDITION_FILE_IS_EXECUTABLE] = "ConditionFileIsExecutable",
+ [CONDITION_USER] = "ConditionUser",
+ [CONDITION_GROUP] = "ConditionGroup",
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = "ConditionControlGroupController",
+ [CONDITION_CPUS] = "ConditionCPUs",
+ [CONDITION_MEMORY] = "ConditionMemory",
+ [CONDITION_ENVIRONMENT] = "ConditionEnvironment",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_type, ConditionType);
+
+static const char* const assert_type_table[_CONDITION_TYPE_MAX] = {
+ [CONDITION_ARCHITECTURE] = "AssertArchitecture",
+ [CONDITION_VIRTUALIZATION] = "AssertVirtualization",
+ [CONDITION_HOST] = "AssertHost",
+ [CONDITION_KERNEL_COMMAND_LINE] = "AssertKernelCommandLine",
+ [CONDITION_KERNEL_VERSION] = "AssertKernelVersion",
+ [CONDITION_SECURITY] = "AssertSecurity",
+ [CONDITION_CAPABILITY] = "AssertCapability",
+ [CONDITION_AC_POWER] = "AssertACPower",
+ [CONDITION_NEEDS_UPDATE] = "AssertNeedsUpdate",
+ [CONDITION_FIRST_BOOT] = "AssertFirstBoot",
+ [CONDITION_PATH_EXISTS] = "AssertPathExists",
+ [CONDITION_PATH_EXISTS_GLOB] = "AssertPathExistsGlob",
+ [CONDITION_PATH_IS_DIRECTORY] = "AssertPathIsDirectory",
+ [CONDITION_PATH_IS_SYMBOLIC_LINK] = "AssertPathIsSymbolicLink",
+ [CONDITION_PATH_IS_MOUNT_POINT] = "AssertPathIsMountPoint",
+ [CONDITION_PATH_IS_READ_WRITE] = "AssertPathIsReadWrite",
+ [CONDITION_PATH_IS_ENCRYPTED] = "AssertPathIsEncrypted",
+ [CONDITION_DIRECTORY_NOT_EMPTY] = "AssertDirectoryNotEmpty",
+ [CONDITION_FILE_NOT_EMPTY] = "AssertFileNotEmpty",
+ [CONDITION_FILE_IS_EXECUTABLE] = "AssertFileIsExecutable",
+ [CONDITION_USER] = "AssertUser",
+ [CONDITION_GROUP] = "AssertGroup",
+ [CONDITION_CONTROL_GROUP_CONTROLLER] = "AssertControlGroupController",
+ [CONDITION_CPUS] = "AssertCPUs",
+ [CONDITION_MEMORY] = "AssertMemory",
+ [CONDITION_ENVIRONMENT] = "AssertEnvironment",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(assert_type, ConditionType);
+
+static const char* const condition_result_table[_CONDITION_RESULT_MAX] = {
+ [CONDITION_UNTESTED] = "untested",
+ [CONDITION_SUCCEEDED] = "succeeded",
+ [CONDITION_FAILED] = "failed",
+ [CONDITION_ERROR] = "error",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(condition_result, ConditionResult);
diff --git a/src/shared/condition.h b/src/shared/condition.h
new file mode 100644
index 0000000..0d9754e
--- /dev/null
+++ b/src/shared/condition.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef enum ConditionType {
+ CONDITION_ARCHITECTURE,
+ CONDITION_VIRTUALIZATION,
+ CONDITION_HOST,
+ CONDITION_KERNEL_COMMAND_LINE,
+ CONDITION_KERNEL_VERSION,
+ CONDITION_SECURITY,
+ CONDITION_CAPABILITY,
+ CONDITION_AC_POWER,
+ CONDITION_MEMORY,
+ CONDITION_CPUS,
+ CONDITION_ENVIRONMENT,
+
+ CONDITION_NEEDS_UPDATE,
+ CONDITION_FIRST_BOOT,
+
+ CONDITION_PATH_EXISTS,
+ CONDITION_PATH_EXISTS_GLOB,
+ CONDITION_PATH_IS_DIRECTORY,
+ CONDITION_PATH_IS_SYMBOLIC_LINK,
+ CONDITION_PATH_IS_MOUNT_POINT,
+ CONDITION_PATH_IS_READ_WRITE,
+ CONDITION_PATH_IS_ENCRYPTED,
+ CONDITION_DIRECTORY_NOT_EMPTY,
+ CONDITION_FILE_NOT_EMPTY,
+ CONDITION_FILE_IS_EXECUTABLE,
+
+ CONDITION_USER,
+ CONDITION_GROUP,
+
+ CONDITION_CONTROL_GROUP_CONTROLLER,
+
+ _CONDITION_TYPE_MAX,
+ _CONDITION_TYPE_INVALID = -1
+} ConditionType;
+
+typedef enum ConditionResult {
+ CONDITION_UNTESTED,
+ CONDITION_SUCCEEDED,
+ CONDITION_FAILED,
+ CONDITION_ERROR,
+ _CONDITION_RESULT_MAX,
+ _CONDITION_RESULT_INVALID = -1
+} ConditionResult;
+
+typedef struct Condition {
+ ConditionType type:8;
+
+ bool trigger:1;
+ bool negate:1;
+
+ ConditionResult result:6;
+
+ char *parameter;
+
+ LIST_FIELDS(struct Condition, conditions);
+} Condition;
+
+Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate);
+Condition* condition_free(Condition *c);
+Condition* condition_free_list_type(Condition *first, ConditionType type);
+static inline Condition* condition_free_list(Condition *first) {
+ return condition_free_list_type(first, _CONDITION_TYPE_INVALID);
+}
+
+int condition_test(Condition *c, char **env);
+
+typedef int (*condition_test_logger_t)(void *userdata, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8);
+typedef const char* (*condition_to_string_t)(ConditionType t) _const_;
+bool condition_test_list(Condition *first, char **env, condition_to_string_t to_string, condition_test_logger_t logger, void *userdata);
+
+void condition_dump(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string);
+void condition_dump_list(Condition *c, FILE *f, const char *prefix, condition_to_string_t to_string);
+
+const char* condition_type_to_string(ConditionType t) _const_;
+ConditionType condition_type_from_string(const char *s) _pure_;
+
+const char* assert_type_to_string(ConditionType t) _const_;
+ConditionType assert_type_from_string(const char *s) _pure_;
+
+const char* condition_result_to_string(ConditionResult r) _const_;
+ConditionResult condition_result_from_string(const char *s) _pure_;
+
+static inline bool condition_takes_path(ConditionType t) {
+ return IN_SET(t,
+ CONDITION_PATH_EXISTS,
+ CONDITION_PATH_EXISTS_GLOB,
+ CONDITION_PATH_IS_DIRECTORY,
+ CONDITION_PATH_IS_SYMBOLIC_LINK,
+ CONDITION_PATH_IS_MOUNT_POINT,
+ CONDITION_PATH_IS_READ_WRITE,
+ CONDITION_PATH_IS_ENCRYPTED,
+ CONDITION_DIRECTORY_NOT_EMPTY,
+ CONDITION_FILE_NOT_EMPTY,
+ CONDITION_FILE_IS_EXECUTABLE,
+ CONDITION_NEEDS_UPDATE);
+}
diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c
new file mode 100644
index 0000000..35d301d
--- /dev/null
+++ b/src/shared/conf-parser.c
@@ -0,0 +1,1247 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "macro.h"
+#include "missing_network.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "sd-id128.h"
+#include "signal-util.h"
+#include "socket-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "syslog-util.h"
+#include "time-util.h"
+#include "utf8.h"
+
+int config_item_table_lookup(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata) {
+
+ const ConfigTableItem *t;
+
+ assert(table);
+ assert(lvalue);
+ assert(func);
+ assert(ltype);
+ assert(data);
+
+ for (t = table; t->lvalue; t++) {
+
+ if (!streq(lvalue, t->lvalue))
+ continue;
+
+ if (!streq_ptr(section, t->section))
+ continue;
+
+ *func = t->parse;
+ *ltype = t->ltype;
+ *data = t->data;
+ return 1;
+ }
+
+ return 0;
+}
+
+int config_item_perf_lookup(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata) {
+
+ ConfigPerfItemLookup lookup = (ConfigPerfItemLookup) table;
+ const ConfigPerfItem *p;
+
+ assert(table);
+ assert(lvalue);
+ assert(func);
+ assert(ltype);
+ assert(data);
+
+ if (section) {
+ const char *key;
+
+ key = strjoina(section, ".", lvalue);
+ p = lookup(key, strlen(key));
+ } else
+ p = lookup(lvalue, strlen(lvalue));
+ if (!p)
+ return 0;
+
+ *func = p->parse;
+ *ltype = p->ltype;
+ *data = (uint8_t*) userdata + p->offset;
+ return 1;
+}
+
+/* Run the user supplied parser for an assignment */
+static int next_assignment(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ ConfigItemLookup lookup,
+ const void *table,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ const char *rvalue,
+ ConfigParseFlags flags,
+ void *userdata) {
+
+ ConfigParserCallback func = NULL;
+ int ltype = 0;
+ void *data = NULL;
+ int r;
+
+ assert(filename);
+ assert(line > 0);
+ assert(lookup);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = lookup(table, section, lvalue, &func, &ltype, &data, userdata);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (func)
+ return func(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+
+ return 0;
+ }
+
+ /* Warn about unknown non-extension fields. */
+ if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(lvalue, "X-"))
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Unknown key name '%s' in section '%s', ignoring.", lvalue, section);
+
+ return 0;
+}
+
+/* Parse a single logical line */
+static int parse_line(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ char **section,
+ unsigned *section_line,
+ bool *section_ignored,
+ char *l,
+ void *userdata) {
+
+ char *e;
+
+ assert(filename);
+ assert(line > 0);
+ assert(lookup);
+ assert(l);
+
+ l = strstrip(l);
+ if (!*l)
+ return 0;
+
+ if (*l == '\n')
+ return 0;
+
+ if (!utf8_is_valid(l))
+ return log_syntax_invalid_utf8(unit, LOG_WARNING, filename, line, l);
+
+ if (*l == '[') {
+ size_t k;
+ char *n;
+
+ k = strlen(l);
+ assert(k > 0);
+
+ if (l[k-1] != ']')
+ return log_syntax(unit, LOG_ERR, filename, line, SYNTHETIC_ERRNO(EBADMSG), "Invalid section header '%s'", l);
+
+ n = strndup(l+1, k-2);
+ if (!n)
+ return log_oom();
+
+ if (sections && !nulstr_contains(sections, n)) {
+ bool ignore = flags & CONFIG_PARSE_RELAXED;
+ const char *t;
+
+ ignore = ignore || startswith(n, "X-");
+
+ if (!ignore)
+ NULSTR_FOREACH(t, sections)
+ if (streq_ptr(n, startswith(t, "-"))) {
+ ignore = true;
+ break;
+ }
+
+ if (!ignore)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown section '%s'. Ignoring.", n);
+
+ free(n);
+ *section = mfree(*section);
+ *section_line = 0;
+ *section_ignored = true;
+ } else {
+ free_and_replace(*section, n);
+ *section_line = line;
+ *section_ignored = false;
+ }
+
+ return 0;
+ }
+
+ if (sections && !*section) {
+ if (!(flags & CONFIG_PARSE_RELAXED) && !*section_ignored)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Assignment outside of section. Ignoring.");
+
+ return 0;
+ }
+
+ e = strchr(l, '=');
+ if (!e)
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Missing '=', ignoring line.");
+ if (e == l)
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Missing key name before '=', ignoring line.");
+
+ *e = 0;
+ e++;
+
+ return next_assignment(unit,
+ filename,
+ line,
+ lookup,
+ table,
+ *section,
+ *section_line,
+ strstrip(l),
+ strstrip(e),
+ flags,
+ userdata);
+}
+
+/* Go through the file and parse each line */
+int config_parse(const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ _cleanup_free_ char *section = NULL, *continuation = NULL;
+ _cleanup_fclose_ FILE *ours = NULL;
+ unsigned line = 0, section_line = 0;
+ bool section_ignored = false, bom_seen = false;
+ int r, fd;
+ usec_t mtime;
+
+ assert(filename);
+ assert(lookup);
+
+ if (!f) {
+ f = ours = fopen(filename, "re");
+ if (!f) {
+ /* Only log on request, except for ENOENT,
+ * since we return 0 to the caller. */
+ if ((flags & CONFIG_PARSE_WARN) || errno == ENOENT)
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to open configuration file '%s': %m", filename);
+ return errno == ENOENT ? 0 : -errno;
+ }
+ }
+
+ fd = fileno(f);
+ if (fd >= 0) { /* stream might not have an fd, let's be careful hence */
+ struct stat st;
+
+ if (fstat(fd, &st) < 0)
+ return log_full_errno(FLAGS_SET(flags, CONFIG_PARSE_WARN) ? LOG_ERR : LOG_DEBUG, errno,
+ "Failed to fstat(%s): %m", filename);
+
+ (void) stat_warn_permissions(filename, &st);
+ mtime = timespec_load(&st.st_mtim);
+ }
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ bool escaped = false;
+ char *l, *p, *e;
+
+ r = read_line(f, LONG_LINE_MAX, &buf);
+ if (r == 0)
+ break;
+ if (r == -ENOBUFS) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_error_errno(r, "%s:%u: Line too long", filename, line);
+
+ return r;
+ }
+ if (r < 0) {
+ if (FLAGS_SET(flags, CONFIG_PARSE_WARN))
+ log_error_errno(r, "%s:%u: Error while reading configuration file: %m", filename, line);
+
+ return r;
+ }
+
+ line++;
+
+ l = skip_leading_chars(buf, WHITESPACE);
+ if (*l != '\0' && strchr(COMMENTS, *l))
+ continue;
+
+ l = buf;
+ if (!bom_seen) {
+ char *q;
+
+ q = startswith(buf, UTF8_BYTE_ORDER_MARK);
+ if (q) {
+ l = q;
+ bom_seen = true;
+ }
+ }
+
+ if (continuation) {
+ if (strlen(continuation) + strlen(l) > LONG_LINE_MAX) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_error("%s:%u: Continuation line too long", filename, line);
+ return -ENOBUFS;
+ }
+
+ if (!strextend(&continuation, l, NULL)) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_oom();
+ return -ENOMEM;
+ }
+
+ p = continuation;
+ } else
+ p = l;
+
+ for (e = p; *e; e++) {
+ if (escaped)
+ escaped = false;
+ else if (*e == '\\')
+ escaped = true;
+ }
+
+ if (escaped) {
+ *(e-1) = ' ';
+
+ if (!continuation) {
+ continuation = strdup(l);
+ if (!continuation) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_oom();
+ return -ENOMEM;
+ }
+ }
+
+ continue;
+ }
+
+ r = parse_line(unit,
+ filename,
+ line,
+ sections,
+ lookup,
+ table,
+ flags,
+ &section,
+ &section_line,
+ &section_ignored,
+ p,
+ userdata);
+ if (r < 0) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+ return r;
+ }
+
+ continuation = mfree(continuation);
+ }
+
+ if (continuation) {
+ r = parse_line(unit,
+ filename,
+ ++line,
+ sections,
+ lookup,
+ table,
+ flags,
+ &section,
+ &section_line,
+ &section_ignored,
+ continuation,
+ userdata);
+ if (r < 0) {
+ if (flags & CONFIG_PARSE_WARN)
+ log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line);
+ return r;
+ }
+ }
+
+ if (ret_mtime)
+ *ret_mtime = mtime;
+
+ return 0;
+}
+
+static int config_parse_many_files(
+ const char *conf_file,
+ char **files,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ usec_t mtime = 0;
+ char **fn;
+ int r;
+
+ if (conf_file) {
+ r = config_parse(NULL, conf_file, NULL, sections, lookup, table, flags, userdata, &mtime);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(fn, files) {
+ usec_t t;
+
+ r = config_parse(NULL, *fn, NULL, sections, lookup, table, flags, userdata, &t);
+ if (r < 0)
+ return r;
+ if (t > mtime) /* Find the newest */
+ mtime = t;
+ }
+
+ if (ret_mtime)
+ *ret_mtime = mtime;
+
+ return 0;
+}
+
+/* Parse each config file in the directories specified as nulstr. */
+int config_parse_many_nulstr(
+ const char *conf_file,
+ const char *conf_file_dirs,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ _cleanup_strv_free_ char **files = NULL;
+ int r;
+
+ r = conf_files_list_nulstr(&files, ".conf", NULL, 0, conf_file_dirs);
+ if (r < 0)
+ return r;
+
+ return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata, ret_mtime);
+}
+
+/* Parse each config file in the directories specified as strv. */
+int config_parse_many(
+ const char *conf_file,
+ const char* const* conf_file_dirs,
+ const char *dropin_dirname,
+ const char *sections,
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime) {
+
+ _cleanup_strv_free_ char **dropin_dirs = NULL;
+ _cleanup_strv_free_ char **files = NULL;
+ const char *suffix;
+ int r;
+
+ suffix = strjoina("/", dropin_dirname);
+ r = strv_extend_strv_concat(&dropin_dirs, (char**) conf_file_dirs, suffix);
+ if (r < 0)
+ return r;
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char* const*) dropin_dirs);
+ if (r < 0)
+ return r;
+
+ return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata, ret_mtime);
+}
+
+#define DEFINE_PARSER(type, vartype, conv_func) \
+ DEFINE_CONFIG_PARSE_PTR(config_parse_##type, conv_func, vartype, "Failed to parse " #type " value")
+
+DEFINE_PARSER(int, int, safe_atoi);
+DEFINE_PARSER(long, long, safe_atoli);
+DEFINE_PARSER(uint8, uint8_t, safe_atou8);
+DEFINE_PARSER(uint16, uint16_t, safe_atou16);
+DEFINE_PARSER(uint32, uint32_t, safe_atou32);
+DEFINE_PARSER(int32, int32_t, safe_atoi32);
+DEFINE_PARSER(uint64, uint64_t, safe_atou64);
+DEFINE_PARSER(unsigned, unsigned, safe_atou);
+DEFINE_PARSER(double, double, safe_atod);
+DEFINE_PARSER(nsec, nsec_t, parse_nsec);
+DEFINE_PARSER(sec, usec_t, parse_sec);
+DEFINE_PARSER(sec_def_infinity, usec_t, parse_sec_def_infinity);
+DEFINE_PARSER(mode, mode_t, parse_mode);
+
+int config_parse_iec_size(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ size_t *sz = data;
+ uint64_t v;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1024, &v);
+ if (r >= 0 && (uint64_t) (size_t) v != v)
+ r = -ERANGE;
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ *sz = (size_t) v;
+ return 0;
+}
+
+int config_parse_si_uint64(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *sz = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1000, sz);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue);
+
+ return 0;
+}
+
+int config_parse_iec_uint64(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint64_t *bytes = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_size(rvalue, 1024, bytes);
+ if (r < 0)
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse size value, ignoring: %s", rvalue);
+
+ return 0;
+}
+
+int config_parse_bool(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k;
+ bool *b = data;
+ bool fatal = ltype;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, fatal ? LOG_ERR : LOG_WARNING, filename, line, k,
+ "Failed to parse boolean value%s: %s",
+ fatal ? "" : ", ignoring", rvalue);
+ return fatal ? -ENOEXEC : 0;
+ }
+
+ *b = k;
+ return 0;
+}
+
+int config_parse_id128(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ sd_id128_t t, *result = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ r = sd_id128_from_string(rvalue, &t);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse 128bit ID/UUID, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (sd_id128_is_null(t)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "128bit ID/UUID is all 0, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *result = t;
+ return 0;
+}
+
+int config_parse_tristate(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int k, *t = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ /* A tristate is pretty much a boolean, except that it can
+ * also take the special value -1, indicating "uninitialized",
+ * much like NULL is for a pointer type. */
+
+ k = parse_boolean(rvalue);
+ if (k < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, k, "Failed to parse boolean value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *t = k;
+ return 0;
+}
+
+int config_parse_string(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (free_and_strdup(s, empty_to_null(rvalue)) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_path(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_free_ char *n = NULL;
+ bool fatal = ltype;
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue))
+ goto finalize;
+
+ n = strdup(rvalue);
+ if (!n)
+ return log_oom();
+
+ r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE | (fatal ? PATH_CHECK_FATAL : 0), unit, filename, line, lvalue);
+ if (r < 0)
+ return fatal ? -ENOEXEC : 0;
+
+finalize:
+ return free_and_replace(*s, n);
+}
+
+int config_parse_strv(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char ***sv = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *sv = strv_free(*sv);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = strv_consume(sv, word);
+ if (r < 0)
+ return log_oom();
+ }
+}
+
+int config_parse_warn_compat(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ Disabled reason = ltype;
+
+ switch(reason) {
+
+ case DISABLED_CONFIGURATION:
+ log_syntax(unit, LOG_DEBUG, filename, line, 0,
+ "Support for option %s= has been disabled at compile time and it is ignored", lvalue);
+ break;
+
+ case DISABLED_LEGACY:
+ log_syntax(unit, LOG_INFO, filename, line, 0,
+ "Support for option %s= has been removed and it is ignored", lvalue);
+ break;
+
+ case DISABLED_EXPERIMENTAL:
+ log_syntax(unit, LOG_INFO, filename, line, 0,
+ "Support for option %s= has not yet been enabled and it is ignored", lvalue);
+ break;
+ }
+
+ return 0;
+}
+
+int config_parse_log_facility(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *o = data, x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ x = log_facility_unshifted_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse log facility, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *o = (x << 3) | LOG_PRI(*o);
+
+ return 0;
+}
+
+int config_parse_log_level(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *o = data, x;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ x = log_level_from_string(rvalue);
+ if (x < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse log level, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (*o < 0) /* if it wasn't initialized so far, assume zero facility */
+ *o = x;
+ else
+ *o = (*o & LOG_FACMASK) | x;
+
+ return 0;
+}
+
+int config_parse_signal(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int *sig = data, r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(sig);
+
+ r = signal_from_string(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse signal name, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *sig = r;
+ return 0;
+}
+
+int config_parse_personality(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned long *personality = data, p;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(personality);
+
+ if (isempty(rvalue))
+ p = PERSONALITY_INVALID;
+ else {
+ p = personality_from_string(rvalue);
+ if (p == PERSONALITY_INVALID) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse personality, ignoring: %s", rvalue);
+ return 0;
+ }
+ }
+
+ *personality = p;
+ return 0;
+}
+
+int config_parse_ifname(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ char **s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!ifname_valid(rvalue)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Interface name is not valid or too long, ignoring assignment: %s", rvalue);
+ return 0;
+ }
+
+ r = free_and_strdup(s, rvalue);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_ifnames(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ _cleanup_strv_free_ char **names = NULL;
+ char ***s = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = strv_free(*s);
+ return 0;
+ }
+
+ for (const char *p = rvalue;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to extract interface name, ignoring assignment: %s",
+ rvalue);
+ return 0;
+ }
+ if (r == 0)
+ break;
+
+ if (!ifname_valid_full(word, ltype)) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Interface name is not valid or too long, ignoring assignment: %s",
+ word);
+ continue;
+ }
+
+ r = strv_consume(&names, TAKE_PTR(word));
+ if (r < 0)
+ return log_oom();
+ }
+
+ r = strv_extend_strv(s, names, true);
+ if (r < 0)
+ return log_oom();
+
+ return 0;
+}
+
+int config_parse_ip_port(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t *s = data;
+ uint16_t port;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ *s = 0;
+ return 0;
+ }
+
+ r = parse_ip_port(rvalue, &port);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse port '%s'.", rvalue);
+ return 0;
+ }
+
+ *s = port;
+
+ return 0;
+}
+
+int config_parse_mtu(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint32_t *mtu = data;
+ int r;
+
+ assert(rvalue);
+ assert(mtu);
+
+ r = parse_mtu(ltype, rvalue, mtu);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Maximum transfer unit (MTU) value out of range. Permitted range is %" PRIu32 "…%" PRIu32 ", ignoring: %s",
+ (uint32_t) (ltype == AF_INET6 ? IPV6_MIN_MTU : IPV4_MIN_MTU), (uint32_t) UINT32_MAX,
+ rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse MTU value '%s', ignoring: %m", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+int config_parse_rlimit(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ struct rlimit **rl = data, d = {};
+ int r;
+
+ assert(rvalue);
+ assert(rl);
+
+ r = rlimit_parse(ltype, rvalue, &d);
+ if (r == -EILSEQ) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Soft resource limit chosen higher than hard limit, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse resource value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ if (rl[ltype])
+ *rl[ltype] = d;
+ else {
+ rl[ltype] = newdup(struct rlimit, &d, 1);
+ if (!rl[ltype])
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int config_parse_permille(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ unsigned *permille = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(permille);
+
+ r = parse_permille(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse permille value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ *permille = (unsigned) r;
+
+ return 0;
+}
+
+int config_parse_vlanprotocol(const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ int *vlan_protocol = data;
+ assert(filename);
+ assert(lvalue);
+
+ if (isempty(rvalue)) {
+ *vlan_protocol = -1;
+ return 0;
+ }
+
+ if (STR_IN_SET(rvalue, "802.1ad", "802.1AD"))
+ *vlan_protocol = ETH_P_8021AD;
+ else if (STR_IN_SET(rvalue, "802.1q", "802.1Q"))
+ *vlan_protocol = ETH_P_8021Q;
+ else {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse VLAN protocol value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
+
+DEFINE_CONFIG_PARSE(config_parse_percent, parse_percent, "Failed to parse percent value");
diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h
new file mode 100644
index 0000000..f115cb2
--- /dev/null
+++ b/src/shared/conf-parser.h
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "log.h"
+#include "macro.h"
+#include "time-util.h"
+
+/* An abstract parser for simple, line based, shallow configuration files consisting of variable assignments only. */
+
+typedef enum ConfigParseFlags {
+ CONFIG_PARSE_RELAXED = 1 << 0, /* Do not warn about unknown non-extension fields */
+ CONFIG_PARSE_WARN = 1 << 1, /* Emit non-debug messages */
+} ConfigParseFlags;
+
+/* Argument list for parsers of specific configuration settings. */
+#define CONFIG_PARSER_ARGUMENTS \
+ const char *unit, \
+ const char *filename, \
+ unsigned line, \
+ const char *section, \
+ unsigned section_line, \
+ const char *lvalue, \
+ int ltype, \
+ const char *rvalue, \
+ void *data, \
+ void *userdata
+
+/* Prototype for a parser for a specific configuration setting */
+typedef int (*ConfigParserCallback)(CONFIG_PARSER_ARGUMENTS);
+
+/* A macro declaring a function prototype, following the typedef above, simply because it's so cumbersomely long
+ * otherwise. (And current emacs gets irritatingly slow when editing files that contain lots of very long function
+ * prototypes on the same screen…) */
+#define CONFIG_PARSER_PROTOTYPE(name) int name(CONFIG_PARSER_ARGUMENTS)
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a simple array */
+typedef struct ConfigTableItem {
+ const char *section; /* Section */
+ const char *lvalue; /* Name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ void *data; /* Where to store the variable's data */
+} ConfigTableItem;
+
+/* Wraps information for parsing a specific configuration variable, to
+ * be stored in a gperf perfect hashtable */
+typedef struct ConfigPerfItem {
+ const char *section_and_lvalue; /* Section + "." + name of the variable */
+ ConfigParserCallback parse; /* Function that is called to parse the variable's value */
+ int ltype; /* Distinguish different variables passed to the same callback */
+ size_t offset; /* Offset where to store data, from the beginning of userdata */
+} ConfigPerfItem;
+
+/* Prototype for a low-level gperf lookup function */
+typedef const ConfigPerfItem* (*ConfigPerfItemLookup)(const char *section_and_lvalue, unsigned length);
+
+/* Prototype for a generic high-level lookup function */
+typedef int (*ConfigItemLookup)(
+ const void *table,
+ const char *section,
+ const char *lvalue,
+ ConfigParserCallback *func,
+ int *ltype,
+ void **data,
+ void *userdata);
+
+/* Linear table search implementation of ConfigItemLookup, based on
+ * ConfigTableItem arrays */
+int config_item_table_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+/* gperf implementation of ConfigItemLookup, based on gperf
+ * ConfigPerfItem tables */
+int config_item_perf_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata);
+
+int config_parse(
+ const char *unit,
+ const char *filename,
+ FILE *f,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime); /* possibly NULL */
+
+int config_parse_many_nulstr(
+ const char *conf_file, /* possibly NULL */
+ const char *conf_file_dirs, /* nulstr */
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime); /* possibly NULL */
+
+int config_parse_many(
+ const char *conf_file, /* possibly NULL */
+ const char* const* conf_file_dirs,
+ const char *dropin_dirname,
+ const char *sections, /* nulstr */
+ ConfigItemLookup lookup,
+ const void *table,
+ ConfigParseFlags flags,
+ void *userdata,
+ usec_t *ret_mtime); /* possibly NULL */
+
+CONFIG_PARSER_PROTOTYPE(config_parse_int);
+CONFIG_PARSER_PROTOTYPE(config_parse_unsigned);
+CONFIG_PARSER_PROTOTYPE(config_parse_long);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint8);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint16);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint32);
+CONFIG_PARSER_PROTOTYPE(config_parse_int32);
+CONFIG_PARSER_PROTOTYPE(config_parse_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_double);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_size);
+CONFIG_PARSER_PROTOTYPE(config_parse_si_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_iec_uint64);
+CONFIG_PARSER_PROTOTYPE(config_parse_bool);
+CONFIG_PARSER_PROTOTYPE(config_parse_id128);
+CONFIG_PARSER_PROTOTYPE(config_parse_tristate);
+CONFIG_PARSER_PROTOTYPE(config_parse_string);
+CONFIG_PARSER_PROTOTYPE(config_parse_path);
+CONFIG_PARSER_PROTOTYPE(config_parse_strv);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec_def_infinity);
+CONFIG_PARSER_PROTOTYPE(config_parse_sec_def_unset);
+CONFIG_PARSER_PROTOTYPE(config_parse_nsec);
+CONFIG_PARSER_PROTOTYPE(config_parse_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_warn_compat);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_facility);
+CONFIG_PARSER_PROTOTYPE(config_parse_log_level);
+CONFIG_PARSER_PROTOTYPE(config_parse_signal);
+CONFIG_PARSER_PROTOTYPE(config_parse_personality);
+CONFIG_PARSER_PROTOTYPE(config_parse_permille);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifname);
+CONFIG_PARSER_PROTOTYPE(config_parse_ifnames);
+CONFIG_PARSER_PROTOTYPE(config_parse_ip_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_mtu);
+CONFIG_PARSER_PROTOTYPE(config_parse_rlimit);
+CONFIG_PARSER_PROTOTYPE(config_parse_vlanprotocol);
+CONFIG_PARSER_PROTOTYPE(config_parse_percent);
+
+typedef enum Disabled {
+ DISABLED_CONFIGURATION,
+ DISABLED_LEGACY,
+ DISABLED_EXPERIMENTAL,
+} Disabled;
+
+#define DEFINE_CONFIG_PARSE(function, parser, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ int *i = data, r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue); \
+ if (r < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = r; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_PTR(function, parser, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data; \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ r = parser(rvalue, i); \
+ if (r < 0) \
+ log_syntax(unit, LOG_WARNING, filename, line, r, \
+ msg ", ignoring: %s", rvalue); \
+ \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUM_FULL(function, from_string, type, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ x = from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUM(function, name, type, msg) \
+ DEFINE_CONFIG_PARSE_ENUM_FULL(function, name##_from_string, type, msg)
+
+#define DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(function, name, type, default_value, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type *i = data, x; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ if (isempty(rvalue)) { \
+ *i = default_value; \
+ return 0; \
+ } \
+ \
+ x = name##_from_string(rvalue); \
+ if (x < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, 0, \
+ msg ", ignoring: %s", rvalue); \
+ return 0; \
+ } \
+ \
+ *i = x; \
+ return 0; \
+ }
+
+#define DEFINE_CONFIG_PARSE_ENUMV(function, name, type, invalid, msg) \
+ CONFIG_PARSER_PROTOTYPE(function) { \
+ type **enums = data; \
+ _cleanup_free_ type *xs = NULL; \
+ size_t i = 0; \
+ int r; \
+ \
+ assert(filename); \
+ assert(lvalue); \
+ assert(rvalue); \
+ assert(data); \
+ \
+ xs = new0(type, 1); \
+ if (!xs) \
+ return -ENOMEM; \
+ \
+ *xs = invalid; \
+ \
+ for (const char *p = rvalue;;) { \
+ _cleanup_free_ char *en = NULL; \
+ type x, *new_xs; \
+ \
+ r = extract_first_word(&p, &en, NULL, 0); \
+ if (r == -ENOMEM) \
+ return log_oom(); \
+ if (r < 0) \
+ return log_syntax(unit, LOG_ERR, filename, line, 0, \
+ msg ": %s", en); \
+ if (r == 0) \
+ break; \
+ \
+ if ((x = name##_from_string(en)) < 0) { \
+ log_syntax(unit, LOG_WARNING, filename, line, 0, \
+ msg ", ignoring: %s", en); \
+ continue; \
+ } \
+ \
+ for (type *ys = xs; x != invalid && *ys != invalid; ys++) \
+ if (*ys == x) { \
+ log_syntax(unit, LOG_NOTICE, filename, line, 0, \
+ "Duplicate entry, ignoring: %s", \
+ en); \
+ x = invalid; \
+ } \
+ \
+ if (x == invalid) \
+ continue; \
+ \
+ *(xs + i) = x; \
+ new_xs = realloc(xs, (++i + 1) * sizeof(type)); \
+ if (new_xs) \
+ xs = new_xs; \
+ else \
+ return log_oom(); \
+ \
+ *(xs + i) = invalid; \
+ } \
+ \
+ return free_and_replace(*enums, xs); \
+ }
diff --git a/src/shared/coredump-util.c b/src/shared/coredump-util.c
new file mode 100644
index 0000000..a0b648b
--- /dev/null
+++ b/src/shared/coredump-util.c
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "coredump-util.h"
+#include "extract-word.h"
+#include "fileio.h"
+#include "string-table.h"
+
+static const char *const coredump_filter_table[_COREDUMP_FILTER_MAX] = {
+ [COREDUMP_FILTER_PRIVATE_ANONYMOUS] = "private-anonymous",
+ [COREDUMP_FILTER_SHARED_ANONYMOUS] = "shared-anonymous",
+ [COREDUMP_FILTER_PRIVATE_FILE_BACKED] = "private-file-backed",
+ [COREDUMP_FILTER_SHARED_FILE_BACKED] = "shared-file-backed",
+ [COREDUMP_FILTER_ELF_HEADERS] = "elf-headers",
+ [COREDUMP_FILTER_PRIVATE_HUGE] = "private-huge",
+ [COREDUMP_FILTER_SHARED_HUGE] = "shared-huge",
+ [COREDUMP_FILTER_PRIVATE_DAX] = "private-dax",
+ [COREDUMP_FILTER_SHARED_DAX] = "shared-dax",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(coredump_filter, CoredumpFilter);
+
+int coredump_filter_mask_from_string(const char *s, uint64_t *ret) {
+ uint64_t m = 0;
+
+ assert(s);
+ assert(ret);
+
+ for (;;) {
+ _cleanup_free_ char *n = NULL;
+ CoredumpFilter v;
+ int r;
+
+ r = extract_first_word(&s, &n, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (streq(n, "default")) {
+ m |= COREDUMP_FILTER_MASK_DEFAULT;
+ continue;
+ }
+
+ if (streq(n, "all")) {
+ m = UINT64_MAX;
+ continue;
+ }
+
+ v = coredump_filter_from_string(n);
+ if (v >= 0) {
+ m |= 1u << v;
+ continue;
+ }
+
+ uint64_t x;
+ r = safe_atoux64(n, &x);
+ if (r < 0)
+ return r;
+
+ m |= x;
+ }
+
+ *ret = m;
+ return 0;
+}
+
+int set_coredump_filter(uint64_t value) {
+ char t[STRLEN("0xFFFFFFFF")];
+
+ sprintf(t, "0x%"PRIx64, value);
+
+ return write_string_file("/proc/self/coredump_filter", t,
+ WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER);
+}
diff --git a/src/shared/coredump-util.h b/src/shared/coredump-util.h
new file mode 100644
index 0000000..a7f3c0e
--- /dev/null
+++ b/src/shared/coredump-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+typedef enum CoredumpFilter {
+ COREDUMP_FILTER_PRIVATE_ANONYMOUS = 0,
+ COREDUMP_FILTER_SHARED_ANONYMOUS,
+ COREDUMP_FILTER_PRIVATE_FILE_BACKED,
+ COREDUMP_FILTER_SHARED_FILE_BACKED,
+ COREDUMP_FILTER_ELF_HEADERS,
+ COREDUMP_FILTER_PRIVATE_HUGE,
+ COREDUMP_FILTER_SHARED_HUGE,
+ COREDUMP_FILTER_PRIVATE_DAX,
+ COREDUMP_FILTER_SHARED_DAX,
+ _COREDUMP_FILTER_MAX,
+ _COREDUMP_FILTER_INVALID = -1,
+} CoredumpFilter;
+
+#define COREDUMP_FILTER_MASK_DEFAULT (1u << COREDUMP_FILTER_PRIVATE_ANONYMOUS | \
+ 1u << COREDUMP_FILTER_SHARED_ANONYMOUS | \
+ 1u << COREDUMP_FILTER_ELF_HEADERS | \
+ 1u << COREDUMP_FILTER_PRIVATE_HUGE)
+
+const char* coredump_filter_to_string(CoredumpFilter i) _const_;
+CoredumpFilter coredump_filter_from_string(const char *s) _pure_;
+int coredump_filter_mask_from_string(const char *s, uint64_t *ret);
+
+int set_coredump_filter(uint64_t value);
diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c
new file mode 100644
index 0000000..2c3b5bb
--- /dev/null
+++ b/src/shared/cpu-set-util.c
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <syslog.h>
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "dirent-util.h"
+#include "errno-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "parse-util.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "util.h"
+
+char* cpu_set_to_string(const CPUSet *a) {
+ _cleanup_free_ char *str = NULL;
+ size_t allocated = 0, len = 0;
+ int i, r;
+
+ for (i = 0; (size_t) i < a->allocated * 8; i++) {
+ if (!CPU_ISSET_S(i, a->allocated, a->set))
+ continue;
+
+ if (!GREEDY_REALLOC(str, allocated, len + 1 + DECIMAL_STR_MAX(int)))
+ return NULL;
+
+ r = sprintf(str + len, len > 0 ? " %d" : "%d", i);
+ assert_se(r > 0);
+ len += r;
+ }
+
+ return TAKE_PTR(str) ?: strdup("");
+}
+
+char *cpu_set_to_range_string(const CPUSet *set) {
+ unsigned range_start = 0, range_end;
+ _cleanup_free_ char *str = NULL;
+ size_t allocated = 0, len = 0;
+ bool in_range = false;
+ int r;
+
+ for (unsigned i = 0; i < set->allocated * 8; i++)
+ if (CPU_ISSET_S(i, set->allocated, set->set)) {
+ if (in_range)
+ range_end++;
+ else {
+ range_start = range_end = i;
+ in_range = true;
+ }
+ } else if (in_range) {
+ in_range = false;
+
+ if (!GREEDY_REALLOC(str, allocated, len + 2 + 2 * DECIMAL_STR_MAX(unsigned)))
+ return NULL;
+
+ if (range_end > range_start)
+ r = sprintf(str + len, len > 0 ? " %d-%d" : "%d-%d", range_start, range_end);
+ else
+ r = sprintf(str + len, len > 0 ? " %d" : "%d", range_start);
+ assert_se(r > 0);
+ len += r;
+ }
+
+ if (in_range) {
+ if (!GREEDY_REALLOC(str, allocated, len + 2 + 2 * DECIMAL_STR_MAX(int)))
+ return NULL;
+
+ if (range_end > range_start)
+ r = sprintf(str + len, len > 0 ? " %d-%d" : "%d-%d", range_start, range_end);
+ else
+ r = sprintf(str + len, len > 0 ? " %d" : "%d", range_start);
+ assert_se(r > 0);
+ }
+
+ return TAKE_PTR(str) ?: strdup("");
+}
+
+int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus) {
+ size_t need;
+
+ assert(cpu_set);
+
+ need = CPU_ALLOC_SIZE(ncpus);
+ if (need > cpu_set->allocated) {
+ cpu_set_t *t;
+
+ t = realloc(cpu_set->set, need);
+ if (!t)
+ return -ENOMEM;
+
+ memzero((uint8_t*) t + cpu_set->allocated, need - cpu_set->allocated);
+
+ cpu_set->set = t;
+ cpu_set->allocated = need;
+ }
+
+ return 0;
+}
+
+int cpu_set_add(CPUSet *cpu_set, unsigned cpu) {
+ int r;
+
+ if (cpu >= 8192)
+ /* As of kernel 5.1, CONFIG_NR_CPUS can be set to 8192 on PowerPC */
+ return -ERANGE;
+
+ r = cpu_set_realloc(cpu_set, cpu + 1);
+ if (r < 0)
+ return r;
+
+ CPU_SET_S(cpu, cpu_set->allocated, cpu_set->set);
+ return 0;
+}
+
+int cpu_set_add_all(CPUSet *a, const CPUSet *b) {
+ int r;
+
+ /* Do this backwards, so if we fail, we fail before changing anything. */
+ for (unsigned cpu_p1 = b->allocated * 8; cpu_p1 > 0; cpu_p1--)
+ if (CPU_ISSET_S(cpu_p1 - 1, b->allocated, b->set)) {
+ r = cpu_set_add(a, cpu_p1 - 1);
+ if (r < 0)
+ return r;
+ }
+
+ return 1;
+}
+
+int parse_cpu_set_full(
+ const char *rvalue,
+ CPUSet *cpu_set,
+ bool warn,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ _cleanup_(cpu_set_reset) CPUSet c = {};
+ const char *p = rvalue;
+
+ assert(p);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ unsigned cpu_lower, cpu_upper;
+ int r;
+
+ r = extract_first_word(&p, &word, WHITESPACE ",", EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return warn ? log_oom() : -ENOMEM;
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, rvalue) : r;
+ if (r == 0)
+ break;
+
+ r = parse_range(word, &cpu_lower, &cpu_upper);
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CPU affinity '%s'", word) : r;
+
+ if (cpu_lower > cpu_upper) {
+ if (warn)
+ log_syntax(unit, LOG_WARNING, filename, line, 0, "Range '%s' is invalid, %u > %u, ignoring.",
+ word, cpu_lower, cpu_upper);
+
+ /* Make sure something is allocated, to distinguish this from the empty case */
+ r = cpu_set_realloc(&c, 1);
+ if (r < 0)
+ return r;
+ }
+
+ for (unsigned cpu_p1 = MIN(cpu_upper, UINT_MAX-1) + 1; cpu_p1 > cpu_lower; cpu_p1--) {
+ r = cpu_set_add(&c, cpu_p1 - 1);
+ if (r < 0)
+ return warn ? log_syntax(unit, LOG_ERR, filename, line, r,
+ "Cannot add CPU %u to set: %m", cpu_p1 - 1) : r;
+ }
+ }
+
+ /* On success, transfer ownership to the output variable */
+ *cpu_set = c;
+ c = (CPUSet) {};
+
+ return 0;
+}
+
+int parse_cpu_set_extend(
+ const char *rvalue,
+ CPUSet *old,
+ bool warn,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue) {
+
+ _cleanup_(cpu_set_reset) CPUSet cpuset = {};
+ int r;
+
+ r = parse_cpu_set_full(rvalue, &cpuset, true, unit, filename, line, lvalue);
+ if (r < 0)
+ return r;
+
+ if (!cpuset.set) {
+ /* An empty assignment resets the CPU list */
+ cpu_set_reset(old);
+ return 0;
+ }
+
+ if (!old->set) {
+ *old = cpuset;
+ cpuset = (CPUSet) {};
+ return 1;
+ }
+
+ return cpu_set_add_all(old, &cpuset);
+}
+
+int cpus_in_affinity_mask(void) {
+ size_t n = 16;
+ int r;
+
+ for (;;) {
+ cpu_set_t *c;
+
+ c = CPU_ALLOC(n);
+ if (!c)
+ return -ENOMEM;
+
+ if (sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0) {
+ int k;
+
+ k = CPU_COUNT_S(CPU_ALLOC_SIZE(n), c);
+ CPU_FREE(c);
+
+ if (k <= 0)
+ return -EINVAL;
+
+ return k;
+ }
+
+ r = -errno;
+ CPU_FREE(c);
+
+ if (r != -EINVAL)
+ return r;
+ if (n > SIZE_MAX/2)
+ return -ENOMEM;
+ n *= 2;
+ }
+}
+
+int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated) {
+ uint8_t *out;
+
+ assert(set);
+ assert(ret);
+
+ out = new0(uint8_t, set->allocated);
+ if (!out)
+ return -ENOMEM;
+
+ for (unsigned cpu = 0; cpu < set->allocated * 8; cpu++)
+ if (CPU_ISSET_S(cpu, set->allocated, set->set))
+ out[cpu / 8] |= 1u << (cpu % 8);
+
+ *ret = out;
+ *allocated = set->allocated;
+ return 0;
+}
+
+int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) {
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+ int r;
+
+ assert(bits);
+ assert(set);
+
+ for (unsigned cpu = size * 8; cpu > 0; cpu--)
+ if (bits[(cpu - 1) / 8] & (1u << ((cpu - 1) % 8))) {
+ r = cpu_set_add(&s, cpu - 1);
+ if (r < 0)
+ return r;
+ }
+
+ *set = s;
+ s = (CPUSet) {};
+ return 0;
+}
diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h
new file mode 100644
index 0000000..3c63a58
--- /dev/null
+++ b/src/shared/cpu-set-util.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sched.h>
+
+#include "macro.h"
+#include "missing_syscall.h"
+
+/* This wraps the libc interface with a variable to keep the allocated size. */
+typedef struct CPUSet {
+ cpu_set_t *set;
+ size_t allocated; /* in bytes */
+} CPUSet;
+
+static inline void cpu_set_reset(CPUSet *a) {
+ assert((a->allocated > 0) == !!a->set);
+ if (a->set)
+ CPU_FREE(a->set);
+ *a = (CPUSet) {};
+}
+
+int cpu_set_add_all(CPUSet *a, const CPUSet *b);
+int cpu_set_add(CPUSet *a, unsigned cpu);
+
+char* cpu_set_to_string(const CPUSet *a);
+char *cpu_set_to_range_string(const CPUSet *a);
+int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus);
+
+int parse_cpu_set_full(
+ const char *rvalue,
+ CPUSet *cpu_set,
+ bool warn,
+ const char *unit,
+ const char *filename, unsigned line,
+ const char *lvalue);
+int parse_cpu_set_extend(
+ const char *rvalue,
+ CPUSet *old,
+ bool warn,
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *lvalue);
+
+static inline int parse_cpu_set(const char *rvalue, CPUSet *cpu_set){
+ return parse_cpu_set_full(rvalue, cpu_set, false, NULL, NULL, 0, NULL);
+}
+
+int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated);
+int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set);
+
+int cpus_in_affinity_mask(void);
diff --git a/src/shared/cryptsetup-util.c b/src/shared/cryptsetup-util.c
new file mode 100644
index 0000000..34a078e
--- /dev/null
+++ b/src/shared/cryptsetup-util.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_LIBCRYPTSETUP
+#include "alloc-util.h"
+#include "cryptsetup-util.h"
+#include "dlfcn-util.h"
+#include "log.h"
+
+static void *cryptsetup_dl = NULL;
+
+int (*sym_crypt_activate_by_passphrase)(struct crypt_device *cd, const char *name, int keyslot, const char *passphrase, size_t passphrase_size, uint32_t flags);
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+int (*sym_crypt_activate_by_signed_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, const char *signature, size_t signature_size, uint32_t flags);
+#endif
+int (*sym_crypt_activate_by_volume_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, uint32_t flags);
+int (*sym_crypt_deactivate_by_name)(struct crypt_device *cd, const char *name, uint32_t flags);
+int (*sym_crypt_format)(struct crypt_device *cd, const char *type, const char *cipher, const char *cipher_mode, const char *uuid, const char *volume_key, size_t volume_key_size, void *params);
+void (*sym_crypt_free)(struct crypt_device *cd);
+const char *(*sym_crypt_get_dir)(void);
+int (*sym_crypt_get_verity_info)(struct crypt_device *cd, struct crypt_params_verity *vp);
+int (*sym_crypt_init)(struct crypt_device **cd, const char *device);
+int (*sym_crypt_init_by_name)(struct crypt_device **cd, const char *name);
+int (*sym_crypt_keyslot_add_by_volume_key)(struct crypt_device *cd, int keyslot, const char *volume_key, size_t volume_key_size, const char *passphrase, size_t passphrase_size);
+int (*sym_crypt_load)(struct crypt_device *cd, const char *requested_type, void *params);
+int (*sym_crypt_resize)(struct crypt_device *cd, const char *name, uint64_t new_size);
+int (*sym_crypt_set_data_device)(struct crypt_device *cd, const char *device);
+void (*sym_crypt_set_debug_level)(int level);
+void (*sym_crypt_set_log_callback)(struct crypt_device *cd, void (*log)(int level, const char *msg, void *usrptr), void *usrptr);
+int (*sym_crypt_volume_key_get)(struct crypt_device *cd, int keyslot, char *volume_key, size_t *volume_key_size, const char *passphrase, size_t passphrase_size);
+
+int dlopen_cryptsetup(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (cryptsetup_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libcryptsetup.so.12", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libcryptsetup support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_crypt_activate_by_passphrase, "crypt_activate_by_passphrase",
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+ &sym_crypt_activate_by_signed_key, "crypt_activate_by_signed_key",
+#endif
+ &sym_crypt_activate_by_volume_key, "crypt_activate_by_volume_key",
+ &sym_crypt_deactivate_by_name, "crypt_deactivate_by_name",
+ &sym_crypt_format, "crypt_format",
+ &sym_crypt_free, "crypt_free",
+ &sym_crypt_get_dir, "crypt_get_dir",
+ &sym_crypt_get_verity_info, "crypt_get_verity_info",
+ &sym_crypt_init, "crypt_init",
+ &sym_crypt_init_by_name, "crypt_init_by_name",
+ &sym_crypt_keyslot_add_by_volume_key, "crypt_keyslot_add_by_volume_key",
+ &sym_crypt_load, "crypt_load",
+ &sym_crypt_resize, "crypt_resize",
+ &sym_crypt_set_data_device, "crypt_set_data_device",
+ &sym_crypt_set_debug_level, "crypt_set_debug_level",
+ &sym_crypt_set_log_callback, "crypt_set_log_callback",
+ &sym_crypt_volume_key_get, "crypt_volume_key_get",
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* Note that we never release the reference here, because there's no real reason to, after all this
+ * was traditionally a regular shared library dependency which lives forever too. */
+ cryptsetup_dl = TAKE_PTR(dl);
+ return 1;
+}
+
+static void cryptsetup_log_glue(int level, const char *msg, void *usrptr) {
+
+ switch (level) {
+ case CRYPT_LOG_NORMAL:
+ level = LOG_NOTICE;
+ break;
+ case CRYPT_LOG_ERROR:
+ level = LOG_ERR;
+ break;
+ case CRYPT_LOG_VERBOSE:
+ level = LOG_INFO;
+ break;
+ case CRYPT_LOG_DEBUG:
+ level = LOG_DEBUG;
+ break;
+ default:
+ log_error("Unknown libcryptsetup log level: %d", level);
+ level = LOG_ERR;
+ }
+
+ log_full(level, "%s", msg);
+}
+
+void cryptsetup_enable_logging(struct crypt_device *cd) {
+ if (!cd)
+ return;
+
+ if (dlopen_cryptsetup() < 0) /* If this fails, let's gracefully ignore the issue, this is just debug
+ * logging after all, and if this failed we already generated a debug
+ * log message that should help to track things down. */
+ return;
+
+ sym_crypt_set_log_callback(cd, cryptsetup_log_glue, NULL);
+ sym_crypt_set_debug_level(DEBUG_LOGGING ? CRYPT_DEBUG_ALL : CRYPT_DEBUG_NONE);
+}
+
+#endif
diff --git a/src/shared/cryptsetup-util.h b/src/shared/cryptsetup-util.h
new file mode 100644
index 0000000..e7d885d
--- /dev/null
+++ b/src/shared/cryptsetup-util.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+#if HAVE_LIBCRYPTSETUP
+#include <libcryptsetup.h>
+
+/* These next two are defined in libcryptsetup.h from cryptsetup version 2.3.4 forwards. */
+#ifndef CRYPT_ACTIVATE_NO_READ_WORKQUEUE
+#define CRYPT_ACTIVATE_NO_READ_WORKQUEUE (1 << 24)
+#endif
+#ifndef CRYPT_ACTIVATE_NO_WRITE_WORKQUEUE
+#define CRYPT_ACTIVATE_NO_WRITE_WORKQUEUE (1 << 25)
+#endif
+
+extern int (*sym_crypt_activate_by_passphrase)(struct crypt_device *cd, const char *name, int keyslot, const char *passphrase, size_t passphrase_size, uint32_t flags);
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+extern int (*sym_crypt_activate_by_signed_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, const char *signature, size_t signature_size, uint32_t flags);
+#endif
+extern int (*sym_crypt_activate_by_volume_key)(struct crypt_device *cd, const char *name, const char *volume_key, size_t volume_key_size, uint32_t flags);
+extern int (*sym_crypt_deactivate_by_name)(struct crypt_device *cd, const char *name, uint32_t flags);
+extern int (*sym_crypt_format)(struct crypt_device *cd, const char *type, const char *cipher, const char *cipher_mode, const char *uuid, const char *volume_key, size_t volume_key_size, void *params);
+extern void (*sym_crypt_free)(struct crypt_device *cd);
+extern const char *(*sym_crypt_get_dir)(void);
+extern int (*sym_crypt_get_verity_info)(struct crypt_device *cd, struct crypt_params_verity *vp);
+extern int (*sym_crypt_init)(struct crypt_device **cd, const char *device);
+extern int (*sym_crypt_init_by_name)(struct crypt_device **cd, const char *name);
+extern int (*sym_crypt_keyslot_add_by_volume_key)(struct crypt_device *cd, int keyslot, const char *volume_key, size_t volume_key_size, const char *passphrase, size_t passphrase_size);
+extern int (*sym_crypt_load)(struct crypt_device *cd, const char *requested_type, void *params);
+extern int (*sym_crypt_resize)(struct crypt_device *cd, const char *name, uint64_t new_size);
+extern int (*sym_crypt_set_data_device)(struct crypt_device *cd, const char *device);
+extern void (*sym_crypt_set_debug_level)(int level);
+extern void (*sym_crypt_set_log_callback)(struct crypt_device *cd, void (*log)(int level, const char *msg, void *usrptr), void *usrptr);
+extern int (*sym_crypt_volume_key_get)(struct crypt_device *cd, int keyslot, char *volume_key, size_t *volume_key_size, const char *passphrase, size_t passphrase_size);
+
+int dlopen_cryptsetup(void);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, crypt_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, sym_crypt_free);
+
+void cryptsetup_enable_logging(struct crypt_device *cd);
+
+#endif
diff --git a/src/shared/daemon-util.h b/src/shared/daemon-util.h
new file mode 100644
index 0000000..585e489
--- /dev/null
+++ b/src/shared/daemon-util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#define NOTIFY_READY "READY=1\n" "STATUS=Processing requests..."
+#define NOTIFY_STOPPING "STOPPING=1\n" "STATUS=Shutting down..."
+
+static inline const char *notify_start(const char *start, const char *stop) {
+ if (start)
+ (void) sd_notify(false, start);
+
+ return stop;
+}
+
+/* This is intended to be used with _cleanup_ attribute. */
+static inline void notify_on_cleanup(const char **p) {
+ if (*p)
+ (void) sd_notify(false, *p);
+}
diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c
new file mode 100644
index 0000000..b788b06
--- /dev/null
+++ b/src/shared/dev-setup.c
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dev-setup.h"
+#include "label.h"
+#include "log.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+
+int dev_setup(const char *prefix, uid_t uid, gid_t gid) {
+ static const char symlinks[] =
+ "-/proc/kcore\0" "/dev/core\0"
+ "/proc/self/fd\0" "/dev/fd\0"
+ "/proc/self/fd/0\0" "/dev/stdin\0"
+ "/proc/self/fd/1\0" "/dev/stdout\0"
+ "/proc/self/fd/2\0" "/dev/stderr\0";
+
+ const char *j, *k;
+ int r;
+
+ NULSTR_FOREACH_PAIR(j, k, symlinks) {
+ _cleanup_free_ char *link_name = NULL;
+ const char *n;
+
+ if (j[0] == '-') {
+ j++;
+
+ if (access(j, F_OK) < 0)
+ continue;
+ }
+
+ if (prefix) {
+ link_name = path_join(prefix, k);
+ if (!link_name)
+ return -ENOMEM;
+
+ n = link_name;
+ } else
+ n = k;
+
+ r = symlink_label(j, n);
+ if (r < 0)
+ log_debug_errno(r, "Failed to symlink %s to %s: %m", j, n);
+
+ if (uid != UID_INVALID || gid != GID_INVALID)
+ if (lchown(n, uid, gid) < 0)
+ log_debug_errno(errno, "Failed to chown %s: %m", n);
+ }
+
+ return 0;
+}
+
+int make_inaccessible_nodes(
+ const char *parent_dir,
+ uid_t uid,
+ gid_t gid) {
+
+ static const struct {
+ const char *name;
+ mode_t mode;
+ } table[] = {
+ { "inaccessible", S_IFDIR | 0755 },
+ { "inaccessible/reg", S_IFREG | 0000 },
+ { "inaccessible/dir", S_IFDIR | 0000 },
+ { "inaccessible/fifo", S_IFIFO | 0000 },
+ { "inaccessible/sock", S_IFSOCK | 0000 },
+
+ /* The following two are likely to fail if we lack the privs for it (for example in an userns
+ * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibits creation of
+ * device nodes with a major/minor of 0). But that's entirely fine. Consumers of these files
+ * should implement falling back to use a different node then, for example
+ * <root>/inaccessible/sock, which is close enough in behaviour and semantics for most uses.
+ */
+ { "inaccessible/chr", S_IFCHR | 0000 },
+ { "inaccessible/blk", S_IFBLK | 0000 },
+ };
+
+ _cleanup_umask_ mode_t u;
+ int r;
+
+ if (!parent_dir)
+ parent_dir = "/run/systemd";
+
+ u = umask(0000);
+
+ /* Set up inaccessible (and empty) file nodes of all types. This are used to as mount sources for over-mounting
+ * ("masking") file nodes that shall become inaccessible and empty for specific containers or services. We try
+ * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the
+ * underlying file, i.e. in the best case we offer the same node type as the underlying node. */
+
+ for (size_t i = 0; i < ELEMENTSOF(table); i++) {
+ _cleanup_free_ char *path = NULL;
+
+ path = path_join(parent_dir, table[i].name);
+ if (!path)
+ return log_oom();
+
+ if (S_ISDIR(table[i].mode))
+ r = mkdir_label(path, table[i].mode & 07777);
+ else
+ r = mknod_label(path, table[i].mode, makedev(0, 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to create '%s', ignoring: %m", path);
+ continue;
+ }
+
+ if (uid != UID_INVALID || gid != GID_INVALID) {
+ if (lchown(path, uid, gid) < 0)
+ log_debug_errno(errno, "Failed to chown '%s': %m", path);
+ }
+ }
+
+ return 0;
+}
diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h
new file mode 100644
index 0000000..92ba6cf
--- /dev/null
+++ b/src/shared/dev-setup.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+int dev_setup(const char *prefix, uid_t uid, gid_t gid);
+
+int make_inaccessible_nodes(const char *parent_dir, uid_t uid, gid_t gid);
diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c
new file mode 100644
index 0000000..d1f299a
--- /dev/null
+++ b/src/shared/dissect-image.c
@@ -0,0 +1,2557 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <linux/dm-ioctl.h>
+#include <linux/loop.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sysexits.h>
+
+#include "sd-device.h"
+#include "sd-id128.h"
+
+#include "architecture.h"
+#include "ask-password-api.h"
+#include "blkid-util.h"
+#include "blockdev-util.h"
+#include "copy.h"
+#include "cryptsetup-util.h"
+#include "def.h"
+#include "device-nodes.h"
+#include "device-util.h"
+#include "dissect-image.h"
+#include "dm-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "fsck-util.h"
+#include "gpt.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "namespace-util.h"
+#include "nulstr-util.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "raw-clone.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+#include "udev-util.h"
+#include "user-util.h"
+#include "xattr-util.h"
+
+/* how many times to wait for the device nodes to appear */
+#define N_DEVICE_NODE_LIST_ATTEMPTS 10
+
+int probe_filesystem(const char *node, char **ret_fstype) {
+ /* Try to find device content type and return it in *ret_fstype. If nothing is found,
+ * 0/NULL will be returned. -EUCLEAN will be returned for ambiguous results, and an
+ * different error otherwise. */
+
+#if HAVE_BLKID
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ const char *fstype;
+ int r;
+
+ errno = 0;
+ b = blkid_new_probe_from_filename(node);
+ if (!b)
+ return errno_or_else(ENOMEM);
+
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (r == 1) {
+ log_debug("No type detected on partition %s", node);
+ goto not_found;
+ }
+ if (r == -2)
+ return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "Results ambiguous for partition %s", node);
+ if (r != 0)
+ return errno_or_else(EIO);
+
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ char *t;
+
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+
+ *ret_fstype = t;
+ return 1;
+ }
+
+not_found:
+ *ret_fstype = NULL;
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+#if HAVE_BLKID
+static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ int r;
+
+ assert(d);
+ assert(ret);
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(e);
+ return 0;
+}
+
+static int device_is_partition(sd_device *d, blkid_partition pp) {
+ blkid_loff_t bsize, bstart;
+ uint64_t size, start;
+ int partno, bpartno, r;
+ const char *ss, *v;
+
+ assert(d);
+ assert(pp);
+
+ r = sd_device_get_subsystem(d, &ss);
+ if (r < 0)
+ return r;
+ if (!streq(ss, "block"))
+ return false;
+
+ r = sd_device_get_sysattr_value(d, "partition", &v);
+ if (r == -ENOENT) /* Not a partition device */
+ return false;
+ if (r < 0)
+ return r;
+ r = safe_atoi(v, &partno);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ bpartno = blkid_partition_get_partno(pp);
+ if (bpartno < 0)
+ return errno_or_else(EIO);
+
+ if (partno != bpartno)
+ return false;
+
+ r = sd_device_get_sysattr_value(d, "start", &v);
+ if (r < 0)
+ return r;
+ r = safe_atou64(v, &start);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ bstart = blkid_partition_get_start(pp);
+ if (bstart < 0)
+ return errno_or_else(EIO);
+
+ if (start != (uint64_t) bstart)
+ return false;
+
+ r = sd_device_get_sysattr_value(d, "size", &v);
+ if (r < 0)
+ return r;
+ r = safe_atou64(v, &size);
+ if (r < 0)
+ return r;
+
+ errno = 0;
+ bsize = blkid_partition_get_size(pp);
+ if (bsize < 0)
+ return errno_or_else(EIO);
+
+ if (size != (uint64_t) bsize)
+ return false;
+
+ return true;
+}
+
+static int find_partition(
+ sd_device *parent,
+ blkid_partition pp,
+ sd_device **ret) {
+
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ sd_device *q;
+ int r;
+
+ assert(parent);
+ assert(pp);
+ assert(ret);
+
+ r = enumerator_for_parent(parent, &e);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, q) {
+ r = device_is_partition(q, pp);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ *ret = sd_device_ref(q);
+ return 0;
+ }
+ }
+
+ return -ENXIO;
+}
+
+struct wait_data {
+ sd_device *parent_device;
+ blkid_partition blkidp;
+ sd_device *found;
+};
+
+static inline void wait_data_done(struct wait_data *d) {
+ sd_device_unref(d->found);
+}
+
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ const char *parent1_path, *parent2_path;
+ struct wait_data *w = userdata;
+ sd_device *pp;
+ int r;
+
+ assert(w);
+
+ if (device_for_action(device, DEVICE_ACTION_REMOVE))
+ return 0;
+
+ r = sd_device_get_parent(device, &pp);
+ if (r < 0)
+ return 0; /* Doesn't have a parent? No relevant to us */
+
+ r = sd_device_get_syspath(pp, &parent1_path); /* Check parent of device of this action */
+ if (r < 0)
+ goto finish;
+
+ r = sd_device_get_syspath(w->parent_device, &parent2_path); /* Check parent of device we are looking for */
+ if (r < 0)
+ goto finish;
+
+ if (!path_equal(parent1_path, parent2_path))
+ return 0; /* Has a different parent than what we need, not interesting to us */
+
+ r = device_is_partition(device, w->blkidp);
+ if (r < 0)
+ goto finish;
+ if (r == 0) /* Not the one we need */
+ return 0;
+
+ /* It's the one we need! Yay! */
+ assert(!w->found);
+ w->found = sd_device_ref(device);
+ r = 0;
+
+finish:
+ return sd_event_exit(sd_device_monitor_get_event(monitor), r);
+}
+
+static int wait_for_partition_device(
+ sd_device *parent,
+ blkid_partition pp,
+ usec_t deadline,
+ sd_device **ret) {
+
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL;
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ int r;
+
+ assert(parent);
+ assert(pp);
+ assert(ret);
+
+ r = find_partition(parent, pp, ret);
+ if (r != -ENXIO)
+ return r;
+
+ r = sd_event_new(&event);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_new(&monitor);
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "partition");
+ if (r < 0)
+ return r;
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return r;
+
+ _cleanup_(wait_data_done) struct wait_data w = {
+ .parent_device = parent,
+ .blkidp = pp,
+ };
+
+ r = sd_device_monitor_start(monitor, device_monitor_handler, &w);
+ if (r < 0)
+ return r;
+
+ /* Check again, the partition might have appeared in the meantime */
+ r = find_partition(parent, pp, ret);
+ if (r != -ENXIO)
+ return r;
+
+ if (deadline != USEC_INFINITY) {
+ r = sd_event_add_time(
+ event, &timeout_source,
+ CLOCK_MONOTONIC, deadline, 0,
+ NULL, INT_TO_PTR(-ETIMEDOUT));
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return r;
+
+ assert(w.found);
+ *ret = TAKE_PTR(w.found);
+ return 0;
+}
+
+static void check_partition_flags(
+ const char *node,
+ unsigned long long pflags,
+ unsigned long long supported) {
+
+ assert(node);
+
+ /* Mask away all flags supported by this partition's type and the three flags the UEFI spec defines generically */
+ pflags &= ~(supported | GPT_FLAG_REQUIRED_PARTITION | GPT_FLAG_NO_BLOCK_IO_PROTOCOL | GPT_FLAG_LEGACY_BIOS_BOOTABLE);
+
+ if (pflags == 0)
+ return;
+
+ /* If there are other bits set, then log about it, to make things discoverable */
+ for (unsigned i = 0; i < sizeof(pflags) * 8; i++) {
+ unsigned long long bit = 1ULL << i;
+ if (!FLAGS_SET(pflags, bit))
+ continue;
+
+ log_debug("Unexpected partition flag %llu set on %s!", bit, node);
+ }
+}
+
+static int device_wait_for_initialization_harder(
+ sd_device *device,
+ const char *subsystem,
+ usec_t deadline,
+ sd_device **ret) {
+
+ _cleanup_free_ char *uevent = NULL;
+ usec_t start, left, retrigger_timeout;
+ int r;
+
+ start = now(CLOCK_MONOTONIC);
+ left = usec_sub_unsigned(deadline, start);
+
+ if (DEBUG_LOGGING) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ const char *sn = NULL;
+
+ (void) sd_device_get_sysname(device, &sn);
+ log_debug("Waiting for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), left, 0));
+ }
+
+ if (left != USEC_INFINITY)
+ retrigger_timeout = CLAMP(left / 4, 1 * USEC_PER_SEC, 5 * USEC_PER_SEC); /* A fourth of the total timeout, but let's clamp to 1s…5s range */
+ else
+ retrigger_timeout = 2 * USEC_PER_SEC;
+
+ for (;;) {
+ usec_t local_deadline, n;
+ bool last_try;
+
+ n = now(CLOCK_MONOTONIC);
+ assert(n >= start);
+
+ /* Find next deadline, when we'll retrigger */
+ local_deadline = start +
+ DIV_ROUND_UP(n - start, retrigger_timeout) * retrigger_timeout;
+
+ if (deadline != USEC_INFINITY && deadline <= local_deadline) {
+ local_deadline = deadline;
+ last_try = true;
+ } else
+ last_try = false;
+
+ r = device_wait_for_initialization(device, subsystem, local_deadline, ret);
+ if (r >= 0 && DEBUG_LOGGING) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ const char *sn = NULL;
+
+ (void) sd_device_get_sysname(device, &sn);
+ log_debug("Successfully waited for device '%s' to initialize for %s.", strna(sn), format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
+
+ }
+ if (r != -ETIMEDOUT || last_try)
+ return r;
+
+ if (!uevent) {
+ const char *syspath;
+
+ r = sd_device_get_syspath(device, &syspath);
+ if (r < 0)
+ return r;
+
+ uevent = path_join(syspath, "uevent");
+ if (!uevent)
+ return -ENOMEM;
+ }
+
+ if (DEBUG_LOGGING) {
+ char buf[FORMAT_TIMESPAN_MAX];
+
+ log_debug("Device didn't initialize within %s, assuming lost event. Retriggering device through %s.",
+ format_timespan(buf, sizeof(buf), usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0),
+ uevent);
+ }
+
+ r = write_string_file(uevent, "change", WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ return r;
+ }
+}
+#endif
+
+#define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC)
+
+int dissect_image(
+ int fd,
+ const VeritySettings *verity,
+ const MountOptions *mount_options,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+#if HAVE_BLKID
+#ifdef GPT_ROOT_NATIVE
+ sd_id128_t root_uuid = SD_ID128_NULL, root_verity_uuid = SD_ID128_NULL;
+#endif
+#ifdef GPT_USR_NATIVE
+ sd_id128_t usr_uuid = SD_ID128_NULL, usr_verity_uuid = SD_ID128_NULL;
+#endif
+ bool is_gpt, is_mbr, generic_rw, multiple_generic = false;
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+ _cleanup_(blkid_free_probep) blkid_probe b = NULL;
+ _cleanup_free_ char *generic_node = NULL;
+ sd_id128_t generic_uuid = SD_ID128_NULL;
+ const char *pttype = NULL;
+ blkid_partlist pl;
+ int r, generic_nr, n_partitions;
+ struct stat st;
+ usec_t deadline;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(!verity || verity->root_hash || verity->root_hash_size == 0);
+ assert(!((flags & DISSECT_IMAGE_GPT_ONLY) && (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)));
+
+ /* Probes a disk image, and returns information about what it found in *ret.
+ *
+ * Returns -ENOPKG if no suitable partition table or file system could be found.
+ * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */
+
+ if (verity && verity->root_hash) {
+ sd_id128_t fsuuid, vuuid;
+
+ /* If a root hash is supplied, then we use the root partition that has a UUID that match the
+ * first 128bit of the root hash. And we use the verity partition that has a UUID that match
+ * the final 128bit. */
+
+ if (verity->root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ memcpy(&fsuuid, verity->root_hash, sizeof(sd_id128_t));
+ memcpy(&vuuid, (const uint8_t*) verity->root_hash + verity->root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t));
+
+ if (sd_id128_is_null(fsuuid))
+ return -EINVAL;
+ if (sd_id128_is_null(vuuid))
+ return -EINVAL;
+
+ /* If the verity data declares it's for the /usr partition, then search for that, in all
+ * other cases assume it's for the root partition. */
+#ifdef GPT_USR_NATIVE
+ if (verity->designator == PARTITION_USR) {
+ usr_uuid = fsuuid;
+ usr_verity_uuid = vuuid;
+ } else {
+#endif
+#ifdef GPT_ROOT_NATIVE
+ root_uuid = fsuuid;
+ root_verity_uuid = vuuid;
+#endif
+#ifdef GPT_USR_NATIVE
+ }
+#endif
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ r = sd_device_new_from_devnum(&d, 'b', st.st_rdev);
+ if (r < 0)
+ return r;
+
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
+ _cleanup_(sd_device_unrefp) sd_device *initialized = NULL;
+
+ /* If udev support is enabled, then let's wait for the device to be initialized before we doing anything. */
+
+ r = device_wait_for_initialization_harder(
+ d,
+ "block",
+ usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC),
+ &initialized);
+ if (r < 0)
+ return r;
+
+ sd_device_unref(d);
+ d = TAKE_PTR(initialized);
+ }
+
+ b = blkid_new_probe();
+ if (!b)
+ return -ENOMEM;
+
+ errno = 0;
+ r = blkid_probe_set_device(b, fd, 0, 0);
+ if (r != 0)
+ return errno_or_else(ENOMEM);
+
+ if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) {
+ /* Look for file system superblocks, unless we only shall look for GPT partition tables */
+ blkid_probe_enable_superblocks(b, 1);
+ blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE);
+ }
+
+ blkid_probe_enable_partitions(b, 1);
+ blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
+
+ errno = 0;
+ r = blkid_do_safeprobe(b);
+ if (IN_SET(r, -2, 1))
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to identify any partition table.");
+ if (r != 0)
+ return errno_or_else(EIO);
+
+ m = new0(DissectedImage, 1);
+ if (!m)
+ return -ENOMEM;
+
+ if ((!(flags & DISSECT_IMAGE_GPT_ONLY) &&
+ (flags & DISSECT_IMAGE_REQUIRE_ROOT)) ||
+ (flags & DISSECT_IMAGE_NO_PARTITION_TABLE)) {
+ const char *usage = NULL;
+
+ /* If flags permit this, also allow using non-partitioned single-filesystem images */
+
+ (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
+ if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
+ const char *fstype = NULL, *options = NULL, *devname = NULL;
+ _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
+
+ /* OK, we have found a file system, that's our root partition then. */
+ (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ r = sd_device_get_devname(d, &devname);
+ if (r < 0)
+ return r;
+
+ n = strdup(devname);
+ if (!n)
+ return -ENOMEM;
+
+ m->single_file_system = true;
+ m->verity = verity && verity->root_hash && verity->data_path && (verity->designator < 0 || verity->designator == PARTITION_ROOT);
+ m->can_verity = verity && verity->data_path;
+
+ options = mount_options_from_designator(mount_options, PARTITION_ROOT);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = !m->verity,
+ .partno = -1,
+ .architecture = _ARCHITECTURE_INVALID,
+ .fstype = TAKE_PTR(t),
+ .node = TAKE_PTR(n),
+ .mount_options = TAKE_PTR(o),
+ };
+
+ m->encrypted = streq_ptr(fstype, "crypto_LUKS");
+
+ *ret = TAKE_PTR(m);
+ return 0;
+ }
+ }
+
+ (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
+ if (!pttype)
+ return -ENOPKG;
+
+ is_gpt = streq_ptr(pttype, "gpt");
+ is_mbr = streq_ptr(pttype, "dos");
+
+ if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr))
+ return -ENOPKG;
+
+ /* Safety check: refuse block devices that carry a partition table but for which the kernel doesn't
+ * do partition scanning. */
+ r = blockdev_partscan_enabled(fd);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EPROTONOSUPPORT;
+
+ errno = 0;
+ pl = blkid_probe_get_partitions(b);
+ if (!pl)
+ return errno_or_else(ENOMEM);
+
+ errno = 0;
+ n_partitions = blkid_partlist_numof_partitions(pl);
+ if (n_partitions < 0)
+ return errno_or_else(EIO);
+
+ deadline = usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC);
+ for (int i = 0; i < n_partitions; i++) {
+ _cleanup_(sd_device_unrefp) sd_device *q = NULL;
+ unsigned long long pflags;
+ blkid_partition pp;
+ const char *node;
+ int nr;
+
+ errno = 0;
+ pp = blkid_partlist_get_partition(pl, i);
+ if (!pp)
+ return errno_or_else(EIO);
+
+ r = wait_for_partition_device(d, pp, deadline, &q);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_devname(q, &node);
+ if (r < 0)
+ return r;
+
+ pflags = blkid_partition_get_flags(pp);
+
+ errno = 0;
+ nr = blkid_partition_get_partno(pp);
+ if (nr < 0)
+ return errno_or_else(EIO);
+
+ if (is_gpt) {
+ PartitionDesignator designator = _PARTITION_DESIGNATOR_INVALID;
+ int architecture = _ARCHITECTURE_INVALID;
+ const char *stype, *sid, *fstype = NULL;
+ sd_id128_t type_id, id;
+ bool rw = true;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (!sid)
+ continue;
+ if (sd_id128_from_string(sid, &id) < 0)
+ continue;
+
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
+ if (sd_id128_from_string(stype, &type_id) < 0)
+ continue;
+
+ if (sd_id128_equal(type_id, GPT_HOME)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_HOME;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_SRV)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SRV;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_ESP)) {
+
+ /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is
+ * not defined there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as
+ * recommended by the UEFI spec (See "12.3.3 Number and Location of System
+ * Partitions"). */
+
+ if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL)
+ continue;
+
+ designator = PARTITION_ESP;
+ fstype = "vfat";
+
+ } else if (sd_id128_equal(type_id, GPT_XBOOTLDR)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_XBOOTLDR;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ }
+#ifdef GPT_ROOT_NATIVE
+ else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT;
+ architecture = native_architecture();
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless a root hash is specified */
+ if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = native_architecture();
+ rw = false;
+ }
+#endif
+#ifdef GPT_ROOT_SECONDARY
+ else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a root ID is specified, ignore everything but the root id */
+ if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY;
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless root has is specified */
+ if (sd_id128_is_null(root_verity_uuid) || !sd_id128_equal(root_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_ROOT_SECONDARY_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = false;
+ }
+#endif
+#ifdef GPT_USR_NATIVE
+ else if (sd_id128_equal(type_id, GPT_USR_NATIVE)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a usr ID is specified, ignore everything but the usr id */
+ if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
+ continue;
+
+ designator = PARTITION_USR;
+ architecture = native_architecture();
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_USR_NATIVE_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless a usr hash is specified */
+ if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_USR_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = native_architecture();
+ rw = false;
+ }
+#endif
+#ifdef GPT_USR_SECONDARY
+ else if (sd_id128_equal(type_id, GPT_USR_SECONDARY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ /* If a usr ID is specified, ignore everything but the usr id */
+ if (!sd_id128_is_null(usr_uuid) && !sd_id128_equal(usr_uuid, id))
+ continue;
+
+ designator = PARTITION_USR_SECONDARY;
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_USR_SECONDARY_VERITY)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ m->can_verity = true;
+
+ /* Ignore verity unless usr has is specified */
+ if (sd_id128_is_null(usr_verity_uuid) || !sd_id128_equal(usr_verity_uuid, id))
+ continue;
+
+ designator = PARTITION_USR_SECONDARY_VERITY;
+ fstype = "DM_verity_hash";
+ architecture = SECONDARY_ARCHITECTURE;
+ rw = false;
+ }
+#endif
+ else if (sd_id128_equal(type_id, GPT_SWAP)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_SWAP;
+ fstype = "swap";
+
+ } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = !(pflags & GPT_FLAG_READ_ONLY);
+ generic_uuid = id;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+
+ } else if (sd_id128_equal(type_id, GPT_TMP)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ designator = PARTITION_TMP;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+
+ } else if (sd_id128_equal(type_id, GPT_VAR)) {
+
+ check_partition_flags(node, pflags, GPT_FLAG_NO_AUTO|GPT_FLAG_READ_ONLY);
+
+ if (pflags & GPT_FLAG_NO_AUTO)
+ continue;
+
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_RELAX_VAR_CHECK)) {
+ sd_id128_t var_uuid;
+
+ /* For /var we insist that the uuid of the partition matches the
+ * HMAC-SHA256 of the /var GPT partition type uuid, keyed by machine
+ * ID. Why? Unlike the other partitions /var is inherently
+ * installation specific, hence we need to be careful not to mount it
+ * in the wrong installation. By hashing the partition UUID from
+ * /etc/machine-id we can securely bind the partition to the
+ * installation. */
+
+ r = sd_id128_get_machine_app_specific(GPT_VAR, &var_uuid);
+ if (r < 0)
+ return r;
+
+ if (!sd_id128_equal(var_uuid, id)) {
+ log_debug("Found a /var/ partition, but its UUID didn't match our expectations, ignoring.");
+ continue;
+ }
+ }
+
+ designator = PARTITION_VAR;
+ rw = !(pflags & GPT_FLAG_READ_ONLY);
+ }
+
+ if (designator != _PARTITION_DESIGNATOR_INVALID) {
+ _cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
+ const char *options = NULL;
+
+ /* First one wins */
+ if (m->partitions[designator].found)
+ continue;
+
+ if (fstype) {
+ t = strdup(fstype);
+ if (!t)
+ return -ENOMEM;
+ }
+
+ n = strdup(node);
+ if (!n)
+ return -ENOMEM;
+
+ options = mount_options_from_designator(mount_options, designator);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[designator] = (DissectedPartition) {
+ .found = true,
+ .partno = nr,
+ .rw = rw,
+ .architecture = architecture,
+ .node = TAKE_PTR(n),
+ .fstype = TAKE_PTR(t),
+ .uuid = id,
+ .mount_options = TAKE_PTR(o),
+ };
+ }
+
+ } else if (is_mbr) {
+
+ switch (blkid_partition_get_type(pp)) {
+
+ case 0x83: /* Linux partition */
+
+ if (pflags != 0x80) /* Bootable flag */
+ continue;
+
+ if (generic_node)
+ multiple_generic = true;
+ else {
+ generic_nr = nr;
+ generic_rw = true;
+ generic_node = strdup(node);
+ if (!generic_node)
+ return -ENOMEM;
+ }
+
+ break;
+
+ case 0xEA: { /* Boot Loader Spec extended $BOOT partition */
+ _cleanup_free_ char *n = NULL, *o = NULL;
+ sd_id128_t id = SD_ID128_NULL;
+ const char *sid, *options = NULL;
+
+ /* First one wins */
+ if (m->partitions[PARTITION_XBOOTLDR].found)
+ continue;
+
+ sid = blkid_partition_get_uuid(pp);
+ if (sid)
+ (void) sd_id128_from_string(sid, &id);
+
+ n = strdup(node);
+ if (!n)
+ return -ENOMEM;
+
+ options = mount_options_from_designator(mount_options, PARTITION_XBOOTLDR);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[PARTITION_XBOOTLDR] = (DissectedPartition) {
+ .found = true,
+ .partno = nr,
+ .rw = true,
+ .architecture = _ARCHITECTURE_INVALID,
+ .node = TAKE_PTR(n),
+ .uuid = id,
+ .mount_options = TAKE_PTR(o),
+ };
+
+ break;
+ }}
+ }
+ }
+
+ if (m->partitions[PARTITION_ROOT].found) {
+ /* If we found the primary arch, then invalidate the secondary arch to avoid any ambiguities,
+ * since we never want to mount the secondary arch in this case. */
+ m->partitions[PARTITION_ROOT_SECONDARY].found = false;
+ m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false;
+ m->partitions[PARTITION_USR_SECONDARY].found = false;
+ m->partitions[PARTITION_USR_SECONDARY_VERITY].found = false;
+ } else {
+ /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not
+ * either, then check if there's a single generic one, and use that. */
+
+ if (m->partitions[PARTITION_ROOT_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ /* We didn't find a primary architecture root, but we found a primary architecture /usr? Refuse that for now. */
+ if (m->partitions[PARTITION_USR].found || m->partitions[PARTITION_USR_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ if (m->partitions[PARTITION_ROOT_SECONDARY].found) {
+ /* Upgrade secondary arch to first */
+ m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY]);
+ m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY];
+ zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]);
+
+ m->partitions[PARTITION_USR] = m->partitions[PARTITION_USR_SECONDARY];
+ zero(m->partitions[PARTITION_USR_SECONDARY]);
+ m->partitions[PARTITION_USR_VERITY] = m->partitions[PARTITION_USR_SECONDARY_VERITY];
+ zero(m->partitions[PARTITION_USR_SECONDARY_VERITY]);
+
+ } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) {
+ _cleanup_free_ char *o = NULL;
+ const char *options = NULL;
+
+ /* If the root hash was set, then we won't fall back to a generic node, because the
+ * root hash decides. */
+ if (verity && verity->root_hash)
+ return -EADDRNOTAVAIL;
+
+ /* If we didn't find a generic node, then we can't fix this up either */
+ if (!generic_node)
+ return -ENXIO;
+
+ /* If we didn't find a properly marked root partition, but we did find a single suitable
+ * generic Linux partition, then use this as root partition, if the caller asked for it. */
+ if (multiple_generic)
+ return -ENOTUNIQ;
+
+ options = mount_options_from_designator(mount_options, PARTITION_ROOT);
+ if (options) {
+ o = strdup(options);
+ if (!o)
+ return -ENOMEM;
+ }
+
+ m->partitions[PARTITION_ROOT] = (DissectedPartition) {
+ .found = true,
+ .rw = generic_rw,
+ .partno = generic_nr,
+ .architecture = _ARCHITECTURE_INVALID,
+ .node = TAKE_PTR(generic_node),
+ .uuid = generic_uuid,
+ .mount_options = TAKE_PTR(o),
+ };
+ }
+ }
+
+ /* Refuse if we found a verity partition for /usr but no matching file system partition */
+ if (!m->partitions[PARTITION_USR].found && m->partitions[PARTITION_USR_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ /* Combinations of verity /usr with verity-less root is OK, but the reverse is not */
+ if (m->partitions[PARTITION_ROOT_VERITY].found && m->partitions[PARTITION_USR].found && !m->partitions[PARTITION_USR_VERITY].found)
+ return -EADDRNOTAVAIL;
+
+ if (verity && verity->root_hash) {
+ if (verity->designator < 0 || verity->designator == PARTITION_ROOT) {
+ if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found)
+ return -EADDRNOTAVAIL;
+
+ /* If we found a verity setup, then the root partition is necessarily read-only. */
+ m->partitions[PARTITION_ROOT].rw = false;
+ m->verity = true;
+ }
+
+ if (verity->designator == PARTITION_USR) {
+ if (!m->partitions[PARTITION_USR_VERITY].found || !m->partitions[PARTITION_USR].found)
+ return -EADDRNOTAVAIL;
+
+ m->partitions[PARTITION_USR].rw = false;
+ m->verity = true;
+ }
+ }
+
+ blkid_free_probe(b);
+ b = NULL;
+
+ /* Fill in file system types if we don't know them yet. */
+ for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+
+ if (!p->found)
+ continue;
+
+ if (!p->fstype && p->node) {
+ r = probe_filesystem(p->node, &p->fstype);
+ if (r < 0 && r != -EUCLEAN)
+ return r;
+ }
+
+ if (streq_ptr(p->fstype, "crypto_LUKS"))
+ m->encrypted = true;
+
+ if (p->fstype && fstype_is_ro(p->fstype))
+ p->rw = false;
+ }
+
+ *ret = TAKE_PTR(m);
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+DissectedImage* dissected_image_unref(DissectedImage *m) {
+ if (!m)
+ return NULL;
+
+ for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ free(m->partitions[i].fstype);
+ free(m->partitions[i].node);
+ free(m->partitions[i].decrypted_fstype);
+ free(m->partitions[i].decrypted_node);
+ free(m->partitions[i].mount_options);
+ }
+
+ free(m->hostname);
+ strv_free(m->machine_info);
+ strv_free(m->os_release);
+
+ return mfree(m);
+}
+
+static int is_loop_device(const char *path) {
+ char s[SYS_BLOCK_PATH_MAX("/../loop/")];
+ struct stat st;
+
+ assert(path);
+
+ if (stat(path, &st) < 0)
+ return -errno;
+
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ xsprintf_sys_block_path(s, "/loop/", st.st_dev);
+ if (access(s, F_OK) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */
+ xsprintf_sys_block_path(s, "/../loop/", st.st_dev);
+ if (access(s, F_OK) < 0)
+ return errno == ENOENT ? false : -errno;
+ }
+
+ return true;
+}
+
+static int run_fsck(const char *node, const char *fstype) {
+ int r, exit_status;
+ pid_t pid;
+
+ assert(node);
+ assert(fstype);
+
+ r = fsck_exists(fstype);
+ if (r < 0) {
+ log_debug_errno(r, "Couldn't determine whether fsck for %s exists, proceeding anyway.", fstype);
+ return 0;
+ }
+ if (r == 0) {
+ log_debug("Not checking partition %s, as fsck for %s does not exist.", node, fstype);
+ return 0;
+ }
+
+ r = safe_fork("(fsck)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_NULL_STDIO, &pid);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to fork off fsck: %m");
+ if (r == 0) {
+ /* Child */
+ execl("/sbin/fsck", "/sbin/fsck", "-aT", node, NULL);
+ log_debug_errno(errno, "Failed to execl() fsck: %m");
+ _exit(FSCK_OPERATIONAL_ERROR);
+ }
+
+ exit_status = wait_for_terminate_and_check("fsck", pid, 0);
+ if (exit_status < 0)
+ return log_debug_errno(exit_status, "Failed to fork off /sbin/fsck: %m");
+
+ if ((exit_status & ~FSCK_ERROR_CORRECTED) != FSCK_SUCCESS) {
+ log_debug("fsck failed with exit status %i.", exit_status);
+
+ if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN), "File system is corrupted, refusing.");
+
+ log_debug("Ignoring fsck error.");
+ }
+
+ return 0;
+}
+
+static int mount_partition(
+ DissectedPartition *m,
+ const char *where,
+ const char *directory,
+ uid_t uid_shift,
+ DissectImageFlags flags) {
+
+ _cleanup_free_ char *chased = NULL, *options = NULL;
+ const char *p, *node, *fstype;
+ bool rw;
+ int r;
+
+ assert(m);
+ assert(where);
+
+ /* Use decrypted node and matching fstype if available, otherwise use the original device */
+ node = m->decrypted_node ?: m->node;
+ fstype = m->decrypted_node ? m->decrypted_fstype: m->fstype;
+
+ if (!m->found || !node)
+ return 0;
+ if (!fstype)
+ return -EAFNOSUPPORT;
+
+ /* We are looking at an encrypted partition? This either means stacked encryption, or the caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this case. */
+ if (streq(fstype, "crypto_LUKS"))
+ return -EUNATCH;
+
+ rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY);
+
+ if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
+ r = run_fsck(node, fstype);
+ if (r < 0)
+ return r;
+ }
+
+ if (directory) {
+ if (!FLAGS_SET(flags, DISSECT_IMAGE_READ_ONLY)) {
+ /* Automatically create missing mount points, if necessary. */
+ r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755);
+ if (r < 0)
+ return r;
+ }
+
+ r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
+ if (r < 0)
+ return r;
+
+ p = chased;
+ } else
+ p = where;
+
+ /* If requested, turn on discard support. */
+ if (fstype_can_discard(fstype) &&
+ ((flags & DISSECT_IMAGE_DISCARD) ||
+ ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0))) {
+ options = strdup("discard");
+ if (!options)
+ return -ENOMEM;
+ }
+
+ if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) {
+ _cleanup_free_ char *uid_option = NULL;
+
+ if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
+ return -ENOMEM;
+
+ if (!strextend_with_separator(&options, ",", uid_option, NULL))
+ return -ENOMEM;
+ }
+
+ if (!isempty(m->mount_options))
+ if (!strextend_with_separator(&options, ",", m->mount_options, NULL))
+ return -ENOMEM;
+
+ if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
+ r = mkdir_p(p, 0755);
+ if (r < 0)
+ return r;
+ }
+
+ r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
+ int r, xbootldr_mounted;
+
+ assert(m);
+ assert(where);
+
+ /* Returns:
+ *
+ * -ENXIO → No root partition found
+ * -EMEDIUMTYPE → DISSECT_IMAGE_VALIDATE_OS set but no os-release file found
+ * -EUNATCH → Encrypted partition found for which no dm-crypt was set up yet
+ * -EUCLEAN → fsck for file system failed
+ * -EBUSY → File system already mounted/used elsewhere (kernel)
+ * -EAFNOSUPPORT → File system type not supported or not known
+ */
+
+ if (!m->partitions[PARTITION_ROOT].found)
+ return -ENXIO;
+
+ if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
+ r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags);
+ if (r < 0)
+ return r;
+ }
+
+ /* Mask DISSECT_IMAGE_MKDIR for all subdirs: the idea is that only the top-level mount point is
+ * created if needed, but the image itself not modified. */
+ flags &= ~DISSECT_IMAGE_MKDIR;
+
+ if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
+ /* For us mounting root always means mounting /usr as well */
+ r = mount_partition(m->partitions + PARTITION_USR, where, "/usr", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ if (flags & DISSECT_IMAGE_VALIDATE_OS) {
+ r = path_is_os_tree(where);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EMEDIUMTYPE;
+ }
+ }
+
+ if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
+ return 0;
+
+ r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_VAR, where, "/var", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ r = mount_partition(m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ xbootldr_mounted = mount_partition(m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, flags);
+ if (xbootldr_mounted < 0)
+ return xbootldr_mounted;
+
+ if (m->partitions[PARTITION_ESP].found) {
+ int esp_done = false;
+
+ /* Mount the ESP to /efi if it exists. If it doesn't exist, use /boot instead, but only if it
+ * exists and is empty, and we didn't already mount the XBOOTLDR partition into it. */
+
+ r = chase_symlinks("/efi", where, CHASE_PREFIX_ROOT, NULL, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+
+ /* /efi doesn't exist. Let's see if /boot is suitable then */
+
+ if (!xbootldr_mounted) {
+ _cleanup_free_ char *p = NULL;
+
+ r = chase_symlinks("/boot", where, CHASE_PREFIX_ROOT, &p, NULL);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+ } else if (dir_is_empty(p) > 0) {
+ /* It exists and is an empty directory. Let's mount the ESP there. */
+ r = mount_partition(m->partitions + PARTITION_ESP, where, "/boot", uid_shift, flags);
+ if (r < 0)
+ return r;
+
+ esp_done = true;
+ }
+ }
+ }
+
+ if (!esp_done) {
+ /* OK, let's mount the ESP now to /efi (possibly creating the dir if missing) */
+
+ r = mount_partition(m->partitions + PARTITION_ESP, where, "/efi", uid_shift, flags);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) {
+ int r;
+
+ assert(m);
+ assert(where);
+
+ r = dissected_image_mount(m, where, uid_shift, flags);
+ if (r == -ENXIO)
+ return log_error_errno(r, "Not root file system found in image.");
+ if (r == -EMEDIUMTYPE)
+ return log_error_errno(r, "No suitable os-release file in image found.");
+ if (r == -EUNATCH)
+ return log_error_errno(r, "Encrypted file system discovered, but decryption not requested.");
+ if (r == -EUCLEAN)
+ return log_error_errno(r, "File system check on image failed.");
+ if (r == -EBUSY)
+ return log_error_errno(r, "File system already mounted elsewhere.");
+ if (r == -EAFNOSUPPORT)
+ return log_error_errno(r, "File system type not supported or not known.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to mount image: %m");
+
+ return r;
+}
+
+#if HAVE_LIBCRYPTSETUP
+typedef struct DecryptedPartition {
+ struct crypt_device *device;
+ char *name;
+ bool relinquished;
+} DecryptedPartition;
+
+struct DecryptedImage {
+ DecryptedPartition *decrypted;
+ size_t n_decrypted;
+ size_t n_allocated;
+};
+#endif
+
+DecryptedImage* decrypted_image_unref(DecryptedImage* d) {
+#if HAVE_LIBCRYPTSETUP
+ size_t i;
+ int r;
+
+ if (!d)
+ return NULL;
+
+ for (i = 0; i < d->n_decrypted; i++) {
+ DecryptedPartition *p = d->decrypted + i;
+
+ if (p->device && p->name && !p->relinquished) {
+ r = sym_crypt_deactivate_by_name(p->device, p->name, 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name);
+ }
+
+ if (p->device)
+ sym_crypt_free(p->device);
+ free(p->name);
+ }
+
+ free(d);
+#endif
+ return NULL;
+}
+
+#if HAVE_LIBCRYPTSETUP
+
+static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) {
+ _cleanup_free_ char *name = NULL, *node = NULL;
+ const char *base;
+
+ assert(original_node);
+ assert(suffix);
+ assert(ret_name);
+ assert(ret_node);
+
+ base = strrchr(original_node, '/');
+ if (!base)
+ base = original_node;
+ else
+ base++;
+ if (isempty(base))
+ return -EINVAL;
+
+ name = strjoin(base, suffix);
+ if (!name)
+ return -ENOMEM;
+ if (!filename_is_valid(name))
+ return -EINVAL;
+
+ node = path_join(sym_crypt_get_dir(), name);
+ if (!node)
+ return -ENOMEM;
+
+ *ret_name = TAKE_PTR(name);
+ *ret_node = TAKE_PTR(node);
+
+ return 0;
+}
+
+static int decrypt_partition(
+ DissectedPartition *m,
+ const char *passphrase,
+ DissectImageFlags flags,
+ DecryptedImage *d) {
+
+ _cleanup_free_ char *node = NULL, *name = NULL;
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ int r;
+
+ assert(m);
+ assert(d);
+
+ if (!m->found || !m->node || !m->fstype)
+ return 0;
+
+ if (!streq(m->fstype, "crypto_LUKS"))
+ return 0;
+
+ if (!passphrase)
+ return -ENOKEY;
+
+ r = dlopen_cryptsetup();
+ if (r < 0)
+ return r;
+
+ r = make_dm_name_and_node(m->node, "-decrypted", &name, &node);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+ return -ENOMEM;
+
+ r = sym_crypt_init(&cd, m->node);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to initialize dm-crypt: %m");
+
+ cryptsetup_enable_logging(cd);
+
+ r = sym_crypt_load(cd, CRYPT_LUKS, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load LUKS metadata: %m");
+
+ r = sym_crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase),
+ ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) |
+ ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to activate LUKS device: %m");
+ return r == -EPERM ? -EKEYREJECTED : r;
+ }
+
+ d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
+ .name = TAKE_PTR(name),
+ .device = TAKE_PTR(cd),
+ };
+
+ m->decrypted_node = TAKE_PTR(node);
+
+ return 0;
+}
+
+static int verity_can_reuse(
+ const VeritySettings *verity,
+ const char *name,
+ struct crypt_device **ret_cd) {
+
+ /* If the same volume was already open, check that the root hashes match, and reuse it if they do */
+ _cleanup_free_ char *root_hash_existing = NULL;
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ struct crypt_params_verity crypt_params = {};
+ size_t root_hash_existing_size;
+ int r;
+
+ assert(verity);
+ assert(name);
+ assert(ret_cd);
+
+ r = sym_crypt_init_by_name(&cd, name);
+ if (r < 0)
+ return log_debug_errno(r, "Error opening verity device, crypt_init_by_name failed: %m");
+
+ r = sym_crypt_get_verity_info(cd, &crypt_params);
+ if (r < 0)
+ return log_debug_errno(r, "Error opening verity device, crypt_get_verity_info failed: %m");
+
+ root_hash_existing_size = verity->root_hash_size;
+ root_hash_existing = malloc0(root_hash_existing_size);
+ if (!root_hash_existing)
+ return -ENOMEM;
+
+ r = sym_crypt_volume_key_get(cd, CRYPT_ANY_SLOT, root_hash_existing, &root_hash_existing_size, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Error opening verity device, crypt_volume_key_get failed: %m");
+ if (verity->root_hash_size != root_hash_existing_size ||
+ memcmp(root_hash_existing, verity->root_hash, verity->root_hash_size) != 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but root hashes are different.");
+
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+ /* Ensure that, if signatures are supported, we only reuse the device if the previous mount used the
+ * same settings, so that a previous unsigned mount will not be reused if the user asks to use
+ * signing for the new one, and viceversa. */
+ if (!!verity->root_hash_sig != !!(crypt_params.flags & CRYPT_VERITY_ROOT_HASH_SIGNATURE))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Error opening verity device, it already exists but signature settings are not the same.");
+#endif
+
+ *ret_cd = TAKE_PTR(cd);
+ return 0;
+}
+
+static inline void dm_deferred_remove_clean(char *name) {
+ if (!name)
+ return;
+
+ (void) sym_crypt_deactivate_by_name(NULL, name, CRYPT_DEACTIVATE_DEFERRED);
+ free(name);
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char *, dm_deferred_remove_clean);
+
+static int verity_partition(
+ PartitionDesignator designator,
+ DissectedPartition *m,
+ DissectedPartition *v,
+ const VeritySettings *verity,
+ DissectImageFlags flags,
+ DecryptedImage *d) {
+
+ _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
+ _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL;
+ _cleanup_free_ char *node = NULL, *name = NULL;
+ int r;
+
+ assert(m);
+ assert(v || (verity && verity->data_path));
+
+ if (!verity || !verity->root_hash)
+ return 0;
+ if (!((verity->designator < 0 && designator == PARTITION_ROOT) ||
+ (verity->designator == designator)))
+ return 0;
+
+ if (!m->found || !m->node || !m->fstype)
+ return 0;
+ if (!verity->data_path) {
+ if (!v->found || !v->node || !v->fstype)
+ return 0;
+
+ if (!streq(v->fstype, "DM_verity_hash"))
+ return 0;
+ }
+
+ r = dlopen_cryptsetup();
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE)) {
+ /* Use the roothash, which is unique per volume, as the device node name, so that it can be reused */
+ _cleanup_free_ char *root_hash_encoded = NULL;
+
+ root_hash_encoded = hexmem(verity->root_hash, verity->root_hash_size);
+ if (!root_hash_encoded)
+ return -ENOMEM;
+
+ r = make_dm_name_and_node(root_hash_encoded, "-verity", &name, &node);
+ } else
+ r = make_dm_name_and_node(m->node, "-verity", &name, &node);
+ if (r < 0)
+ return r;
+
+ r = sym_crypt_init(&cd, verity->data_path ?: v->node);
+ if (r < 0)
+ return r;
+
+ cryptsetup_enable_logging(cd);
+
+ r = sym_crypt_load(cd, CRYPT_VERITY, NULL);
+ if (r < 0)
+ return r;
+
+ r = sym_crypt_set_data_device(cd, m->node);
+ if (r < 0)
+ return r;
+
+ if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1))
+ return -ENOMEM;
+
+ /* If activating fails because the device already exists, check the metadata and reuse it if it matches.
+ * In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time,
+ * retry a few times before giving up. */
+ for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
+ if (verity->root_hash_sig) {
+#if HAVE_CRYPT_ACTIVATE_BY_SIGNED_KEY
+ r = sym_crypt_activate_by_signed_key(
+ cd,
+ name,
+ verity->root_hash,
+ verity->root_hash_size,
+ verity->root_hash_sig,
+ verity->root_hash_sig_size,
+ CRYPT_ACTIVATE_READONLY);
+#else
+ r = log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Activation of verity device with signature requested, but not supported by %s due to missing crypt_activate_by_signed_key().", program_invocation_short_name);
+#endif
+ } else
+ r = sym_crypt_activate_by_volume_key(
+ cd,
+ name,
+ verity->root_hash,
+ verity->root_hash_size,
+ CRYPT_ACTIVATE_READONLY);
+ /* libdevmapper can return EINVAL when the device is already in the activation stage.
+ * There's no way to distinguish this situation from a genuine error due to invalid
+ * parameters, so immediately fall back to activating the device with a unique name.
+ * Improvements in libcrypsetup can ensure this never happens:
+ * https://gitlab.com/cryptsetup/cryptsetup/-/merge_requests/96 */
+ if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
+ return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
+ if (!IN_SET(r,
+ 0, /* Success */
+ -EEXIST, /* Volume is already open and ready to be used */
+ -EBUSY, /* Volume is being opened but not ready, crypt_init_by_name can fetch details */
+ -ENODEV /* Volume is being opened but not ready, crypt_init_by_name would fail, try to open again */))
+ return r;
+ if (IN_SET(r, -EEXIST, -EBUSY)) {
+ struct crypt_device *existing_cd = NULL;
+
+ if (!restore_deferred_remove){
+ /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
+ r = dm_deferred_remove_cancel(name);
+ /* If activation returns EBUSY there might be no deferred removal to cancel, that's fine */
+ if (r < 0 && r != -ENXIO)
+ return log_debug_errno(r, "Disabling automated deferred removal for verity device %s failed: %m", node);
+ if (r == 0) {
+ restore_deferred_remove = strdup(name);
+ if (!restore_deferred_remove)
+ return -ENOMEM;
+ }
+ }
+
+ r = verity_can_reuse(verity, name, &existing_cd);
+ /* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */
+ if (r == -EINVAL && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
+ return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
+ if (!IN_SET(r, 0, -ENODEV, -ENOENT, -EBUSY))
+ return log_debug_errno(r, "Checking whether existing verity device %s can be reused failed: %m", node);
+ if (r == 0) {
+ /* devmapper might say that the device exists, but the devlink might not yet have been
+ * created. Check and wait for the udev event in that case. */
+ r = device_wait_for_devlink(node, "block", usec_add(now(CLOCK_MONOTONIC), 100 * USEC_PER_MSEC), NULL);
+ /* Fallback to activation with a unique device if it's taking too long */
+ if (r == -ETIMEDOUT)
+ break;
+ if (r < 0)
+ return r;
+
+ if (cd)
+ sym_crypt_free(cd);
+ cd = existing_cd;
+ }
+ }
+ if (r == 0)
+ break;
+
+ /* Device is being opened by another process, but it has not finished yet, yield for 2ms */
+ (void) usleep(2 * USEC_PER_MSEC);
+ }
+
+ /* An existing verity device was reported by libcryptsetup/libdevmapper, but we can't use it at this time.
+ * Fall back to activating it with a unique device name. */
+ if (r != 0 && FLAGS_SET(flags, DISSECT_IMAGE_VERITY_SHARE))
+ return verity_partition(designator, m, v, verity, flags & ~DISSECT_IMAGE_VERITY_SHARE, d);
+
+ /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
+ restore_deferred_remove = mfree(restore_deferred_remove);
+
+ d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
+ .name = TAKE_PTR(name),
+ .device = TAKE_PTR(cd),
+ };
+
+ m->decrypted_node = TAKE_PTR(node);
+
+ return 0;
+}
+#endif
+
+int dissected_image_decrypt(
+ DissectedImage *m,
+ const char *passphrase,
+ const VeritySettings *verity,
+ DissectImageFlags flags,
+ DecryptedImage **ret) {
+
+#if HAVE_LIBCRYPTSETUP
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL;
+ int r;
+#endif
+
+ assert(m);
+ assert(!verity || verity->root_hash || verity->root_hash_size == 0);
+
+ /* Returns:
+ *
+ * = 0 → There was nothing to decrypt
+ * > 0 → Decrypted successfully
+ * -ENOKEY → There's something to decrypt but no key was supplied
+ * -EKEYREJECTED → Passed key was not correct
+ */
+
+ if (verity && verity->root_hash && verity->root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+
+ if (!m->encrypted && !m->verity) {
+ *ret = NULL;
+ return 0;
+ }
+
+#if HAVE_LIBCRYPTSETUP
+ d = new0(DecryptedImage, 1);
+ if (!d)
+ return -ENOMEM;
+
+ for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) {
+ DissectedPartition *p = m->partitions + i;
+ PartitionDesignator k;
+
+ if (!p->found)
+ continue;
+
+ r = decrypt_partition(p, passphrase, flags, d);
+ if (r < 0)
+ return r;
+
+ k = PARTITION_VERITY_OF(i);
+ if (k >= 0) {
+ r = verity_partition(i, p, m->partitions + k, verity, flags | DISSECT_IMAGE_VERITY_SHARE, d);
+ if (r < 0)
+ return r;
+ }
+
+ if (!p->decrypted_fstype && p->decrypted_node) {
+ r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype);
+ if (r < 0 && r != -EUCLEAN)
+ return r;
+ }
+ }
+
+ *ret = TAKE_PTR(d);
+
+ return 1;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+int dissected_image_decrypt_interactively(
+ DissectedImage *m,
+ const char *passphrase,
+ const VeritySettings *verity,
+ DissectImageFlags flags,
+ DecryptedImage **ret) {
+
+ _cleanup_strv_free_erase_ char **z = NULL;
+ int n = 3, r;
+
+ if (passphrase)
+ n--;
+
+ for (;;) {
+ r = dissected_image_decrypt(m, passphrase, verity, flags, ret);
+ if (r >= 0)
+ return r;
+ if (r == -EKEYREJECTED)
+ log_error_errno(r, "Incorrect passphrase, try again!");
+ else if (r != -ENOKEY)
+ return log_error_errno(r, "Failed to decrypt image: %m");
+
+ if (--n < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED),
+ "Too many retries.");
+
+ z = strv_free(z);
+
+ r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query for passphrase: %m");
+
+ passphrase = z[0];
+ }
+}
+
+int decrypted_image_relinquish(DecryptedImage *d) {
+
+#if HAVE_LIBCRYPTSETUP
+ size_t i;
+ int r;
+#endif
+
+ assert(d);
+
+ /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so
+ * that we don't clean it up ourselves either anymore */
+
+#if HAVE_LIBCRYPTSETUP
+ for (i = 0; i < d->n_decrypted; i++) {
+ DecryptedPartition *p = d->decrypted + i;
+
+ if (p->relinquished)
+ continue;
+
+ r = sym_crypt_deactivate_by_name(NULL, p->name, CRYPT_DEACTIVATE_DEFERRED);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name);
+
+ p->relinquished = true;
+ }
+#endif
+
+ return 0;
+}
+
+static char *build_auxiliary_path(const char *image, const char *suffix) {
+ const char *e;
+ char *n;
+
+ assert(image);
+ assert(suffix);
+
+ e = endswith(image, ".raw");
+ if (!e)
+ return strjoin(e, suffix);
+
+ n = new(char, e - image + strlen(suffix) + 1);
+ if (!n)
+ return NULL;
+
+ strcpy(mempcpy(n, image, e - image), suffix);
+ return n;
+}
+
+void verity_settings_done(VeritySettings *v) {
+ assert(v);
+
+ v->root_hash = mfree(v->root_hash);
+ v->root_hash_size = 0;
+
+ v->root_hash_sig = mfree(v->root_hash_sig);
+ v->root_hash_sig_size = 0;
+
+ v->data_path = mfree(v->data_path);
+}
+
+int verity_settings_load(
+ VeritySettings *verity,
+ const char *image,
+ const char *root_hash_path,
+ const char *root_hash_sig_path) {
+
+ _cleanup_free_ void *root_hash = NULL, *root_hash_sig = NULL;
+ size_t root_hash_size = 0, root_hash_sig_size = 0;
+ _cleanup_free_ char *verity_data_path = NULL;
+ PartitionDesignator designator;
+ int r;
+
+ assert(verity);
+ assert(image);
+ assert(verity->designator < 0 || IN_SET(verity->designator, PARTITION_ROOT, PARTITION_USR));
+
+ /* If we are asked to load the root hash for a device node, exit early */
+ if (is_device_path(image))
+ return 0;
+
+ designator = verity->designator;
+
+ /* We only fill in what isn't already filled in */
+
+ if (!verity->root_hash) {
+ _cleanup_free_ char *text = NULL;
+
+ if (root_hash_path) {
+ /* If explicitly specified it takes precedence */
+ r = read_one_line_file(root_hash_path, &text);
+ if (r < 0)
+ return r;
+
+ if (designator < 0)
+ designator = PARTITION_ROOT;
+ } else {
+ /* Otherwise look for xattr and separate file, and first for the data for root and if
+ * that doesn't exist for /usr */
+
+ if (designator < 0 || designator == PARTITION_ROOT) {
+ r = getxattr_malloc(image, "user.verity.roothash", &text, true);
+ if (r < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
+ return r;
+
+ p = build_auxiliary_path(image, ".roothash");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &text);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+
+ if (text)
+ designator = PARTITION_ROOT;
+ }
+
+ if (!text && (designator < 0 || designator == PARTITION_USR)) {
+ /* So in the "roothash" xattr/file name above the "root" of course primarily
+ * refers to the root of the Verity Merkle tree. But coincidentally it also
+ * is the hash for the *root* file system, i.e. the "root" neatly refers to
+ * two distinct concepts called "root". Taking benefit of this happy
+ * coincidence we call the file with the root hash for the /usr/ file system
+ * `usrhash`, because `usrroothash` or `rootusrhash` would just be too
+ * confusing. We thus drop the reference to the root of the Merkle tree, and
+ * just indicate which file system it's about. */
+ r = getxattr_malloc(image, "user.verity.usrhash", &text, true);
+ if (r < 0) {
+ _cleanup_free_ char *p = NULL;
+
+ if (!IN_SET(r, -ENODATA, -ENOENT) && !ERRNO_IS_NOT_SUPPORTED(r))
+ return r;
+
+ p = build_auxiliary_path(image, ".usrhash");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_one_line_file(p, &text);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+
+ if (text)
+ designator = PARTITION_USR;
+ }
+ }
+
+ if (text) {
+ r = unhexmem(text, strlen(text), &root_hash, &root_hash_size);
+ if (r < 0)
+ return r;
+ if (root_hash_size < sizeof(sd_id128_t))
+ return -EINVAL;
+ }
+ }
+
+ if ((root_hash || verity->root_hash) && !verity->root_hash_sig) {
+ if (root_hash_sig_path) {
+ r = read_full_file_full(AT_FDCWD, root_hash_sig_path, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size);
+ if (r < 0 && r != -ENOENT)
+ return r;
+
+ if (designator < 0)
+ designator = PARTITION_ROOT;
+ } else {
+ if (designator < 0 || designator == PARTITION_ROOT) {
+ _cleanup_free_ char *p = NULL;
+
+ /* Follow naming convention recommended by the relevant RFC:
+ * https://tools.ietf.org/html/rfc5751#section-3.2.1 */
+ p = build_auxiliary_path(image, ".roothash.p7s");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_full_file_full(AT_FDCWD, p, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r >= 0)
+ designator = PARTITION_ROOT;
+ }
+
+ if (!root_hash_sig && (designator < 0 || designator == PARTITION_USR)) {
+ _cleanup_free_ char *p = NULL;
+
+ p = build_auxiliary_path(image, ".usrhash.p7s");
+ if (!p)
+ return -ENOMEM;
+
+ r = read_full_file_full(AT_FDCWD, p, 0, NULL, (char**) &root_hash_sig, &root_hash_sig_size);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ if (r >= 0)
+ designator = PARTITION_USR;
+ }
+ }
+
+ if (root_hash_sig && root_hash_sig_size == 0) /* refuse empty size signatures */
+ return -EINVAL;
+ }
+
+ if (!verity->data_path) {
+ _cleanup_free_ char *p = NULL;
+
+ p = build_auxiliary_path(image, ".verity");
+ if (!p)
+ return -ENOMEM;
+
+ if (access(p, F_OK) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else
+ verity_data_path = TAKE_PTR(p);
+ }
+
+ if (root_hash) {
+ verity->root_hash = TAKE_PTR(root_hash);
+ verity->root_hash_size = root_hash_size;
+ }
+
+ if (root_hash_sig) {
+ verity->root_hash_sig = TAKE_PTR(root_hash_sig);
+ verity->root_hash_sig_size = root_hash_sig_size;
+ }
+
+ if (verity_data_path)
+ verity->data_path = TAKE_PTR(verity_data_path);
+
+ if (verity->designator < 0)
+ verity->designator = designator;
+
+ return 1;
+}
+
+int dissected_image_acquire_metadata(DissectedImage *m) {
+
+ enum {
+ META_HOSTNAME,
+ META_MACHINE_ID,
+ META_MACHINE_INFO,
+ META_OS_RELEASE,
+ _META_MAX,
+ };
+
+ static const char *const paths[_META_MAX] = {
+ [META_HOSTNAME] = "/etc/hostname\0",
+ [META_MACHINE_ID] = "/etc/machine-id\0",
+ [META_MACHINE_INFO] = "/etc/machine-info\0",
+ [META_OS_RELEASE] = "/etc/os-release\0"
+ "/usr/lib/os-release\0",
+ };
+
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+ _cleanup_close_pair_ int error_pipe[2] = { -1, -1 };
+ _cleanup_(rmdir_and_freep) char *t = NULL;
+ _cleanup_(sigkill_waitp) pid_t child = 0;
+ sd_id128_t machine_id = SD_ID128_NULL;
+ _cleanup_free_ char *hostname = NULL;
+ unsigned n_meta_initialized = 0, k;
+ int fds[2 * _META_MAX], r, v;
+ ssize_t n;
+
+ BLOCK_SIGNALS(SIGCHLD);
+
+ assert(m);
+
+ for (; n_meta_initialized < _META_MAX; n_meta_initialized ++)
+ if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t);
+ if (r < 0)
+ goto finish;
+
+ if (pipe2(error_pipe, O_CLOEXEC) < 0) {
+ r = -errno;
+ goto finish;
+ }
+
+ r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
+ if (r < 0)
+ goto finish;
+ if (r == 0) {
+ error_pipe[0] = safe_close(error_pipe[0]);
+
+ r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS);
+ if (r < 0) {
+ /* Let parent know the error */
+ (void) write(error_pipe[1], &r, sizeof(r));
+
+ log_debug_errno(r, "Failed to mount dissected image: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_close_ int fd = -ENOENT;
+ const char *p;
+
+ fds[2*k] = safe_close(fds[2*k]);
+
+ NULSTR_FOREACH(p, paths[k]) {
+ fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd >= 0)
+ break;
+ }
+ if (fd < 0) {
+ log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+ continue;
+ }
+
+ r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0);
+ if (r < 0) {
+ (void) write(error_pipe[1], &r, sizeof(r));
+ _exit(EXIT_FAILURE);
+ }
+
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ error_pipe[1] = safe_close(error_pipe[1]);
+
+ for (k = 0; k < _META_MAX; k++) {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ fds[2*k+1] = safe_close(fds[2*k+1]);
+
+ f = take_fdopen(&fds[2*k], "r");
+ if (!f) {
+ r = -errno;
+ goto finish;
+ }
+
+ switch (k) {
+
+ case META_HOSTNAME:
+ r = read_etc_hostname_stream(f, &hostname);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/hostname: %m");
+
+ break;
+
+ case META_MACHINE_ID: {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-id: %m");
+ else if (r == 33) {
+ r = sd_id128_from_string(line, &machine_id);
+ if (r < 0)
+ log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line);
+ } else if (r == 0)
+ log_debug("/etc/machine-id file is empty.");
+ else if (streq(line, "uninitialized"))
+ log_debug("/etc/machine-id file is uninitialized (likely aborted first boot).");
+ else
+ log_debug("/etc/machine-id has unexpected length %i.", r);
+
+ break;
+ }
+
+ case META_MACHINE_INFO:
+ r = load_env_file_pairs(f, "machine-info", &machine_info);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read /etc/machine-info: %m");
+
+ break;
+
+ case META_OS_RELEASE:
+ r = load_env_file_pairs(f, "os-release", &os_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read OS release file: %m");
+
+ break;
+ }
+ }
+
+ r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
+ child = 0;
+ if (r < 0)
+ return r;
+
+ n = read(error_pipe[0], &v, sizeof(v));
+ if (n < 0)
+ return -errno;
+ if (n == sizeof(v))
+ return v; /* propagate error sent to us from child */
+ if (n != 0)
+ return -EIO;
+
+ if (r != EXIT_SUCCESS)
+ return -EPROTO;
+
+ free_and_replace(m->hostname, hostname);
+ m->machine_id = machine_id;
+ strv_free_and_replace(m->machine_info, machine_info);
+ strv_free_and_replace(m->os_release, os_release);
+
+finish:
+ for (k = 0; k < n_meta_initialized; k++)
+ safe_close_pair(fds + 2*k);
+
+ return r;
+}
+
+int dissect_image_and_warn(
+ int fd,
+ const char *name,
+ const VeritySettings *verity,
+ const MountOptions *mount_options,
+ DissectImageFlags flags,
+ DissectedImage **ret) {
+
+ _cleanup_free_ char *buffer = NULL;
+ int r;
+
+ if (!name) {
+ r = fd_get_path(fd, &buffer);
+ if (r < 0)
+ return r;
+
+ name = buffer;
+ }
+
+ r = dissect_image(fd, verity, mount_options, flags, ret);
+ switch (r) {
+
+ case -EOPNOTSUPP:
+ return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support.");
+
+ case -ENOPKG:
+ return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name);
+
+ case -EADDRNOTAVAIL:
+ return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name);
+
+ case -ENOTUNIQ:
+ return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name);
+
+ case -ENXIO:
+ return log_error_errno(r, "No suitable root partition found in image '%s'.", name);
+
+ case -EPROTONOSUPPORT:
+ return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name);
+
+ default:
+ if (r < 0)
+ return log_error_errno(r, "Failed to dissect image '%s': %m", name);
+
+ return r;
+ }
+}
+
+bool dissected_image_can_do_verity(const DissectedImage *image, PartitionDesignator partition_designator) {
+ if (image->single_file_system)
+ return partition_designator == PARTITION_ROOT && image->can_verity;
+
+ return PARTITION_VERITY_OF(partition_designator) >= 0;
+}
+
+bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator partition_designator) {
+ int k;
+
+ if (image->single_file_system)
+ return partition_designator == PARTITION_ROOT && image->verity;
+
+ k = PARTITION_VERITY_OF(partition_designator);
+ return k >= 0 && image->partitions[k].found;
+}
+
+MountOptions* mount_options_free_all(MountOptions *options) {
+ MountOptions *m;
+
+ while ((m = options)) {
+ LIST_REMOVE(mount_options, options, m);
+ free(m->options);
+ free(m);
+ }
+
+ return NULL;
+}
+
+const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator) {
+ const MountOptions *m;
+
+ LIST_FOREACH(mount_options, m, options)
+ if (designator == m->partition_designator && !isempty(m->options))
+ return m->options;
+
+ return NULL;
+}
+
+int mount_image_privately_interactively(
+ const char *image,
+ DissectImageFlags flags,
+ char **ret_directory,
+ LoopDevice **ret_loop_device,
+ DecryptedImage **ret_decrypted_image) {
+
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
+ _cleanup_(rmdir_and_freep) char *created_dir = NULL;
+ _cleanup_free_ char *temp = NULL;
+ int r;
+
+ /* Mounts an OS image at a temporary place, inside a newly created mount namespace of our own. This
+ * is used by tools such as systemd-tmpfiles or systemd-firstboot to operate on some disk image
+ * easily. */
+
+ assert(image);
+ assert(ret_directory);
+ assert(ret_loop_device);
+ assert(ret_decrypted_image);
+
+ r = tempfn_random_child(NULL, program_invocation_short_name, &temp);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate temporary mount directory: %m");
+
+ r = loop_device_make_by_path(
+ image,
+ FLAGS_SET(flags, DISSECT_IMAGE_READ_ONLY) ? O_RDONLY : O_RDWR,
+ FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
+ &d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up loopback device: %m");
+
+ r = dissect_image_and_warn(d->fd, image, NULL, NULL, flags, &dissected_image);
+ if (r < 0)
+ return r;
+
+ r = dissected_image_decrypt_interactively(dissected_image, NULL, NULL, flags, &decrypted_image);
+ if (r < 0)
+ return r;
+
+ r = detach_mount_namespace();
+ if (r < 0)
+ return log_error_errno(r, "Failed to detach mount namespace: %m");
+
+ r = mkdir_p(temp, 0700);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create mount point: %m");
+
+ created_dir = TAKE_PTR(temp);
+
+ r = dissected_image_mount_and_warn(dissected_image, created_dir, UID_INVALID, flags);
+ if (r < 0)
+ return r;
+
+ if (decrypted_image) {
+ r = decrypted_image_relinquish(decrypted_image);
+ if (r < 0)
+ return log_error_errno(r, "Failed to relinquish DM devices: %m");
+ }
+
+ loop_device_relinquish(d);
+
+ *ret_directory = TAKE_PTR(created_dir);
+ *ret_loop_device = TAKE_PTR(d);
+ *ret_decrypted_image = TAKE_PTR(decrypted_image);
+
+ return 0;
+}
+
+static const char *const partition_designator_table[] = {
+ [PARTITION_ROOT] = "root",
+ [PARTITION_ROOT_SECONDARY] = "root-secondary",
+ [PARTITION_USR] = "usr",
+ [PARTITION_USR_SECONDARY] = "usr-secondary",
+ [PARTITION_HOME] = "home",
+ [PARTITION_SRV] = "srv",
+ [PARTITION_ESP] = "esp",
+ [PARTITION_XBOOTLDR] = "xbootldr",
+ [PARTITION_SWAP] = "swap",
+ [PARTITION_ROOT_VERITY] = "root-verity",
+ [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity",
+ [PARTITION_USR_VERITY] = "usr-verity",
+ [PARTITION_USR_SECONDARY_VERITY] = "usr-secondary-verity",
+ [PARTITION_TMP] = "tmp",
+ [PARTITION_VAR] = "var",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(partition_designator, PartitionDesignator);
diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h
new file mode 100644
index 0000000..3b30e08
--- /dev/null
+++ b/src/shared/dissect-image.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+#include "list.h"
+#include "loop-util.h"
+#include "macro.h"
+
+typedef struct DissectedImage DissectedImage;
+typedef struct DissectedPartition DissectedPartition;
+typedef struct DecryptedImage DecryptedImage;
+typedef struct MountOptions MountOptions;
+typedef struct VeritySettings VeritySettings;
+
+struct DissectedPartition {
+ bool found:1;
+ bool rw:1;
+ int partno; /* -1 if there was no partition and the images contains a file system directly */
+ int architecture; /* Intended architecture: either native, secondary or unset (-1). */
+ sd_id128_t uuid; /* Partition entry UUID as reported by the GPT */
+ char *fstype;
+ char *node;
+ char *decrypted_node;
+ char *decrypted_fstype;
+ char *mount_options;
+};
+
+typedef enum PartitionDesignator {
+ PARTITION_ROOT,
+ PARTITION_ROOT_SECONDARY, /* Secondary architecture */
+ PARTITION_USR,
+ PARTITION_USR_SECONDARY,
+ PARTITION_HOME,
+ PARTITION_SRV,
+ PARTITION_ESP,
+ PARTITION_XBOOTLDR,
+ PARTITION_SWAP,
+ PARTITION_ROOT_VERITY, /* verity data for the PARTITION_ROOT partition */
+ PARTITION_ROOT_SECONDARY_VERITY, /* verity data for the PARTITION_ROOT_SECONDARY partition */
+ PARTITION_USR_VERITY,
+ PARTITION_USR_SECONDARY_VERITY,
+ PARTITION_TMP,
+ PARTITION_VAR,
+ _PARTITION_DESIGNATOR_MAX,
+ _PARTITION_DESIGNATOR_INVALID = -1
+} PartitionDesignator;
+
+static inline PartitionDesignator PARTITION_VERITY_OF(PartitionDesignator p) {
+ switch (p) {
+
+ case PARTITION_ROOT:
+ return PARTITION_ROOT_VERITY;
+
+ case PARTITION_ROOT_SECONDARY:
+ return PARTITION_ROOT_SECONDARY_VERITY;
+
+ case PARTITION_USR:
+ return PARTITION_USR_VERITY;
+
+ case PARTITION_USR_SECONDARY:
+ return PARTITION_USR_SECONDARY_VERITY;
+
+ default:
+ return _PARTITION_DESIGNATOR_INVALID;
+ }
+}
+
+typedef enum DissectImageFlags {
+ DISSECT_IMAGE_READ_ONLY = 1 << 0,
+ DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
+ DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
+ DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
+ DISSECT_IMAGE_DISCARD |
+ DISSECT_IMAGE_DISCARD_ON_CRYPTO,
+ DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
+ DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition (and if no partition table or only single generic partition, assume it's root) */
+ DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root and /usr partitions */
+ DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only the non-root and non-/usr partitions */
+ DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifiable as OS images */
+ DISSECT_IMAGE_NO_UDEV = 1 << 9, /* Don't wait for udev initializing things */
+ DISSECT_IMAGE_RELAX_VAR_CHECK = 1 << 10, /* Don't insist that the UUID of /var is hashed from /etc/machine-id */
+ DISSECT_IMAGE_FSCK = 1 << 11, /* File system check the partition before mounting (no effect when combined with DISSECT_IMAGE_READ_ONLY) */
+ DISSECT_IMAGE_NO_PARTITION_TABLE = 1 << 12, /* Only recognize single file system images */
+ DISSECT_IMAGE_VERITY_SHARE = 1 << 13, /* When activating a verity device, reuse existing one if already open */
+ DISSECT_IMAGE_MKDIR = 1 << 14, /* Make directory to mount right before mounting, if missing */
+} DissectImageFlags;
+
+struct DissectedImage {
+ bool encrypted:1;
+ bool verity:1; /* verity available and usable */
+ bool can_verity:1; /* verity available, but not necessarily used */
+ bool single_file_system:1; /* MBR/GPT or single file system */
+
+ DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX];
+
+ char *hostname;
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+};
+
+struct MountOptions {
+ PartitionDesignator partition_designator;
+ char *options;
+ LIST_FIELDS(MountOptions, mount_options);
+};
+
+struct VeritySettings {
+ /* Binary root hash for the Verity Merkle tree */
+ void *root_hash;
+ size_t root_hash_size;
+
+ /* PKCS#7 signature of the above */
+ void *root_hash_sig;
+ size_t root_hash_sig_size;
+
+ /* Path to the verity data file, if stored externally */
+ char *data_path;
+
+ /* PARTITION_ROOT or PARTITION_USR, depending on what these Verity settings are for */
+ PartitionDesignator designator;
+};
+
+#define VERITY_SETTINGS_DEFAULT { \
+ .designator = _PARTITION_DESIGNATOR_INVALID \
+ }
+
+MountOptions* mount_options_free_all(MountOptions *options);
+DEFINE_TRIVIAL_CLEANUP_FUNC(MountOptions*, mount_options_free_all);
+const char* mount_options_from_designator(const MountOptions *options, PartitionDesignator designator);
+
+int probe_filesystem(const char *node, char **ret_fstype);
+int dissect_image(int fd, const VeritySettings *verity, const MountOptions *mount_options, DissectImageFlags flags, DissectedImage **ret);
+int dissect_image_and_warn(int fd, const char *name, const VeritySettings *verity, const MountOptions *mount_options, DissectImageFlags flags, DissectedImage **ret);
+
+DissectedImage* dissected_image_unref(DissectedImage *m);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
+
+int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);
+int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags);
+int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags);
+
+int dissected_image_acquire_metadata(DissectedImage *m);
+
+DecryptedImage* decrypted_image_unref(DecryptedImage *p);
+DEFINE_TRIVIAL_CLEANUP_FUNC(DecryptedImage*, decrypted_image_unref);
+int decrypted_image_relinquish(DecryptedImage *d);
+
+const char* partition_designator_to_string(PartitionDesignator d) _const_;
+PartitionDesignator partition_designator_from_string(const char *name) _pure_;
+
+int verity_settings_load(VeritySettings *verity, const char *image, const char *root_hash_path, const char *root_hash_sig_path);
+void verity_settings_done(VeritySettings *verity);
+
+bool dissected_image_can_do_verity(const DissectedImage *image, PartitionDesignator d);
+bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator d);
+
+int mount_image_privately_interactively(const char *path, DissectImageFlags flags, char **ret_directory, LoopDevice **ret_loop_device, DecryptedImage **ret_decrypted_image);
diff --git a/src/shared/dm-util.c b/src/shared/dm-util.c
new file mode 100644
index 0000000..b48b9b5
--- /dev/null
+++ b/src/shared/dm-util.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+#include <linux/dm-ioctl.h>
+#include <sys/ioctl.h>
+
+#include "dm-util.h"
+#include "fd-util.h"
+#include "string-util.h"
+
+int dm_deferred_remove_cancel(const char *name) {
+ _cleanup_close_ int fd = -1;
+ struct message {
+ struct dm_ioctl dm_ioctl;
+ struct dm_target_msg dm_target_msg;
+ char msg_text[STRLEN("@cancel_deferred_remove") + 1];
+ } _packed_ message = {
+ .dm_ioctl = {
+ .version = {
+ DM_VERSION_MAJOR,
+ DM_VERSION_MINOR,
+ DM_VERSION_PATCHLEVEL
+ },
+ .data_size = sizeof(struct message),
+ .data_start = sizeof(struct dm_ioctl),
+ },
+ .msg_text = "@cancel_deferred_remove",
+ };
+
+ assert(name);
+
+ if (strlen(name) >= sizeof(message.dm_ioctl.name))
+ return -ENODEV; /* A device with a name longer than this cannot possibly exist */
+
+ strncpy_exact(message.dm_ioctl.name, name, sizeof(message.dm_ioctl.name));
+
+ fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (ioctl(fd, DM_TARGET_MSG, &message))
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/dm-util.h b/src/shared/dm-util.h
new file mode 100644
index 0000000..e6e3d7d
--- /dev/null
+++ b/src/shared/dm-util.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int dm_deferred_remove_cancel(const char *name);
diff --git a/src/shared/dns-domain.c b/src/shared/dns-domain.c
new file mode 100644
index 0000000..ec42b29
--- /dev/null
+++ b/src/shared/dns-domain.c
@@ -0,0 +1,1414 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <endian.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "alloc-util.h"
+#include "dns-domain.h"
+#include "hashmap.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "idn-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags) {
+ const char *n;
+ char *d, last_char = 0;
+ int r = 0;
+
+ assert(name);
+ assert(*name);
+
+ n = *name;
+ d = dest;
+
+ for (;;) {
+ if (IN_SET(*n, 0, '.')) {
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) && last_char == '-')
+ /* Trailing dash */
+ return -EINVAL;
+
+ if (n[0] == '.' && (n[1] != 0 || !FLAGS_SET(flags, DNS_LABEL_LEAVE_TRAILING_DOT)))
+ n++;
+
+ break;
+ }
+
+ if (r >= DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (sz <= 0)
+ return -ENOBUFS;
+
+ if (*n == '\\') {
+ /* Escaped character */
+ if (FLAGS_SET(flags, DNS_LABEL_NO_ESCAPES))
+ return -EINVAL;
+
+ n++;
+
+ if (*n == 0)
+ /* Ending NUL */
+ return -EINVAL;
+
+ else if (IN_SET(*n, '\\', '.')) {
+ /* Escaped backslash or dot */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH))
+ return -EINVAL;
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+
+ } else if (n[0] >= '0' && n[0] <= '9') {
+ unsigned k;
+
+ /* Escaped literal ASCII character */
+
+ if (!(n[1] >= '0' && n[1] <= '9') ||
+ !(n[2] >= '0' && n[2] <= '9'))
+ return -EINVAL;
+
+ k = ((unsigned) (n[0] - '0') * 100) +
+ ((unsigned) (n[1] - '0') * 10) +
+ ((unsigned) (n[2] - '0'));
+
+ /* Don't allow anything that doesn't
+ * fit in 8bit. Note that we do allow
+ * control characters, as some servers
+ * (e.g. cloudflare) are happy to
+ * generate labels with them
+ * inside. */
+ if (k > 255)
+ return -EINVAL;
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH) &&
+ !valid_ldh_char((char) k))
+ return -EINVAL;
+
+ last_char = (char) k;
+ if (d)
+ *(d++) = (char) k;
+ sz--;
+ r++;
+
+ n += 3;
+ } else
+ return -EINVAL;
+
+ } else if ((uint8_t) *n >= (uint8_t) ' ' && *n != 127) {
+
+ /* Normal character */
+
+ if (FLAGS_SET(flags, DNS_LABEL_LDH)) {
+ if (!valid_ldh_char(*n))
+ return -EINVAL;
+ if (r == 0 && *n == '-')
+ /* Leading dash */
+ return -EINVAL;
+ }
+
+ last_char = *n;
+ if (d)
+ *(d++) = *n;
+ sz--;
+ r++;
+ n++;
+ } else
+ return -EINVAL;
+ }
+
+ /* Empty label that is not at the end? */
+ if (r == 0 && *n)
+ return -EINVAL;
+
+ /* More than one trailing dot? */
+ if (n[0] == '.' && !FLAGS_SET(flags, DNS_LABEL_LEAVE_TRAILING_DOT))
+ return -EINVAL;
+
+ if (sz >= 1 && d)
+ *d = 0;
+
+ *name = n;
+ return r;
+}
+
+/* @label_terminal: terminal character of a label, updated to point to the terminal character of
+ * the previous label (always skipping one dot) or to NULL if there are no more
+ * labels. */
+int dns_label_unescape_suffix(const char *name, const char **label_terminal, char *dest, size_t sz) {
+ const char *terminal;
+ int r;
+
+ assert(name);
+ assert(label_terminal);
+ assert(dest);
+
+ /* no more labels */
+ if (!*label_terminal) {
+ if (sz >= 1)
+ *dest = 0;
+
+ return 0;
+ }
+
+ terminal = *label_terminal;
+ assert(IN_SET(*terminal, 0, '.'));
+
+ /* Skip current terminal character (and accept domain names ending it ".") */
+ if (*terminal == 0)
+ terminal--;
+ if (terminal >= name && *terminal == '.')
+ terminal--;
+
+ /* Point name to the last label, and terminal to the preceding terminal symbol (or make it a NULL pointer) */
+ for (;;) {
+ if (terminal < name) {
+ /* Reached the first label, so indicate that there are no more */
+ terminal = NULL;
+ break;
+ }
+
+ /* Find the start of the last label */
+ if (*terminal == '.') {
+ const char *y;
+ unsigned slashes = 0;
+
+ for (y = terminal - 1; y >= name && *y == '\\'; y--)
+ slashes++;
+
+ if (slashes % 2 == 0) {
+ /* The '.' was not escaped */
+ name = terminal + 1;
+ break;
+ } else {
+ terminal = y;
+ continue;
+ }
+ }
+
+ terminal--;
+ }
+
+ r = dns_label_unescape(&name, dest, sz, 0);
+ if (r < 0)
+ return r;
+
+ *label_terminal = terminal;
+
+ return r;
+}
+
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz) {
+ char *q;
+
+ /* DNS labels must be between 1 and 63 characters long. A
+ * zero-length label does not exist. See RFC 2182, Section
+ * 11. */
+
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+ if (sz < 1)
+ return -ENOBUFS;
+
+ assert(p);
+ assert(dest);
+
+ q = dest;
+ while (l > 0) {
+
+ if (IN_SET(*p, '.', '\\')) {
+
+ /* Dot or backslash */
+
+ if (sz < 3)
+ return -ENOBUFS;
+
+ *(q++) = '\\';
+ *(q++) = *p;
+
+ sz -= 2;
+
+ } else if (IN_SET(*p, '_', '-') ||
+ (*p >= '0' && *p <= '9') ||
+ (*p >= 'a' && *p <= 'z') ||
+ (*p >= 'A' && *p <= 'Z')) {
+
+ /* Proper character */
+
+ if (sz < 2)
+ return -ENOBUFS;
+
+ *(q++) = *p;
+ sz -= 1;
+
+ } else {
+
+ /* Everything else */
+
+ if (sz < 5)
+ return -ENOBUFS;
+
+ *(q++) = '\\';
+ *(q++) = '0' + (char) ((uint8_t) *p / 100);
+ *(q++) = '0' + (char) (((uint8_t) *p / 10) % 10);
+ *(q++) = '0' + (char) ((uint8_t) *p % 10);
+
+ sz -= 4;
+ }
+
+ p++;
+ l--;
+ }
+
+ *q = 0;
+ return (int) (q - dest);
+}
+
+int dns_label_escape_new(const char *p, size_t l, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ s = new(char, DNS_LABEL_ESCAPED_MAX);
+ if (!s)
+ return -ENOMEM;
+
+ r = dns_label_escape(p, l, s, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(s);
+
+ return r;
+}
+
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
+ _cleanup_free_ uint32_t *input = NULL;
+ size_t input_size, l;
+ const char *p;
+ bool contains_8bit = false;
+ char buffer[DNS_LABEL_MAX+1];
+ int r;
+
+ assert(encoded);
+ assert(decoded);
+
+ /* Converts an U-label into an A-label */
+
+ r = dlopen_idn();
+ if (r < 0)
+ return r;
+
+ if (encoded_size <= 0)
+ return -EINVAL;
+
+ for (p = encoded; p < encoded + encoded_size; p++)
+ if ((uint8_t) *p > 127)
+ contains_8bit = true;
+
+ if (!contains_8bit) {
+ if (encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ return 0;
+ }
+
+ input = sym_stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ if (sym_idna_to_ascii_4i(input, input_size, buffer, 0) != 0)
+ return -EINVAL;
+
+ l = strlen(buffer);
+
+ /* Verify that the result is not longer than one DNS label. */
+ if (l <= 0 || l > DNS_LABEL_MAX)
+ return -EINVAL;
+ if (l > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, buffer, l);
+
+ /* If there's room, append a trailing NUL byte, but only then */
+ if (decoded_max > l)
+ decoded[l] = 0;
+
+ return (int) l;
+}
+
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) {
+ size_t input_size, output_size;
+ _cleanup_free_ uint32_t *input = NULL;
+ _cleanup_free_ char *result = NULL;
+ uint32_t *output = NULL;
+ size_t w;
+ int r;
+
+ /* To be invoked after unescaping. Converts an A-label into an U-label. */
+
+ assert(encoded);
+ assert(decoded);
+
+ r = dlopen_idn();
+ if (r < 0)
+ return r;
+
+ if (encoded_size <= 0 || encoded_size > DNS_LABEL_MAX)
+ return -EINVAL;
+
+ if (!memory_startswith(encoded, encoded_size, IDNA_ACE_PREFIX))
+ return 0;
+
+ input = sym_stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size);
+ if (!input)
+ return -ENOMEM;
+
+ output_size = input_size;
+ output = newa(uint32_t, output_size);
+
+ sym_idna_to_unicode_44i(input, input_size, output, &output_size, 0);
+
+ result = sym_stringprep_ucs4_to_utf8(output, output_size, NULL, &w);
+ if (!result)
+ return -ENOMEM;
+ if (w <= 0)
+ return -EINVAL;
+ if (w > decoded_max)
+ return -ENOBUFS;
+
+ memcpy(decoded, result, w);
+
+ /* Append trailing NUL byte if there's space, but only then. */
+ if (decoded_max > w)
+ decoded[w] = 0;
+
+ return w;
+}
+#endif
+
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **_ret) {
+ _cleanup_free_ char *ret = NULL;
+ size_t n = 0, allocated = 0;
+ const char *p;
+ bool first = true;
+ int r;
+
+ if (a)
+ p = a;
+ else if (b)
+ p = TAKE_PTR(b);
+ else
+ goto finish;
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&p, label, sizeof label, flags);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ if (*p != 0)
+ return -EINVAL;
+
+ if (b) {
+ /* Now continue with the second string, if there is one */
+ p = TAKE_PTR(b);
+ continue;
+ }
+
+ break;
+ }
+
+ if (_ret) {
+ if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ r = dns_label_escape(label, r, ret + n + !first, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ if (!first)
+ ret[n] = '.';
+ } else {
+ char escaped[DNS_LABEL_ESCAPED_MAX];
+
+ r = dns_label_escape(label, r, escaped, sizeof(escaped));
+ if (r < 0)
+ return r;
+ }
+
+ if (!first)
+ n++;
+ else
+ first = false;
+
+ n += r;
+ }
+
+finish:
+ if (n > DNS_HOSTNAME_MAX)
+ return -EINVAL;
+
+ if (_ret) {
+ if (n == 0) {
+ /* Nothing appended? If so, generate at least a single dot, to indicate the DNS root domain */
+ if (!GREEDY_REALLOC(ret, allocated, 2))
+ return -ENOMEM;
+
+ ret[n++] = '.';
+ } else {
+ if (!GREEDY_REALLOC(ret, allocated, n + 1))
+ return -ENOMEM;
+ }
+
+ ret[n] = 0;
+ *_ret = TAKE_PTR(ret);
+ }
+
+ return 0;
+}
+
+void dns_name_hash_func(const char *p, struct siphash *state) {
+ int r;
+
+ assert(p);
+
+ for (;;) {
+ char label[DNS_LABEL_MAX+1];
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ break;
+ if (r == 0)
+ break;
+
+ ascii_strlower_n(label, r);
+ siphash24_compress(label, r, state);
+ siphash24_compress_byte(0, state); /* make sure foobar and foo.bar result in different hashes */
+ }
+
+ /* enforce that all names are terminated by the empty label */
+ string_hash_func("", state);
+}
+
+int dns_name_compare_func(const char *a, const char *b) {
+ const char *x, *y;
+ int r, q;
+
+ assert(a);
+ assert(b);
+
+ x = a + strlen(a);
+ y = b + strlen(b);
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+
+ if (x == NULL && y == NULL)
+ return 0;
+
+ r = dns_label_unescape_suffix(a, &x, la, sizeof(la));
+ q = dns_label_unescape_suffix(b, &y, lb, sizeof(lb));
+ if (r < 0 || q < 0)
+ return CMP(r, q);
+
+ r = ascii_strcasecmp_nn(la, r, lb, q);
+ if (r != 0)
+ return r;
+ }
+}
+
+DEFINE_HASH_OPS(dns_name_hash_ops, char, dns_name_hash_func, dns_name_compare_func);
+
+int dns_name_equal(const char *x, const char *y) {
+ int r, q;
+
+ assert(x);
+ assert(y);
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&x, la, sizeof la, 0);
+ if (r < 0)
+ return r;
+
+ q = dns_label_unescape(&y, lb, sizeof lb, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q)
+ return false;
+ if (r == 0)
+ return true;
+
+ if (ascii_strcasecmp_n(la, lb, r) != 0)
+ return false;
+ }
+}
+
+int dns_name_endswith(const char *name, const char *suffix) {
+ const char *n, *s, *saved_n = NULL;
+ int r, q;
+
+ assert(name);
+ assert(suffix);
+
+ n = name;
+ s = suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_n)
+ saved_n = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0)
+ return true;
+ if (r == 0 && saved_n == n)
+ return false;
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = suffix;
+ n = TAKE_PTR(saved_n);
+ }
+ }
+}
+
+int dns_name_startswith(const char *name, const char *prefix) {
+ const char *n, *p;
+ int r, q;
+
+ assert(name);
+ assert(prefix);
+
+ n = name;
+ p = prefix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], lp[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&p, lp, sizeof lp, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return true;
+
+ q = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q)
+ return false;
+ if (ascii_strcasecmp_n(ln, lp, r) != 0)
+ return false;
+ }
+}
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret) {
+ const char *n, *s, *saved_before = NULL, *saved_after = NULL, *prefix;
+ int r, q;
+
+ assert(name);
+ assert(old_suffix);
+ assert(new_suffix);
+ assert(ret);
+
+ n = name;
+ s = old_suffix;
+
+ for (;;) {
+ char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX];
+
+ if (!saved_before)
+ saved_before = n;
+
+ r = dns_label_unescape(&n, ln, sizeof ln, 0);
+ if (r < 0)
+ return r;
+
+ if (!saved_after)
+ saved_after = n;
+
+ q = dns_label_unescape(&s, ls, sizeof ls, 0);
+ if (q < 0)
+ return q;
+
+ if (r == 0 && q == 0)
+ break;
+ if (r == 0 && saved_after == n) {
+ *ret = NULL; /* doesn't match */
+ return 0;
+ }
+
+ if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) {
+
+ /* Not the same, let's jump back, and try with the next label again */
+ s = old_suffix;
+ n = TAKE_PTR(saved_after);
+ saved_before = NULL;
+ }
+ }
+
+ /* Found it! Now generate the new name */
+ prefix = strndupa(name, saved_before - name);
+
+ r = dns_name_concat(prefix, new_suffix, 0, ret);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int dns_name_between(const char *a, const char *b, const char *c) {
+ /* Determine if b is strictly greater than a and strictly smaller than c.
+ We consider the order of names to be circular, so that if a is
+ strictly greater than c, we consider b to be between them if it is
+ either greater than a or smaller than c. This is how the canonical
+ DNS name order used in NSEC records work. */
+
+ if (dns_name_compare_func(a, c) < 0)
+ /*
+ a and c are properly ordered:
+ a<---b--->c
+ */
+ return dns_name_compare_func(a, b) < 0 &&
+ dns_name_compare_func(b, c) < 0;
+ else
+ /*
+ a and c are equal or 'reversed':
+ <--b--c a----->
+ or:
+ <-----c a--b-->
+ */
+ return dns_name_compare_func(b, c) < 0 ||
+ dns_name_compare_func(a, b) < 0;
+}
+
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret) {
+ const uint8_t *p;
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ p = (const uint8_t*) a;
+
+ if (family == AF_INET)
+ r = asprintf(ret, "%u.%u.%u.%u.in-addr.arpa", p[3], p[2], p[1], p[0]);
+ else if (family == AF_INET6)
+ r = asprintf(ret, "%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.ip6.arpa",
+ hexchar(p[15] & 0xF), hexchar(p[15] >> 4), hexchar(p[14] & 0xF), hexchar(p[14] >> 4),
+ hexchar(p[13] & 0xF), hexchar(p[13] >> 4), hexchar(p[12] & 0xF), hexchar(p[12] >> 4),
+ hexchar(p[11] & 0xF), hexchar(p[11] >> 4), hexchar(p[10] & 0xF), hexchar(p[10] >> 4),
+ hexchar(p[ 9] & 0xF), hexchar(p[ 9] >> 4), hexchar(p[ 8] & 0xF), hexchar(p[ 8] >> 4),
+ hexchar(p[ 7] & 0xF), hexchar(p[ 7] >> 4), hexchar(p[ 6] & 0xF), hexchar(p[ 6] >> 4),
+ hexchar(p[ 5] & 0xF), hexchar(p[ 5] >> 4), hexchar(p[ 4] & 0xF), hexchar(p[ 4] >> 4),
+ hexchar(p[ 3] & 0xF), hexchar(p[ 3] >> 4), hexchar(p[ 2] & 0xF), hexchar(p[ 2] >> 4),
+ hexchar(p[ 1] & 0xF), hexchar(p[ 1] >> 4), hexchar(p[ 0] & 0xF), hexchar(p[ 0] >> 4));
+ else
+ return -EAFNOSUPPORT;
+ if (r < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int dns_name_address(const char *p, int *family, union in_addr_union *address) {
+ int r;
+
+ assert(p);
+ assert(family);
+ assert(address);
+
+ r = dns_name_endswith(p, "in-addr.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ uint8_t a[4];
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a); i++) {
+ char label[DNS_LABEL_MAX+1];
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+ if (r > 3)
+ return -EINVAL;
+
+ r = safe_atou8(label, &a[i]);
+ if (r < 0)
+ return r;
+ }
+
+ r = dns_name_equal(p, "in-addr.arpa");
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET;
+ address->in.s_addr = htobe32(((uint32_t) a[3] << 24) |
+ ((uint32_t) a[2] << 16) |
+ ((uint32_t) a[1] << 8) |
+ (uint32_t) a[0]);
+
+ return 1;
+ }
+
+ r = dns_name_endswith(p, "ip6.arpa");
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ struct in6_addr a;
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(a.s6_addr); i++) {
+ char label[DNS_LABEL_MAX+1];
+ int x, y;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ x = unhexchar(label[0]);
+ if (x < 0)
+ return -EINVAL;
+
+ r = dns_label_unescape(&p, label, sizeof label, 0);
+ if (r <= 0)
+ return r;
+ if (r != 1)
+ return -EINVAL;
+ y = unhexchar(label[0]);
+ if (y < 0)
+ return -EINVAL;
+
+ a.s6_addr[ELEMENTSOF(a.s6_addr) - i - 1] = (uint8_t) y << 4 | (uint8_t) x;
+ }
+
+ r = dns_name_equal(p, "ip6.arpa");
+ if (r <= 0)
+ return r;
+
+ *family = AF_INET6;
+ address->in6 = a;
+ return 1;
+ }
+
+ return 0;
+}
+
+bool dns_name_is_root(const char *name) {
+
+ assert(name);
+
+ /* There are exactly two ways to encode the root domain name:
+ * as empty string, or with a single dot. */
+
+ return STR_IN_SET(name, "", ".");
+}
+
+bool dns_name_is_single_label(const char *name) {
+ int r;
+
+ assert(name);
+
+ r = dns_name_parent(&name);
+ if (r <= 0)
+ return false;
+
+ return dns_name_is_root(name);
+}
+
+/* Encode a domain name according to RFC 1035 Section 3.1, without compression */
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical) {
+ uint8_t *label_length, *out;
+ int r;
+
+ assert(domain);
+ assert(buffer);
+
+ out = buffer;
+
+ do {
+ /* Reserve a byte for label length */
+ if (len <= 0)
+ return -ENOBUFS;
+ len--;
+ label_length = out;
+ out++;
+
+ /* Convert and copy a single label. Note that
+ * dns_label_unescape() returns 0 when it hits the end
+ * of the domain name, which we rely on here to encode
+ * the trailing NUL byte. */
+ r = dns_label_unescape(&domain, (char *) out, len, 0);
+ if (r < 0)
+ return r;
+
+ /* Optionally, output the name in DNSSEC canonical
+ * format, as described in RFC 4034, section 6.2. Or
+ * in other words: in lower-case. */
+ if (canonical)
+ ascii_strlower_n((char*) out, (size_t) r);
+
+ /* Fill label length, move forward */
+ *label_length = r;
+ out += r;
+ len -= r;
+
+ } while (r != 0);
+
+ /* Verify the maximum size of the encoded name. The trailing
+ * dot + NUL byte account are included this time, hence
+ * compare against DNS_HOSTNAME_MAX + 2 (which is 255) this
+ * time. */
+ if (out - buffer > DNS_HOSTNAME_MAX + 2)
+ return -EINVAL;
+
+ return out - buffer;
+}
+
+static bool srv_type_label_is_valid(const char *label, size_t n) {
+ size_t k;
+
+ assert(label);
+
+ if (n < 2) /* Label needs to be at least 2 chars long */
+ return false;
+
+ if (label[0] != '_') /* First label char needs to be underscore */
+ return false;
+
+ /* Second char must be a letter */
+ if (!(label[1] >= 'A' && label[1] <= 'Z') &&
+ !(label[1] >= 'a' && label[1] <= 'z'))
+ return false;
+
+ /* Third and further chars must be alphanumeric or a hyphen */
+ for (k = 2; k < n; k++) {
+ if (!(label[k] >= 'A' && label[k] <= 'Z') &&
+ !(label[k] >= 'a' && label[k] <= 'z') &&
+ !(label[k] >= '0' && label[k] <= '9') &&
+ label[k] != '-')
+ return false;
+ }
+
+ return true;
+}
+
+bool dns_srv_type_is_valid(const char *name) {
+ unsigned c = 0;
+ int r;
+
+ if (!name)
+ return false;
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ /* This more or less implements RFC 6335, Section 5.1 */
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ return false;
+ if (r == 0)
+ break;
+
+ if (c >= 2)
+ return false;
+
+ if (!srv_type_label_is_valid(label, r))
+ return false;
+
+ c++;
+ }
+
+ return c == 2; /* exactly two labels */
+}
+
+bool dnssd_srv_type_is_valid(const char *name) {
+ return dns_srv_type_is_valid(name) &&
+ ((dns_name_endswith(name, "_tcp") > 0) ||
+ (dns_name_endswith(name, "_udp") > 0)); /* Specific to DNS-SD. RFC 6763, Section 7 */
+}
+
+bool dns_service_name_is_valid(const char *name) {
+ size_t l;
+
+ /* This more or less implements RFC 6763, Section 4.1.1 */
+
+ if (!name)
+ return false;
+
+ if (!utf8_is_valid(name))
+ return false;
+
+ if (string_has_cc(name, NULL))
+ return false;
+
+ l = strlen(name);
+ if (l <= 0)
+ return false;
+ if (l > 63)
+ return false;
+
+ return true;
+}
+
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret) {
+ char escaped[DNS_LABEL_ESCAPED_MAX];
+ _cleanup_free_ char *n = NULL;
+ int r;
+
+ assert(type);
+ assert(domain);
+ assert(ret);
+
+ if (!dns_srv_type_is_valid(type))
+ return -EINVAL;
+
+ if (!name)
+ return dns_name_concat(type, domain, 0, ret);
+
+ if (!dns_service_name_is_valid(name))
+ return -EINVAL;
+
+ r = dns_label_escape(name, strlen(name), escaped, sizeof(escaped));
+ if (r < 0)
+ return r;
+
+ r = dns_name_concat(type, domain, 0, &n);
+ if (r < 0)
+ return r;
+
+ return dns_name_concat(escaped, n, 0, ret);
+}
+
+static bool dns_service_name_label_is_valid(const char *label, size_t n) {
+ char *s;
+
+ assert(label);
+
+ if (memchr(label, 0, n))
+ return false;
+
+ s = strndupa(label, n);
+ return dns_service_name_is_valid(s);
+}
+
+int dns_service_split(const char *joined, char **_name, char **_type, char **_domain) {
+ _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL;
+ const char *p = joined, *q = NULL, *d = NULL;
+ char a[DNS_LABEL_MAX], b[DNS_LABEL_MAX], c[DNS_LABEL_MAX];
+ int an, bn, cn, r;
+ unsigned x = 0;
+
+ assert(joined);
+
+ /* Get first label from the full name */
+ an = dns_label_unescape(&p, a, sizeof(a), 0);
+ if (an < 0)
+ return an;
+
+ if (an > 0) {
+ x++;
+
+ /* If there was a first label, try to get the second one */
+ bn = dns_label_unescape(&p, b, sizeof(b), 0);
+ if (bn < 0)
+ return bn;
+
+ if (bn > 0) {
+ x++;
+
+ /* If there was a second label, try to get the third one */
+ q = p;
+ cn = dns_label_unescape(&p, c, sizeof(c), 0);
+ if (cn < 0)
+ return cn;
+
+ if (cn > 0)
+ x++;
+ } else
+ cn = 0;
+ } else
+ an = 0;
+
+ if (x >= 2 && srv_type_label_is_valid(b, bn)) {
+
+ if (x >= 3 && srv_type_label_is_valid(c, cn)) {
+
+ if (dns_service_name_label_is_valid(a, an)) {
+ /* OK, got <name> . <type> . <type2> . <domain> */
+
+ name = strndup(a, an);
+ if (!name)
+ return -ENOMEM;
+
+ type = strjoin(b, ".", c);
+ if (!type)
+ return -ENOMEM;
+
+ d = p;
+ goto finish;
+ }
+
+ } else if (srv_type_label_is_valid(a, an)) {
+
+ /* OK, got <type> . <type2> . <domain> */
+
+ name = NULL;
+
+ type = strjoin(a, ".", b);
+ if (!type)
+ return -ENOMEM;
+
+ d = q;
+ goto finish;
+ }
+ }
+
+ name = NULL;
+ type = NULL;
+ d = joined;
+
+finish:
+ r = dns_name_normalize(d, 0, &domain);
+ if (r < 0)
+ return r;
+
+ if (_domain)
+ *_domain = TAKE_PTR(domain);
+
+ if (_type)
+ *_type = TAKE_PTR(type);
+
+ if (_name)
+ *_name = TAKE_PTR(name);
+
+ return 0;
+}
+
+static int dns_name_build_suffix_table(const char *name, const char *table[]) {
+ const char *p;
+ unsigned n = 0;
+ int r;
+
+ assert(name);
+ assert(table);
+
+ p = name;
+ for (;;) {
+ if (n > DNS_N_LABELS_MAX)
+ return -EINVAL;
+
+ table[n] = p;
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ n++;
+ }
+
+ return (int) n;
+}
+
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret) {
+ const char* labels[DNS_N_LABELS_MAX+1];
+ int n;
+
+ assert(name);
+ assert(ret);
+
+ n = dns_name_build_suffix_table(name, labels);
+ if (n < 0)
+ return n;
+
+ if ((unsigned) n < n_labels)
+ return -EINVAL;
+
+ *ret = labels[n - n_labels];
+ return (int) (n - n_labels);
+}
+
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret) {
+ int r;
+
+ assert(a);
+ assert(ret);
+
+ for (; n_labels > 0; n_labels--) {
+ r = dns_name_parent(&a);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ret = "";
+ return 0;
+ }
+ }
+
+ *ret = a;
+ return 1;
+}
+
+int dns_name_count_labels(const char *name) {
+ unsigned n = 0;
+ const char *p;
+ int r;
+
+ assert(name);
+
+ p = name;
+ for (;;) {
+ r = dns_name_parent(&p);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ if (n >= DNS_N_LABELS_MAX)
+ return -EINVAL;
+
+ n++;
+ }
+
+ return (int) n;
+}
+
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b) {
+ int r;
+
+ assert(a);
+ assert(b);
+
+ r = dns_name_skip(a, n_labels, &a);
+ if (r <= 0)
+ return r;
+
+ return dns_name_equal(a, b);
+}
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret) {
+ const char *a_labels[DNS_N_LABELS_MAX+1], *b_labels[DNS_N_LABELS_MAX+1];
+ int n = 0, m = 0, k = 0, r, q;
+
+ assert(a);
+ assert(b);
+ assert(ret);
+
+ /* Determines the common suffix of domain names a and b */
+
+ n = dns_name_build_suffix_table(a, a_labels);
+ if (n < 0)
+ return n;
+
+ m = dns_name_build_suffix_table(b, b_labels);
+ if (m < 0)
+ return m;
+
+ for (;;) {
+ char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX];
+ const char *x, *y;
+
+ if (k >= n || k >= m) {
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ x = a_labels[n - 1 - k];
+ r = dns_label_unescape(&x, la, sizeof la, 0);
+ if (r < 0)
+ return r;
+
+ y = b_labels[m - 1 - k];
+ q = dns_label_unescape(&y, lb, sizeof lb, 0);
+ if (q < 0)
+ return q;
+
+ if (r != q || ascii_strcasecmp_n(la, lb, r) != 0) {
+ *ret = a_labels[n - k];
+ return 0;
+ }
+
+ k++;
+ }
+}
+
+int dns_name_apply_idna(const char *name, char **ret) {
+
+ /* Return negative on error, 0 if not implemented, positive on success. */
+
+#if HAVE_LIBIDN2 || HAVE_LIBIDN2
+ int r;
+
+ r = dlopen_idn();
+ if (r == -EOPNOTSUPP) {
+ *ret = NULL;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+#endif
+
+#if HAVE_LIBIDN2
+ _cleanup_free_ char *t = NULL;
+
+ assert(name);
+ assert(ret);
+
+ /* First, try non-transitional mode (i.e. IDN2008 rules) */
+ r = sym_idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t,
+ IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL);
+ if (r == IDN2_DISALLOWED) /* If that failed, because of disallowed characters, try transitional mode.
+ * (i.e. IDN2003 rules which supports some unicode chars IDN2008 doesn't allow). */
+ r = sym_idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t,
+ IDN2_NFC_INPUT | IDN2_TRANSITIONAL);
+
+ log_debug("idn2_lookup_u8: %s → %s", name, t);
+ if (r == IDN2_OK) {
+ if (!startswith(name, "xn--")) {
+ _cleanup_free_ char *s = NULL;
+
+ r = sym_idn2_to_unicode_8z8z(t, &s, 0);
+ if (r != IDN2_OK) {
+ log_debug("idn2_to_unicode_8z8z(\"%s\") failed: %d/%s",
+ t, r, sym_idn2_strerror(r));
+ return 0;
+ }
+
+ if (!streq_ptr(name, s)) {
+ log_debug("idn2 roundtrip failed: \"%s\" → \"%s\" → \"%s\", ignoring.",
+ name, t, s);
+ return 0;
+ }
+ }
+
+ *ret = TAKE_PTR(t);
+
+ return 1; /* *ret has been written */
+ }
+
+ log_debug("idn2_lookup_u8(\"%s\") failed: %d/%s", name, r, sym_idn2_strerror(r));
+ if (r == IDN2_2HYPHEN)
+ /* The name has two hyphens — forbidden by IDNA2008 in some cases */
+ return 0;
+ if (IN_SET(r, IDN2_TOO_BIG_DOMAIN, IDN2_TOO_BIG_LABEL))
+ return -ENOSPC;
+ return -EINVAL;
+#elif HAVE_LIBIDN
+ _cleanup_free_ char *buf = NULL;
+ size_t n = 0, allocated = 0;
+ bool first = true;
+ int r, q;
+
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ char label[DNS_LABEL_MAX];
+
+ r = dns_label_unescape(&name, label, sizeof label, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ q = dns_label_apply_idna(label, r, label, sizeof label);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ r = q;
+
+ if (!GREEDY_REALLOC(buf, allocated, n + !first + DNS_LABEL_ESCAPED_MAX))
+ return -ENOMEM;
+
+ r = dns_label_escape(label, r, buf + n + !first, DNS_LABEL_ESCAPED_MAX);
+ if (r < 0)
+ return r;
+
+ if (first)
+ first = false;
+ else
+ buf[n++] = '.';
+
+ n += r;
+ }
+
+ if (n > DNS_HOSTNAME_MAX)
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(buf, allocated, n + 1))
+ return -ENOMEM;
+
+ buf[n] = 0;
+ *ret = TAKE_PTR(buf);
+
+ return 1;
+#else
+ *ret = NULL;
+ return 0;
+#endif
+}
+
+int dns_name_is_valid_or_address(const char *name) {
+ /* Returns > 0 if the specified name is either a valid IP address formatted as string or a valid DNS name */
+
+ if (isempty(name))
+ return 0;
+
+ if (in_addr_from_string_auto(name, NULL, NULL) >= 0)
+ return 1;
+
+ return dns_name_is_valid(name);
+}
+
+int dns_name_dot_suffixed(const char *name) {
+ const char *p = name;
+ int r;
+
+ for (;;) {
+ if (streq(p, "."))
+ return true;
+
+ r = dns_label_unescape(&p, NULL, DNS_LABEL_MAX, DNS_LABEL_LEAVE_TRAILING_DOT);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return false;
+ }
+}
diff --git a/src/shared/dns-domain.h b/src/shared/dns-domain.h
new file mode 100644
index 0000000..77f5962
--- /dev/null
+++ b/src/shared/dns-domain.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "hashmap.h"
+#include "in-addr-util.h"
+
+/* Length of a single label, with all escaping removed, excluding any trailing dot or NUL byte */
+#define DNS_LABEL_MAX 63
+
+/* Worst case length of a single label, with all escaping applied and room for a trailing NUL byte. */
+#define DNS_LABEL_ESCAPED_MAX (DNS_LABEL_MAX*4+1)
+
+/* Maximum length of a full hostname, consisting of a series of unescaped labels, and no trailing dot or NUL byte */
+#define DNS_HOSTNAME_MAX 253
+
+/* Maximum length of a full hostname, on the wire, including the final NUL byte */
+#define DNS_WIRE_FORMAT_HOSTNAME_MAX 255
+
+/* Maximum number of labels per valid hostname */
+#define DNS_N_LABELS_MAX 127
+
+typedef enum DNSLabelFlags {
+ DNS_LABEL_LDH = 1 << 0, /* Follow the "LDH" rule — only letters, digits, and internal hyphens. */
+ DNS_LABEL_NO_ESCAPES = 1 << 1, /* Do not treat backslashes specially */
+ DNS_LABEL_LEAVE_TRAILING_DOT = 1 << 2, /* Leave trailing dot in place */
+} DNSLabelFlags;
+
+int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags);
+int dns_label_unescape_suffix(const char *name, const char **label_end, char *dest, size_t sz);
+int dns_label_escape(const char *p, size_t l, char *dest, size_t sz);
+int dns_label_escape_new(const char *p, size_t l, char **ret);
+
+static inline int dns_name_parent(const char **name) {
+ return dns_label_unescape(name, NULL, DNS_LABEL_MAX, 0);
+}
+
+#if HAVE_LIBIDN
+int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max);
+#endif
+
+int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **ret);
+
+static inline int dns_name_normalize(const char *s, DNSLabelFlags flags, char **ret) {
+ /* dns_name_concat() normalizes as a side-effect */
+ return dns_name_concat(s, NULL, flags, ret);
+}
+
+static inline int dns_name_is_valid(const char *s) {
+ int r;
+
+ /* dns_name_normalize() verifies as a side effect */
+ r = dns_name_normalize(s, 0, NULL);
+ if (r == -EINVAL)
+ return 0;
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+static inline int dns_name_is_valid_ldh(const char *s) {
+ int r;
+
+ r = dns_name_concat(s, NULL, DNS_LABEL_LDH|DNS_LABEL_NO_ESCAPES, NULL);
+ if (r == -EINVAL)
+ return 0;
+ if (r < 0)
+ return r;
+ return 1;
+}
+
+void dns_name_hash_func(const char *s, struct siphash *state);
+int dns_name_compare_func(const char *a, const char *b);
+extern const struct hash_ops dns_name_hash_ops;
+
+int dns_name_between(const char *a, const char *b, const char *c);
+int dns_name_equal(const char *x, const char *y);
+int dns_name_endswith(const char *name, const char *suffix);
+int dns_name_startswith(const char *name, const char *prefix);
+
+int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret);
+
+int dns_name_reverse(int family, const union in_addr_union *a, char **ret);
+int dns_name_address(const char *p, int *family, union in_addr_union *a);
+
+bool dns_name_is_root(const char *name);
+bool dns_name_is_single_label(const char *name);
+
+int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical);
+
+bool dns_srv_type_is_valid(const char *name);
+bool dnssd_srv_type_is_valid(const char *name);
+bool dns_service_name_is_valid(const char *name);
+
+int dns_service_join(const char *name, const char *type, const char *domain, char **ret);
+int dns_service_split(const char *joined, char **name, char **type, char **domain);
+
+int dns_name_suffix(const char *name, unsigned n_labels, const char **ret);
+int dns_name_count_labels(const char *name);
+
+int dns_name_skip(const char *a, unsigned n_labels, const char **ret);
+int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b);
+
+int dns_name_common_suffix(const char *a, const char *b, const char **ret);
+
+int dns_name_apply_idna(const char *name, char **ret);
+
+int dns_name_is_valid_or_address(const char *name);
+
+int dns_name_dot_suffixed(const char *name);
diff --git a/src/shared/dropin.c b/src/shared/dropin.c
new file mode 100644
index 0000000..89f4b8a
--- /dev/null
+++ b/src/shared/dropin.c
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "dirent-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio-label.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-name.h"
+
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+ const char *name, char **ret_p, char **ret_q) {
+
+ char prefix[DECIMAL_STR_MAX(unsigned)];
+ _cleanup_free_ char *b = NULL, *p = NULL, *q = NULL;
+
+ assert(unit);
+ assert(name);
+ assert(ret_p);
+ assert(ret_q);
+
+ sprintf(prefix, "%u", level);
+
+ b = xescape(name, "/.");
+ if (!b)
+ return -ENOMEM;
+
+ if (!filename_is_valid(b))
+ return -EINVAL;
+
+ p = strjoin(dir, "/", unit, ".d");
+ q = strjoin(p, "/", prefix, "-", b, ".conf");
+ if (!p || !q)
+ return -ENOMEM;
+
+ *ret_p = TAKE_PTR(p);
+ *ret_q = TAKE_PTR(q);
+ return 0;
+}
+
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *data) {
+
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ int r;
+
+ assert(dir);
+ assert(unit);
+ assert(name);
+ assert(data);
+
+ r = drop_in_file(dir, unit, level, name, &p, &q);
+ if (r < 0)
+ return r;
+
+ (void) mkdir_p(p, 0755);
+ return write_string_file_atomic_label(q, data);
+}
+
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *format, ...) {
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ assert(dir);
+ assert(unit);
+ assert(name);
+ assert(format);
+
+ va_start(ap, format);
+ r = vasprintf(&p, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return write_drop_in(dir, unit, level, name, p);
+}
+
+static int unit_file_add_dir(
+ const char *original_root,
+ const char *path,
+ char ***dirs) {
+
+ _cleanup_free_ char *chased = NULL;
+ int r;
+
+ assert(path);
+
+ /* This adds [original_root]/path to dirs, if it exists. */
+
+ r = chase_symlinks(path, original_root, 0, &chased, NULL);
+ if (r == -ENOENT) /* Ignore -ENOENT, after all most units won't have a drop-in dir. */
+ return 0;
+ if (r == -ENAMETOOLONG) {
+ /* Also, ignore -ENAMETOOLONG but log about it. After all, users are not even able to create the
+ * drop-in dir in such case. This mostly happens for device units with an overly long /sys path. */
+ log_debug_errno(r, "Path '%s' too long, couldn't canonicalize, ignoring.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_warning_errno(r, "Failed to canonicalize path '%s': %m", path);
+
+ if (strv_consume(dirs, TAKE_PTR(chased)) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int unit_file_find_dirs(
+ const char *original_root,
+ Set *unit_path_cache,
+ const char *unit_path,
+ const char *name,
+ const char *suffix,
+ char ***dirs) {
+
+ _cleanup_free_ char *prefix = NULL, *instance = NULL, *built = NULL;
+ bool is_instance, chopped;
+ const char *dash;
+ UnitType type;
+ char *path;
+ size_t n;
+ int r;
+
+ assert(unit_path);
+ assert(name);
+ assert(suffix);
+
+ path = strjoina(unit_path, "/", name, suffix);
+ if (!unit_path_cache || set_get(unit_path_cache, path)) {
+ r = unit_file_add_dir(original_root, path, dirs);
+ if (r < 0)
+ return r;
+ }
+
+ is_instance = unit_name_is_valid(name, UNIT_NAME_INSTANCE);
+ if (is_instance) { /* Also try the template dir */
+ _cleanup_free_ char *template = NULL;
+
+ r = unit_name_template(name, &template);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate template from unit name: %m");
+
+ r = unit_file_find_dirs(original_root, unit_path_cache, unit_path, template, suffix, dirs);
+ if (r < 0)
+ return r;
+ }
+
+ /* Return early for top level drop-ins. */
+ if (unit_type_from_string(name) >= 0)
+ return 0;
+
+ /* Let's see if there's a "-" prefix for this unit name. If so, let's invoke ourselves for it. This will then
+ * recursively do the same for all our prefixes. i.e. this means given "foo-bar-waldo.service" we'll also
+ * search "foo-bar-.service" and "foo-.service".
+ *
+ * Note the order in which we do it: we traverse up adding drop-ins on each step. This means the more specific
+ * drop-ins may override the more generic drop-ins, which is the intended behaviour. */
+
+ r = unit_name_to_prefix(name, &prefix);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive unit name prefix from unit name: %m");
+
+ chopped = false;
+ for (;;) {
+ dash = strrchr(prefix, '-');
+ if (!dash) /* No dash? if so we are done */
+ return 0;
+
+ n = (size_t) (dash - prefix);
+ if (n == 0) /* Leading dash? If so, we are done */
+ return 0;
+
+ if (prefix[n+1] != 0 || chopped) {
+ prefix[n+1] = 0;
+ break;
+ }
+
+ /* Trailing dash? If so, chop it off and try again, but not more than once. */
+ prefix[n] = 0;
+ chopped = true;
+ }
+
+ if (!unit_prefix_is_valid(prefix))
+ return 0;
+
+ type = unit_name_to_type(name);
+ if (type < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to derive unit type from unit name: %s",
+ name);
+
+ if (is_instance) {
+ r = unit_name_to_instance(name, &instance);
+ if (r < 0)
+ return log_error_errno(r, "Failed to derive unit name instance from unit name: %m");
+ }
+
+ r = unit_name_build_from_type(prefix, instance, type, &built);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build prefix unit name: %m");
+
+ return unit_file_find_dirs(original_root, unit_path_cache, unit_path, built, suffix, dirs);
+}
+
+int unit_file_find_dropin_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ const char *dir_suffix,
+ const char *file_suffix,
+ const char *name,
+ const Set *aliases,
+ char ***ret) {
+
+ _cleanup_strv_free_ char **dirs = NULL;
+ const char *n;
+ char **p;
+ int r;
+
+ assert(ret);
+
+ if (name)
+ STRV_FOREACH(p, lookup_path)
+ (void) unit_file_find_dirs(original_root, unit_path_cache, *p, name, dir_suffix, &dirs);
+
+ SET_FOREACH(n, aliases)
+ STRV_FOREACH(p, lookup_path)
+ (void) unit_file_find_dirs(original_root, unit_path_cache, *p, n, dir_suffix, &dirs);
+
+ /* All the names in the unit are of the same type so just grab one. */
+ n = name ?: (const char*) set_first(aliases);
+ if (n) {
+ UnitType type = _UNIT_TYPE_INVALID;
+
+ type = unit_name_to_type(n);
+ if (type < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to derive unit type from unit name: %s", n);
+
+ /* Special top level drop in for "<unit type>.<suffix>". Add this last as it's the most generic
+ * and should be able to be overridden by more specific drop-ins. */
+ STRV_FOREACH(p, lookup_path)
+ (void) unit_file_find_dirs(original_root,
+ unit_path_cache,
+ *p,
+ unit_type_to_string(type),
+ dir_suffix,
+ &dirs);
+ }
+
+ if (strv_isempty(dirs)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ r = conf_files_list_strv(ret, file_suffix, NULL, 0, (const char**) dirs);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to create the list of configuration files: %m");
+
+ return 1;
+}
diff --git a/src/shared/dropin.h b/src/shared/dropin.h
new file mode 100644
index 0000000..54cceaf
--- /dev/null
+++ b/src/shared/dropin.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+#include "unit-name.h"
+
+int drop_in_file(const char *dir, const char *unit, unsigned level,
+ const char *name, char **_p, char **_q);
+
+int write_drop_in(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *data);
+
+int write_drop_in_format(const char *dir, const char *unit, unsigned level,
+ const char *name, const char *format, ...) _printf_(5, 6);
+
+int unit_file_find_dropin_paths(
+ const char *original_root,
+ char **lookup_path,
+ Set *unit_path_cache,
+ const char *dir_suffix,
+ const char *file_suffix,
+ const char *name,
+ const Set *aliases,
+ char ***paths);
diff --git a/src/shared/efi-loader.c b/src/shared/efi-loader.c
new file mode 100644
index 0000000..20f70da
--- /dev/null
+++ b/src/shared/efi-loader.c
@@ -0,0 +1,806 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "efi-loader.h"
+#include "efivars.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "parse-util.h"
+#include "sort-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "utf8.h"
+#include "virt.h"
+
+#if ENABLE_EFI
+
+#define LOAD_OPTION_ACTIVE 0x00000001
+#define MEDIA_DEVICE_PATH 0x04
+#define MEDIA_HARDDRIVE_DP 0x01
+#define MEDIA_FILEPATH_DP 0x04
+#define SIGNATURE_TYPE_GUID 0x02
+#define MBR_TYPE_EFI_PARTITION_TABLE_HEADER 0x02
+#define END_DEVICE_PATH_TYPE 0x7f
+#define END_ENTIRE_DEVICE_PATH_SUBTYPE 0xff
+#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001
+
+#define boot_option__contents \
+ { \
+ uint32_t attr; \
+ uint16_t path_len; \
+ uint16_t title[]; \
+ }
+
+struct boot_option boot_option__contents;
+struct boot_option__packed boot_option__contents _packed_;
+assert_cc(offsetof(struct boot_option, title) == offsetof(struct boot_option__packed, title));
+/* sizeof(struct boot_option) != sizeof(struct boot_option__packed), so
+ * the *size* of the structure should not be used anywhere below. */
+
+struct drive_path {
+ uint32_t part_nr;
+ uint64_t part_start;
+ uint64_t part_size;
+ char signature[16];
+ uint8_t mbr_type;
+ uint8_t signature_type;
+} _packed_;
+
+#define device_path__contents \
+ { \
+ uint8_t type; \
+ uint8_t sub_type; \
+ uint16_t length; \
+ union { \
+ uint16_t path[0]; \
+ struct drive_path drive; \
+ }; \
+ }
+
+struct device_path device_path__contents;
+struct device_path__packed device_path__contents _packed_;
+assert_cc(sizeof(struct device_path) == sizeof(struct device_path__packed));
+
+int efi_reboot_to_firmware_supported(void) {
+ _cleanup_free_ void *v = NULL;
+ static int cache = -1;
+ uint64_t b;
+ size_t s;
+ int r;
+
+ if (cache > 0)
+ return 0;
+ if (cache == 0)
+ return -EOPNOTSUPP;
+
+ if (!is_efi_boot())
+ goto not_supported;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndicationsSupported", NULL, &v, &s);
+ if (r == -ENOENT)
+ goto not_supported; /* variable doesn't exist? it's not supported then */
+ if (r < 0)
+ return r;
+ if (s != sizeof(uint64_t))
+ return -EINVAL;
+
+ b = *(uint64_t*) v;
+ if (!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI))
+ goto not_supported; /* bit unset? it's not supported then */
+
+ cache = 1;
+ return 0;
+
+not_supported:
+ cache = 0;
+ return -EOPNOTSUPP;
+}
+
+static int get_os_indications(uint64_t *ret) {
+ static struct stat cache_stat = {};
+ _cleanup_free_ void *v = NULL;
+ _cleanup_free_ char *fn = NULL;
+ static uint64_t cache;
+ struct stat new_stat;
+ size_t s;
+ int r;
+
+ assert(ret);
+
+ /* Let's verify general support first */
+ r = efi_reboot_to_firmware_supported();
+ if (r < 0)
+ return r;
+
+ fn = efi_variable_path(EFI_VENDOR_GLOBAL, "OsIndications");
+ if (!fn)
+ return -ENOMEM;
+
+ /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */
+ if (stat(fn, &new_stat) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* Doesn't exist? Then we can exit early (also see below) */
+ *ret = 0;
+ return 0;
+
+ } else if (stat_inode_unmodified(&new_stat, &cache_stat)) {
+ /* inode didn't change, we can return the cached value */
+ *ret = cache;
+ return 0;
+ }
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndications", NULL, &v, &s);
+ if (r == -ENOENT) {
+ /* Some firmware implementations that do support OsIndications and report that with
+ * OsIndicationsSupported will remove the OsIndications variable when it is unset. Let's
+ * pretend it's 0 then, to hide this implementation detail. Note that this call will return
+ * -ENOENT then only if the support for OsIndications is missing entirely, as determined by
+ * efi_reboot_to_firmware_supported() above. */
+ *ret = 0;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+ if (s != sizeof(uint64_t))
+ return -EINVAL;
+
+ cache_stat = new_stat;
+ *ret = cache = *(uint64_t *)v;
+ return 0;
+}
+
+int efi_get_reboot_to_firmware(void) {
+ int r;
+ uint64_t b;
+
+ r = get_os_indications(&b);
+ if (r < 0)
+ return r;
+
+ return !!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI);
+}
+
+int efi_set_reboot_to_firmware(bool value) {
+ int r;
+ uint64_t b, b_new;
+
+ r = get_os_indications(&b);
+ if (r < 0)
+ return r;
+
+ b_new = UPDATE_FLAG(b, EFI_OS_INDICATIONS_BOOT_TO_FW_UI, value);
+
+ /* Avoid writing to efi vars store if we can due to firmware bugs. */
+ if (b != b_new)
+ return efi_set_variable(EFI_VENDOR_GLOBAL, "OsIndications", &b_new, sizeof(uint64_t));
+
+ return 0;
+}
+
+static ssize_t utf16_size(const uint16_t *s, size_t buf_len_bytes) {
+ size_t l = 0;
+
+ /* Returns the size of the string in bytes without the terminating two zero bytes */
+
+ if (buf_len_bytes % sizeof(uint16_t) != 0)
+ return -EINVAL;
+
+ while (l < buf_len_bytes / sizeof(uint16_t)) {
+ if (s[l] == 0)
+ return (l + 1) * sizeof(uint16_t);
+ l++;
+ }
+
+ return -EINVAL; /* The terminator was not found */
+}
+
+struct guid {
+ uint32_t u1;
+ uint16_t u2;
+ uint16_t u3;
+ uint8_t u4[8];
+} _packed_;
+
+static void efi_guid_to_id128(const void *guid, sd_id128_t *id128) {
+ uint32_t u1;
+ uint16_t u2, u3;
+ const struct guid *uuid = guid;
+
+ memcpy(&u1, &uuid->u1, sizeof(uint32_t));
+ id128->bytes[0] = (u1 >> 24) & 0xff;
+ id128->bytes[1] = (u1 >> 16) & 0xff;
+ id128->bytes[2] = (u1 >> 8) & 0xff;
+ id128->bytes[3] = u1 & 0xff;
+ memcpy(&u2, &uuid->u2, sizeof(uint16_t));
+ id128->bytes[4] = (u2 >> 8) & 0xff;
+ id128->bytes[5] = u2 & 0xff;
+ memcpy(&u3, &uuid->u3, sizeof(uint16_t));
+ id128->bytes[6] = (u3 >> 8) & 0xff;
+ id128->bytes[7] = u3 & 0xff;
+ memcpy(&id128->bytes[8], uuid->u4, sizeof(uuid->u4));
+}
+
+int efi_get_boot_option(
+ uint16_t id,
+ char **title,
+ sd_id128_t *part_uuid,
+ char **path,
+ bool *active) {
+
+ char boot_id[9];
+ _cleanup_free_ uint8_t *buf = NULL;
+ size_t l;
+ struct boot_option *header;
+ ssize_t title_size;
+ _cleanup_free_ char *s = NULL, *p = NULL;
+ sd_id128_t p_uuid = SD_ID128_NULL;
+ int r;
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, (void **)&buf, &l);
+ if (r < 0)
+ return r;
+ if (l < offsetof(struct boot_option, title))
+ return -ENOENT;
+
+ header = (struct boot_option *)buf;
+ title_size = utf16_size(header->title, l - offsetof(struct boot_option, title));
+ if (title_size < 0)
+ return title_size;
+
+ if (title) {
+ s = utf16_to_utf8(header->title, title_size);
+ if (!s)
+ return -ENOMEM;
+ }
+
+ if (header->path_len > 0) {
+ uint8_t *dbuf;
+ size_t dnext, doff;
+
+ doff = offsetof(struct boot_option, title) + title_size;
+ dbuf = buf + doff;
+ if (header->path_len > l - doff)
+ return -EINVAL;
+
+ dnext = 0;
+ while (dnext < header->path_len) {
+ struct device_path *dpath;
+
+ dpath = (struct device_path *)(dbuf + dnext);
+ if (dpath->length < 4)
+ break;
+
+ /* Type 0x7F – End of Hardware Device Path, Sub-Type 0xFF – End Entire Device Path */
+ if (dpath->type == END_DEVICE_PATH_TYPE && dpath->sub_type == END_ENTIRE_DEVICE_PATH_SUBTYPE)
+ break;
+
+ dnext += dpath->length;
+
+ /* Type 0x04 – Media Device Path */
+ if (dpath->type != MEDIA_DEVICE_PATH)
+ continue;
+
+ /* Sub-Type 1 – Hard Drive */
+ if (dpath->sub_type == MEDIA_HARDDRIVE_DP) {
+ /* 0x02 – GUID Partition Table */
+ if (dpath->drive.mbr_type != MBR_TYPE_EFI_PARTITION_TABLE_HEADER)
+ continue;
+
+ /* 0x02 – GUID signature */
+ if (dpath->drive.signature_type != SIGNATURE_TYPE_GUID)
+ continue;
+
+ if (part_uuid)
+ efi_guid_to_id128(dpath->drive.signature, &p_uuid);
+ continue;
+ }
+
+ /* Sub-Type 4 – File Path */
+ if (dpath->sub_type == MEDIA_FILEPATH_DP && !p && path) {
+ p = utf16_to_utf8(dpath->path, dpath->length-4);
+ if (!p)
+ return -ENOMEM;
+
+ efi_tilt_backslashes(p);
+ continue;
+ }
+ }
+ }
+
+ if (title)
+ *title = TAKE_PTR(s);
+ if (part_uuid)
+ *part_uuid = p_uuid;
+ if (path)
+ *path = TAKE_PTR(p);
+ if (active)
+ *active = header->attr & LOAD_OPTION_ACTIVE;
+
+ return 0;
+}
+
+static void to_utf16(uint16_t *dest, const char *src) {
+ int i;
+
+ for (i = 0; src[i] != '\0'; i++)
+ dest[i] = src[i];
+ dest[i] = '\0';
+}
+
+static void id128_to_efi_guid(sd_id128_t id, void *guid) {
+ struct guid uuid = {
+ .u1 = id.bytes[0] << 24 | id.bytes[1] << 16 | id.bytes[2] << 8 | id.bytes[3],
+ .u2 = id.bytes[4] << 8 | id.bytes[5],
+ .u3 = id.bytes[6] << 8 | id.bytes[7],
+ };
+ memcpy(uuid.u4, id.bytes+8, sizeof(uuid.u4));
+ memcpy(guid, &uuid, sizeof(uuid));
+}
+
+static uint16_t *tilt_slashes(uint16_t *s) {
+ uint16_t *p;
+
+ for (p = s; *p; p++)
+ if (*p == '/')
+ *p = '\\';
+
+ return s;
+}
+
+int efi_add_boot_option(
+ uint16_t id,
+ const char *title,
+ uint32_t part,
+ uint64_t pstart,
+ uint64_t psize,
+ sd_id128_t part_uuid,
+ const char *path) {
+
+ size_t size, title_len, path_len;
+ _cleanup_free_ char *buf = NULL;
+ struct boot_option *option;
+ struct device_path *devicep;
+ char boot_id[9];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ title_len = (strlen(title)+1) * 2;
+ path_len = (strlen(path)+1) * 2;
+
+ buf = malloc0(offsetof(struct boot_option, title) + title_len +
+ sizeof(struct drive_path) +
+ sizeof(struct device_path) + path_len);
+ if (!buf)
+ return -ENOMEM;
+
+ /* header */
+ option = (struct boot_option *)buf;
+ option->attr = LOAD_OPTION_ACTIVE;
+ option->path_len = offsetof(struct device_path, drive) + sizeof(struct drive_path) +
+ offsetof(struct device_path, path) + path_len +
+ offsetof(struct device_path, path);
+ to_utf16(option->title, title);
+ size = offsetof(struct boot_option, title) + title_len;
+
+ /* partition info */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_HARDDRIVE_DP;
+ devicep->length = offsetof(struct device_path, drive) + sizeof(struct drive_path);
+ memcpy(&devicep->drive.part_nr, &part, sizeof(uint32_t));
+ memcpy(&devicep->drive.part_start, &pstart, sizeof(uint64_t));
+ memcpy(&devicep->drive.part_size, &psize, sizeof(uint64_t));
+ id128_to_efi_guid(part_uuid, devicep->drive.signature);
+ devicep->drive.mbr_type = MBR_TYPE_EFI_PARTITION_TABLE_HEADER;
+ devicep->drive.signature_type = SIGNATURE_TYPE_GUID;
+ size += devicep->length;
+
+ /* path to loader */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = MEDIA_DEVICE_PATH;
+ devicep->sub_type = MEDIA_FILEPATH_DP;
+ devicep->length = offsetof(struct device_path, path) + path_len;
+ to_utf16(devicep->path, path);
+ tilt_slashes(devicep->path);
+ size += devicep->length;
+
+ /* end of path */
+ devicep = (struct device_path *)(buf + size);
+ devicep->type = END_DEVICE_PATH_TYPE;
+ devicep->sub_type = END_ENTIRE_DEVICE_PATH_SUBTYPE;
+ devicep->length = offsetof(struct device_path, path);
+ size += devicep->length;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, buf, size);
+}
+
+int efi_remove_boot_option(uint16_t id) {
+ char boot_id[9];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ xsprintf(boot_id, "Boot%04X", id);
+ return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, 0);
+}
+
+int efi_get_boot_order(uint16_t **order) {
+ _cleanup_free_ void *buf = NULL;
+ size_t l;
+ int r;
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_GLOBAL, "BootOrder", NULL, &buf, &l);
+ if (r < 0)
+ return r;
+
+ if (l <= 0)
+ return -ENOENT;
+
+ if (l % sizeof(uint16_t) > 0 ||
+ l / sizeof(uint16_t) > INT_MAX)
+ return -EINVAL;
+
+ *order = TAKE_PTR(buf);
+ return (int) (l / sizeof(uint16_t));
+}
+
+int efi_set_boot_order(uint16_t *order, size_t n) {
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ return efi_set_variable(EFI_VENDOR_GLOBAL, "BootOrder", order, n * sizeof(uint16_t));
+}
+
+static int boot_id_hex(const char s[static 4]) {
+ int id = 0, i;
+
+ assert(s);
+
+ for (i = 0; i < 4; i++)
+ if (s[i] >= '0' && s[i] <= '9')
+ id |= (s[i] - '0') << (3 - i) * 4;
+ else if (s[i] >= 'A' && s[i] <= 'F')
+ id |= (s[i] - 'A' + 10) << (3 - i) * 4;
+ else
+ return -EINVAL;
+
+ return id;
+}
+
+static int cmp_uint16(const uint16_t *a, const uint16_t *b) {
+ return CMP(*a, *b);
+}
+
+int efi_get_boot_options(uint16_t **options) {
+ _cleanup_closedir_ DIR *dir = NULL;
+ _cleanup_free_ uint16_t *list = NULL;
+ struct dirent *de;
+ size_t alloc = 0;
+ int count = 0;
+
+ assert(options);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ dir = opendir("/sys/firmware/efi/efivars/");
+ if (!dir)
+ return -errno;
+
+ FOREACH_DIRENT(de, dir, return -errno) {
+ int id;
+
+ if (strncmp(de->d_name, "Boot", 4) != 0)
+ continue;
+
+ if (strlen(de->d_name) != 45)
+ continue;
+
+ if (strcmp(de->d_name + 8, "-8be4df61-93ca-11d2-aa0d-00e098032b8c") != 0)
+ continue;
+
+ id = boot_id_hex(de->d_name + 4);
+ if (id < 0)
+ continue;
+
+ if (!GREEDY_REALLOC(list, alloc, count + 1))
+ return -ENOMEM;
+
+ list[count++] = id;
+ }
+
+ typesafe_qsort(list, count, cmp_uint16);
+
+ *options = TAKE_PTR(list);
+
+ return count;
+}
+
+static int read_usec(sd_id128_t vendor, const char *name, usec_t *u) {
+ _cleanup_free_ char *j = NULL;
+ int r;
+ uint64_t x = 0;
+
+ assert(name);
+ assert(u);
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, name, &j);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(j, &x);
+ if (r < 0)
+ return r;
+
+ *u = x;
+ return 0;
+}
+
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+ uint64_t x, y;
+ int r;
+
+ assert(firmware);
+ assert(loader);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeInitUSec", &x);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read LoaderTimeInitUSec: %m");
+
+ r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeExecUSec", &y);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read LoaderTimeExecUSec: %m");
+
+ if (y == 0 || y < x || y - x > USEC_PER_HOUR)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+ "Bad LoaderTimeInitUSec=%"PRIu64", LoaderTimeExecUSec=%" PRIu64"; refusing.",
+ x, y);
+
+ *firmware = x;
+ *loader = y;
+
+ return 0;
+}
+
+int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+ _cleanup_free_ char *p = NULL;
+ int r, parsed[16];
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderDevicePartUUID", &p);
+ if (r < 0)
+ return r;
+
+ if (sscanf(p, SD_ID128_UUID_FORMAT_STR,
+ &parsed[0], &parsed[1], &parsed[2], &parsed[3],
+ &parsed[4], &parsed[5], &parsed[6], &parsed[7],
+ &parsed[8], &parsed[9], &parsed[10], &parsed[11],
+ &parsed[12], &parsed[13], &parsed[14], &parsed[15]) != 16)
+ return -EIO;
+
+ if (u) {
+ unsigned i;
+
+ for (i = 0; i < ELEMENTSOF(parsed); i++)
+ u->bytes[i] = parsed[i];
+ }
+
+ return 0;
+}
+
+int efi_loader_get_entries(char ***ret) {
+ _cleanup_free_ char16_t *entries = NULL;
+ _cleanup_strv_free_ char **l = NULL;
+ size_t size, i, start;
+ int r;
+
+ assert(ret);
+
+ if (!is_efi_boot())
+ return -EOPNOTSUPP;
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntries", NULL, (void**) &entries, &size);
+ if (r < 0)
+ return r;
+
+ /* The variable contains a series of individually NUL terminated UTF-16 strings. */
+
+ for (i = 0, start = 0;; i++) {
+ _cleanup_free_ char *decoded = NULL;
+ bool end;
+
+ /* Is this the end of the variable's data? */
+ end = i * sizeof(char16_t) >= size;
+
+ /* Are we in the middle of a string? (i.e. not at the end of the variable, nor at a NUL terminator?) If
+ * so, let's go to the next entry. */
+ if (!end && entries[i] != 0)
+ continue;
+
+ /* We reached the end of a string, let's decode it into UTF-8 */
+ decoded = utf16_to_utf8(entries + start, (i - start) * sizeof(char16_t));
+ if (!decoded)
+ return -ENOMEM;
+
+ if (efi_loader_entry_name_valid(decoded)) {
+ r = strv_consume(&l, TAKE_PTR(decoded));
+ if (r < 0)
+ return r;
+ } else
+ log_debug("Ignoring invalid loader entry '%s'.", decoded);
+
+ /* We reached the end of the variable */
+ if (end)
+ break;
+
+ /* Continue after the NUL byte */
+ start = i + 1;
+ }
+
+ *ret = TAKE_PTR(l);
+ return 0;
+}
+
+int efi_loader_get_features(uint64_t *ret) {
+ _cleanup_free_ void *v = NULL;
+ size_t s;
+ int r;
+
+ if (!is_efi_boot()) {
+ *ret = 0;
+ return 0;
+ }
+
+ r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderFeatures", NULL, &v, &s);
+ if (r == -ENOENT) {
+ _cleanup_free_ char *info = NULL;
+
+ /* The new (v240+) LoaderFeatures variable is not supported, let's see if it's systemd-boot at all */
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderInfo", &info);
+ if (r < 0) {
+ if (r != -ENOENT)
+ return r;
+
+ /* Variable not set, definitely means not systemd-boot */
+
+ } else if (first_word(info, "systemd-boot")) {
+
+ /* An older systemd-boot version. Let's hardcode the feature set, since it was pretty
+ * static in all its versions. */
+
+ *ret = EFI_LOADER_FEATURE_CONFIG_TIMEOUT |
+ EFI_LOADER_FEATURE_ENTRY_DEFAULT |
+ EFI_LOADER_FEATURE_ENTRY_ONESHOT;
+
+ return 0;
+ }
+
+ /* No features supported */
+ *ret = 0;
+ return 0;
+ }
+ if (r < 0)
+ return r;
+
+ if (s != sizeof(uint64_t))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "LoaderFeatures EFI variable doesn't have the right size.");
+
+ memcpy(ret, v, sizeof(uint64_t));
+ return 0;
+}
+
+int efi_loader_get_config_timeout_one_shot(usec_t *ret) {
+ _cleanup_free_ char *v = NULL, *fn = NULL;
+ static struct stat cache_stat = {};
+ struct stat new_stat;
+ static usec_t cache;
+ uint64_t sec;
+ int r;
+
+ assert(ret);
+
+ fn = efi_variable_path(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot");
+ if (!fn)
+ return -ENOMEM;
+
+ /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */
+ if (stat(fn, &new_stat) < 0)
+ return -errno;
+
+ if (stat_inode_unmodified(&new_stat, &cache_stat)) {
+ *ret = cache;
+ return 0;
+ }
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderConfigTimeoutOneShot", &v);
+ if (r < 0)
+ return r;
+
+ r = safe_atou64(v, &sec);
+ if (r < 0)
+ return r;
+ if (sec > USEC_INFINITY / USEC_PER_SEC)
+ return -ERANGE;
+
+ cache_stat = new_stat;
+ *ret = cache = sec * USEC_PER_SEC; /* return in µs */
+ return 0;
+}
+
+int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat) {
+ _cleanup_free_ char *fn = NULL, *v = NULL;
+ struct stat new_stat;
+ int r;
+
+ assert(cache);
+ assert(cache_stat);
+
+ fn = efi_variable_path(EFI_VENDOR_LOADER, "LoaderEntryOneShot");
+ if (!fn)
+ return -ENOMEM;
+
+ /* stat() the EFI variable, to see if the mtime changed. If it did we need to cache again. */
+ if (stat(fn, &new_stat) < 0)
+ return -errno;
+
+ if (stat_inode_unmodified(&new_stat, cache_stat))
+ return 0;
+
+ r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &v);
+ if (r < 0)
+ return r;
+
+ if (!efi_loader_entry_name_valid(v))
+ return -EINVAL;
+
+ *cache_stat = new_stat;
+ free_and_replace(*cache, v);
+
+ return 0;
+}
+
+#endif
+
+bool efi_loader_entry_name_valid(const char *s) {
+ if (isempty(s))
+ return false;
+
+ if (strlen(s) > FILENAME_MAX) /* Make sure entry names fit in filenames */
+ return false;
+
+ return in_charset(s, ALPHANUMERICAL "+-_.");
+}
+
+char *efi_tilt_backslashes(char *s) {
+ char *p;
+
+ for (p = s; *p; p++)
+ if (*p == '\\')
+ *p = '/';
+
+ return s;
+}
diff --git a/src/shared/efi-loader.h b/src/shared/efi-loader.h
new file mode 100644
index 0000000..34476f4
--- /dev/null
+++ b/src/shared/efi-loader.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "efivars.h"
+
+#include <sys/stat.h>
+
+#if ENABLE_EFI
+
+int efi_reboot_to_firmware_supported(void);
+int efi_get_reboot_to_firmware(void);
+int efi_set_reboot_to_firmware(bool value);
+
+int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active);
+int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path);
+int efi_remove_boot_option(uint16_t id);
+int efi_get_boot_order(uint16_t **order);
+int efi_set_boot_order(uint16_t *order, size_t n);
+int efi_get_boot_options(uint16_t **options);
+
+int efi_loader_get_device_part_uuid(sd_id128_t *u);
+int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader);
+
+int efi_loader_get_entries(char ***ret);
+
+int efi_loader_get_features(uint64_t *ret);
+
+int efi_loader_get_config_timeout_one_shot(usec_t *ret);
+int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat);
+
+#else
+
+static inline int efi_reboot_to_firmware_supported(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_reboot_to_firmware(void) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_reboot_to_firmware(bool value) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_remove_boot_option(uint16_t id) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_order(uint16_t **order) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_set_boot_order(uint16_t *order, size_t n) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_get_boot_options(uint16_t **options) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_device_part_uuid(sd_id128_t *u) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_entries(char ***ret) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_features(uint64_t *ret) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_get_config_timeout_one_shot(usec_t *ret) {
+ return -EOPNOTSUPP;
+}
+
+static inline int efi_loader_update_entry_one_shot_cache(char **cache, struct stat *cache_stat) {
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+bool efi_loader_entry_name_valid(const char *s);
+
+char *efi_tilt_backslashes(char *s);
diff --git a/src/shared/enable-mempool.c b/src/shared/enable-mempool.c
new file mode 100644
index 0000000..1abfccb
--- /dev/null
+++ b/src/shared/enable-mempool.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "mempool.h"
+
+const bool mempool_use_allowed = true;
diff --git a/src/shared/env-file-label.c b/src/shared/env-file-label.c
new file mode 100644
index 0000000..468afce
--- /dev/null
+++ b/src/shared/env-file-label.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#include "env-file-label.h"
+#include "env-file.h"
+#include "selinux-util.h"
+
+int write_env_file_label(const char *fname, char **l) {
+ int r;
+
+ r = mac_selinux_create_file_prepare(fname, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = write_env_file(fname, l);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
diff --git a/src/shared/env-file-label.h b/src/shared/env-file-label.h
new file mode 100644
index 0000000..d68058a
--- /dev/null
+++ b/src/shared/env-file-label.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_env_file_label(const char *fname, char **l);
diff --git a/src/shared/ethtool-util.c b/src/shared/ethtool-util.c
new file mode 100644
index 0000000..e6fab26
--- /dev/null
+++ b/src/shared/ethtool-util.c
@@ -0,0 +1,1149 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/sockios.h>
+
+#include "conf-parser.h"
+#include "ethtool-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "log.h"
+#include "memory-util.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "strxcpyx.h"
+
+static const char* const duplex_table[_DUP_MAX] = {
+ [DUP_FULL] = "full",
+ [DUP_HALF] = "half"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(duplex, Duplex);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_duplex, duplex, Duplex, "Failed to parse duplex setting");
+
+static const char* const wol_table[_WOL_MAX] = {
+ [WOL_PHY] = "phy",
+ [WOL_UCAST] = "unicast",
+ [WOL_MCAST] = "multicast",
+ [WOL_BCAST] = "broadcast",
+ [WOL_ARP] = "arp",
+ [WOL_MAGIC] = "magic",
+ [WOL_MAGICSECURE] = "secureon",
+ [WOL_OFF] = "off",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(wol, WakeOnLan);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_wol, wol, WakeOnLan, "Failed to parse WakeOnLan setting");
+
+static const char* const port_table[] = {
+ [NET_DEV_PORT_TP] = "tp",
+ [NET_DEV_PORT_AUI] = "aui",
+ [NET_DEV_PORT_MII] = "mii",
+ [NET_DEV_PORT_FIBRE] = "fibre",
+ [NET_DEV_PORT_BNC] = "bnc",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(port, NetDevPort);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_port, port, NetDevPort, "Failed to parse Port setting");
+
+static const char* const netdev_feature_table[_NET_DEV_FEAT_MAX] = {
+ [NET_DEV_FEAT_RX] = "rx-checksum",
+ [NET_DEV_FEAT_TX] = "tx-checksum-", /* The suffix "-" means any feature beginning with "tx-checksum-" */
+ [NET_DEV_FEAT_GSO] = "tx-generic-segmentation",
+ [NET_DEV_FEAT_GRO] = "rx-gro",
+ [NET_DEV_FEAT_LRO] = "rx-lro",
+ [NET_DEV_FEAT_TSO] = "tx-tcp-segmentation",
+ [NET_DEV_FEAT_TSO6] = "tx-tcp6-segmentation",
+};
+
+static const char* const ethtool_link_mode_bit_table[] = {
+ [ETHTOOL_LINK_MODE_10baseT_Half_BIT] = "10baset-half",
+ [ETHTOOL_LINK_MODE_10baseT_Full_BIT] = "10baset-full",
+ [ETHTOOL_LINK_MODE_100baseT_Half_BIT] = "100baset-half",
+ [ETHTOOL_LINK_MODE_100baseT_Full_BIT] = "100baset-full",
+ [ETHTOOL_LINK_MODE_1000baseT_Half_BIT] = "1000baset-half",
+ [ETHTOOL_LINK_MODE_1000baseT_Full_BIT] = "1000baset-full",
+ [ETHTOOL_LINK_MODE_Autoneg_BIT] = "autonegotiation",
+ [ETHTOOL_LINK_MODE_TP_BIT] = "tp",
+ [ETHTOOL_LINK_MODE_AUI_BIT] = "aui",
+ [ETHTOOL_LINK_MODE_MII_BIT] = "mii",
+ [ETHTOOL_LINK_MODE_FIBRE_BIT] = "fibre",
+ [ETHTOOL_LINK_MODE_BNC_BIT] = "bnc",
+ [ETHTOOL_LINK_MODE_10000baseT_Full_BIT] = "10000baset-full",
+ [ETHTOOL_LINK_MODE_Pause_BIT] = "pause",
+ [ETHTOOL_LINK_MODE_Asym_Pause_BIT] = "asym-pause",
+ [ETHTOOL_LINK_MODE_2500baseX_Full_BIT] = "2500basex-full",
+ [ETHTOOL_LINK_MODE_Backplane_BIT] = "backplane",
+ [ETHTOOL_LINK_MODE_1000baseKX_Full_BIT] = "1000basekx-full",
+ [ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT] = "10000basekx4-full",
+ [ETHTOOL_LINK_MODE_10000baseKR_Full_BIT] = "10000basekr-full",
+ [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = "10000baser-fec",
+ [ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT] = "20000basemld2-full",
+ [ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT] = "20000basekr2-full",
+ [ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT] = "40000basekr4-full",
+ [ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT] = "40000basecr4-full",
+ [ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT] = "40000basesr4-full",
+ [ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT] = "40000baselr4-full",
+ [ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT] = "56000basekr4-full",
+ [ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT] = "56000basecr4-full",
+ [ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT] = "56000basesr4-full",
+ [ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT] = "56000baselr4-full",
+ [ETHTOOL_LINK_MODE_25000baseCR_Full_BIT] = "25000basecr-full",
+ [ETHTOOL_LINK_MODE_25000baseKR_Full_BIT] = "25000basekr-full",
+ [ETHTOOL_LINK_MODE_25000baseSR_Full_BIT] = "25000basesr-full",
+ [ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT] = "50000basecr2-full",
+ [ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT] = "50000basekr2-full",
+ [ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT] = "100000basekr4-full",
+ [ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT] = "100000basesr4-full",
+ [ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT] = "100000basecr4-full",
+ [ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT] = "100000baselr4-er4-full",
+ [ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT] = "50000basesr2-full",
+ [ETHTOOL_LINK_MODE_1000baseX_Full_BIT] = "1000basex-full",
+ [ETHTOOL_LINK_MODE_10000baseCR_Full_BIT] = "10000basecr-full",
+ [ETHTOOL_LINK_MODE_10000baseSR_Full_BIT] = "10000basesr-full",
+ [ETHTOOL_LINK_MODE_10000baseLR_Full_BIT] = "10000baselr-full",
+ [ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT] = "10000baselrm-full",
+ [ETHTOOL_LINK_MODE_10000baseER_Full_BIT] = "10000baseer-full",
+ [ETHTOOL_LINK_MODE_2500baseT_Full_BIT] = "2500baset-full",
+ [ETHTOOL_LINK_MODE_5000baseT_Full_BIT] = "5000baset-full",
+ [ETHTOOL_LINK_MODE_FEC_NONE_BIT] = "fec-none",
+ [ETHTOOL_LINK_MODE_FEC_RS_BIT] = "fec-rs",
+ [ETHTOOL_LINK_MODE_FEC_BASER_BIT] = "fec-baser",
+ [ETHTOOL_LINK_MODE_50000baseKR_Full_BIT] = "50000basekr-full",
+ [ETHTOOL_LINK_MODE_50000baseSR_Full_BIT] = "50000basesr-full",
+ [ETHTOOL_LINK_MODE_50000baseCR_Full_BIT] = "50000basecr-full",
+ [ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT] = "50000baselr-er-fr-full",
+ [ETHTOOL_LINK_MODE_50000baseDR_Full_BIT] = "50000basedr-full",
+ [ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT] = "100000basekr2-full",
+ [ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT] = "100000basesr2-full",
+ [ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT] = "100000basecr2-full",
+ [ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT] = "100000baselr2-er2-fr2-full",
+ [ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT] = "100000basedr2-full",
+ [ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT] = "200000basekr4-full",
+ [ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT] = "200000basesr4-full",
+ [ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT] = "200000baselr4-er4-fr4-full",
+ [ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT] = "200000basedr4-full",
+ [ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT] = "200000basecr4-full",
+ [ETHTOOL_LINK_MODE_100baseT1_Full_BIT] = "100baset1-full",
+ [ETHTOOL_LINK_MODE_1000baseT1_Full_BIT] = "1000baset1-full",
+ [ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT] = "400000basekr8-full",
+ [ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT] = "400000basesr8-full",
+ [ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT] = "400000baselr8-er8-fr8-full",
+ [ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT] = "400000basedr8-full",
+ [ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT] = "400000basecr8-full",
+ [ETHTOOL_LINK_MODE_FEC_LLRS_BIT] = "fec-llrs",
+ [ETHTOOL_LINK_MODE_100000baseKR_Full_BIT] = "100000basekr-full",
+ [ETHTOOL_LINK_MODE_100000baseSR_Full_BIT] = "100000basesr-full",
+ [ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT] = "100000baselr-er-fr-full",
+ [ETHTOOL_LINK_MODE_100000baseCR_Full_BIT] = "100000basecr-full",
+ [ETHTOOL_LINK_MODE_100000baseDR_Full_BIT] = "100000basedr-full",
+ [ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT] = "200000basekr2-full",
+ [ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT] = "200000basesr2-full",
+ [ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT] = "200000baselr2-er2-fr2-full",
+ [ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT] = "200000basedr2-full",
+ [ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT] = "200000basecr2-full",
+ [ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT] = "400000basekr4-full",
+ [ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT] = "400000basesr4-full",
+ [ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT] = "400000baselr4-er4-fr4-full",
+ [ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT] = "400000basedr4-full",
+ [ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT] = "400000basecr4-full",
+};
+/* Make sure the array is large enough to fit all bits */
+assert_cc((ELEMENTSOF(ethtool_link_mode_bit_table)-1) / 32 < N_ADVERTISE);
+
+DEFINE_STRING_TABLE_LOOKUP(ethtool_link_mode_bit, enum ethtool_link_mode_bit_indices);
+
+static int ethtool_connect_or_warn(int *ret, bool warn) {
+ int fd;
+
+ assert_return(ret, -EINVAL);
+
+ fd = socket_ioctl_fd();
+ if (fd < 0)
+ return log_full_errno(warn ? LOG_WARNING: LOG_DEBUG, fd,
+ "ethtool: could not create control socket: %m");
+
+ *ret = fd;
+
+ return 0;
+}
+
+int ethtool_get_driver(int *ethtool_fd, const char *ifname, char **ret) {
+ struct ethtool_drvinfo ecmd = {
+ .cmd = ETHTOOL_GDRVINFO,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ char *d;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+ assert(ret);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (isempty(ecmd.driver))
+ return -ENODATA;
+
+ d = strdup(ecmd.driver);
+ if (!d)
+ return -ENOMEM;
+
+ *ret = d;
+ return 0;
+}
+
+int ethtool_get_link_info(
+ int *ethtool_fd,
+ const char *ifname,
+ int *ret_autonegotiation,
+ uint64_t *ret_speed,
+ Duplex *ret_duplex,
+ NetDevPort *ret_port) {
+
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, false);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ret_autonegotiation)
+ *ret_autonegotiation = ecmd.autoneg;
+
+ if (ret_speed) {
+ uint32_t speed;
+
+ speed = ethtool_cmd_speed(&ecmd);
+ *ret_speed = speed == (uint32_t) SPEED_UNKNOWN ?
+ UINT64_MAX : (uint64_t) speed * 1000 * 1000;
+ }
+
+ if (ret_duplex)
+ *ret_duplex = ecmd.duplex;
+
+ if (ret_port)
+ *ret_port = ecmd.port;
+
+ return 0;
+}
+
+int ethtool_get_permanent_macaddr(int *ethtool_fd, const char *ifname, struct ether_addr *ret) {
+ _cleanup_close_ int fd = -1;
+ struct {
+ struct ethtool_perm_addr addr;
+ uint8_t space[MAX_ADDR_LEN];
+ } epaddr = {
+ .addr.cmd = ETHTOOL_GPERMADDR,
+ .addr.size = MAX_ADDR_LEN,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (caddr_t) &epaddr,
+ };
+ int r;
+
+ assert(ifname);
+ assert(ret);
+
+ if (!ethtool_fd)
+ ethtool_fd = &fd;
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, false);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (epaddr.addr.size != 6)
+ return -EOPNOTSUPP;
+
+#pragma GCC diagnostic push
+#if HAVE_ZERO_LENGTH_BOUNDS
+# pragma GCC diagnostic ignored "-Wzero-length-bounds"
+#endif
+ for (size_t i = 0; i < epaddr.addr.size; i++)
+ ret->ether_addr_octet[i] = epaddr.addr.data[i];
+#pragma GCC diagnostic pop
+
+ return 0;
+}
+
+int ethtool_set_speed(int *ethtool_fd, const char *ifname, unsigned speed, Duplex duplex) {
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+
+ if (speed == 0 && duplex == _DUP_INVALID)
+ return 0;
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ethtool_cmd_speed(&ecmd) != speed) {
+ ethtool_cmd_speed_set(&ecmd, speed);
+ need_update = true;
+ }
+
+ switch (duplex) {
+ case DUP_HALF:
+ if (ecmd.duplex != DUPLEX_HALF) {
+ ecmd.duplex = DUPLEX_HALF;
+ need_update = true;
+ }
+ break;
+ case DUP_FULL:
+ if (ecmd.duplex != DUPLEX_FULL) {
+ ecmd.duplex = DUPLEX_FULL;
+ need_update = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SSET;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int ethtool_set_wol(int *ethtool_fd, const char *ifname, WakeOnLan wol) {
+ struct ethtool_wolinfo ecmd = {
+ .cmd = ETHTOOL_GWOL,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+
+ if (wol == _WOL_INVALID)
+ return 0;
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ switch (wol) {
+ case WOL_PHY:
+ if (ecmd.wolopts != WAKE_PHY) {
+ ecmd.wolopts = WAKE_PHY;
+ need_update = true;
+ }
+ break;
+ case WOL_UCAST:
+ if (ecmd.wolopts != WAKE_UCAST) {
+ ecmd.wolopts = WAKE_UCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_MCAST:
+ if (ecmd.wolopts != WAKE_MCAST) {
+ ecmd.wolopts = WAKE_MCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_BCAST:
+ if (ecmd.wolopts != WAKE_BCAST) {
+ ecmd.wolopts = WAKE_BCAST;
+ need_update = true;
+ }
+ break;
+ case WOL_ARP:
+ if (ecmd.wolopts != WAKE_ARP) {
+ ecmd.wolopts = WAKE_ARP;
+ need_update = true;
+ }
+ break;
+ case WOL_MAGIC:
+ if (ecmd.wolopts != WAKE_MAGIC) {
+ ecmd.wolopts = WAKE_MAGIC;
+ need_update = true;
+ }
+ break;
+ case WOL_MAGICSECURE:
+ if (ecmd.wolopts != WAKE_MAGICSECURE) {
+ ecmd.wolopts = WAKE_MAGICSECURE;
+ need_update = true;
+ }
+ break;
+ case WOL_OFF:
+ if (ecmd.wolopts != 0) {
+ ecmd.wolopts = 0;
+ need_update = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SWOL;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int ethtool_set_nic_buffer_size(int *ethtool_fd, const char *ifname, const netdev_ring_param *ring) {
+ struct ethtool_ringparam ecmd = {
+ .cmd = ETHTOOL_GRINGPARAM,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+ assert(ring);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ring->rx_pending_set && ecmd.rx_pending != ring->rx_pending) {
+ ecmd.rx_pending = ring->rx_pending;
+ need_update = true;
+ }
+
+ if (ring->rx_mini_pending_set && ecmd.rx_mini_pending != ring->rx_mini_pending) {
+ ecmd.rx_mini_pending = ring->rx_mini_pending;
+ need_update = true;
+ }
+
+ if (ring->rx_jumbo_pending_set && ecmd.rx_jumbo_pending != ring->rx_jumbo_pending) {
+ ecmd.rx_jumbo_pending = ring->rx_jumbo_pending;
+ need_update = true;
+ }
+
+ if (ring->tx_pending_set && ecmd.tx_pending != ring->tx_pending) {
+ ecmd.tx_pending = ring->tx_pending;
+ need_update = true;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SRINGPARAM;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int get_stringset(int ethtool_fd, struct ifreq *ifr, int stringset_id, struct ethtool_gstrings **ret) {
+ _cleanup_free_ struct ethtool_gstrings *strings = NULL;
+ struct {
+ struct ethtool_sset_info info;
+ uint32_t space;
+ } buffer = {
+ .info = {
+ .cmd = ETHTOOL_GSSET_INFO,
+ .sset_mask = UINT64_C(1) << stringset_id,
+ },
+ };
+ unsigned len;
+ int r;
+
+ assert(ethtool_fd >= 0);
+ assert(ifr);
+ assert(ret);
+
+ ifr->ifr_data = (void *) &buffer.info;
+
+ r = ioctl(ethtool_fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (!buffer.info.sset_mask)
+ return -EINVAL;
+
+#pragma GCC diagnostic push
+#if HAVE_ZERO_LENGTH_BOUNDS
+# pragma GCC diagnostic ignored "-Wzero-length-bounds"
+#endif
+ len = buffer.info.data[0];
+#pragma GCC diagnostic pop
+
+ strings = malloc0(sizeof(struct ethtool_gstrings) + len * ETH_GSTRING_LEN);
+ if (!strings)
+ return -ENOMEM;
+
+ strings->cmd = ETHTOOL_GSTRINGS;
+ strings->string_set = stringset_id;
+ strings->len = len;
+
+ ifr->ifr_data = (void *) strings;
+
+ r = ioctl(ethtool_fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ *ret = TAKE_PTR(strings);
+
+ return 0;
+}
+
+static int set_features_bit(
+ const struct ethtool_gstrings *strings,
+ const char *feature,
+ bool flag,
+ struct ethtool_sfeatures *sfeatures) {
+ bool found = false;
+
+ assert(strings);
+ assert(feature);
+ assert(sfeatures);
+
+ for (size_t i = 0; i < strings->len; i++)
+ if (streq((char *) &strings->data[i * ETH_GSTRING_LEN], feature) ||
+ (endswith(feature, "-") && startswith((char *) &strings->data[i * ETH_GSTRING_LEN], feature))) {
+ size_t block, bit;
+
+ block = i / 32;
+ bit = i % 32;
+
+ sfeatures->features[block].valid |= 1 << bit;
+ SET_FLAG(sfeatures->features[block].requested, 1 << bit, flag);
+ found = true;
+ }
+
+ return found ? 0 : -ENODATA;
+}
+
+int ethtool_set_features(int *ethtool_fd, const char *ifname, const int *features) {
+ _cleanup_free_ struct ethtool_gstrings *strings = NULL;
+ struct ethtool_sfeatures *sfeatures;
+ struct ifreq ifr = {};
+ int i, r;
+
+ assert(ethtool_fd);
+ assert(ifname);
+ assert(features);
+
+ if (*ethtool_fd < 0) {
+ r = ethtool_connect_or_warn(ethtool_fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = get_stringset(*ethtool_fd, &ifr, ETH_SS_FEATURES, &strings);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: could not get ethtool features for %s", ifname);
+
+ sfeatures = alloca0(sizeof(struct ethtool_sfeatures) + DIV_ROUND_UP(strings->len, 32U) * sizeof(sfeatures->features[0]));
+ sfeatures->cmd = ETHTOOL_SFEATURES;
+ sfeatures->size = DIV_ROUND_UP(strings->len, 32U);
+
+ for (i = 0; i < _NET_DEV_FEAT_MAX; i++)
+ if (features[i] != -1) {
+ r = set_features_bit(strings, netdev_feature_table[i], features[i], sfeatures);
+ if (r < 0) {
+ log_warning_errno(r, "ethtool: could not find feature, ignoring: %s", netdev_feature_table[i]);
+ continue;
+ }
+ }
+
+ ifr.ifr_data = (void *) sfeatures;
+
+ r = ioctl(*ethtool_fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: could not set ethtool features for %s", ifname);
+
+ return 0;
+}
+
+static int get_glinksettings(int fd, struct ifreq *ifr, struct ethtool_link_usettings **ret) {
+ struct ecmd {
+ struct ethtool_link_settings req;
+ __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } ecmd = {
+ .req.cmd = ETHTOOL_GLINKSETTINGS,
+ };
+ struct ethtool_link_usettings *u;
+ unsigned offset;
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(ret);
+
+ /* The interaction user/kernel via the new API requires a small ETHTOOL_GLINKSETTINGS
+ handshake first to agree on the length of the link mode bitmaps. If kernel doesn't
+ agree with user, it returns the bitmap length it is expecting from user as a negative
+ length (and cmd field is 0). When kernel and user agree, kernel returns valid info in
+ all fields (ie. link mode length > 0 and cmd is ETHTOOL_GLINKSETTINGS). Based on
+ https://github.com/torvalds/linux/commit/3f1ac7a700d039c61d8d8b99f28d605d489a60cf
+ */
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ecmd.req.link_mode_masks_nwords >= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS)
+ return -EOPNOTSUPP;
+
+ ecmd.req.link_mode_masks_nwords = -ecmd.req.link_mode_masks_nwords;
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ if (ecmd.req.link_mode_masks_nwords <= 0 || ecmd.req.cmd != ETHTOOL_GLINKSETTINGS)
+ return -EOPNOTSUPP;
+
+ u = new(struct ethtool_link_usettings, 1);
+ if (!u)
+ return -ENOMEM;
+
+ *u = (struct ethtool_link_usettings) {
+ .base = ecmd.req,
+ };
+
+ offset = 0;
+ memcpy(u->link_modes.supported, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(u->link_modes.advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(u->link_modes.lp_advertising, &ecmd.link_mode_data[offset], 4 * ecmd.req.link_mode_masks_nwords);
+
+ *ret = u;
+
+ return 0;
+}
+
+static int get_gset(int fd, struct ifreq *ifr, struct ethtool_link_usettings **ret) {
+ struct ethtool_link_usettings *e;
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_GSET,
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(ret);
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ e = new(struct ethtool_link_usettings, 1);
+ if (!e)
+ return -ENOMEM;
+
+ *e = (struct ethtool_link_usettings) {
+ .base.cmd = ETHTOOL_GSET,
+ .base.link_mode_masks_nwords = 1,
+ .base.speed = ethtool_cmd_speed(&ecmd),
+ .base.duplex = ecmd.duplex,
+ .base.port = ecmd.port,
+ .base.phy_address = ecmd.phy_address,
+ .base.autoneg = ecmd.autoneg,
+ .base.mdio_support = ecmd.mdio_support,
+
+ .link_modes.supported[0] = ecmd.supported,
+ .link_modes.advertising[0] = ecmd.advertising,
+ .link_modes.lp_advertising[0] = ecmd.lp_advertising,
+ };
+
+ *ret = e;
+
+ return 0;
+}
+
+static int set_slinksettings(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) {
+ struct {
+ struct ethtool_link_settings req;
+ __u32 link_mode_data[3 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } ecmd = {};
+ unsigned offset;
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(u);
+
+ if (u->base.cmd != ETHTOOL_GLINKSETTINGS || u->base.link_mode_masks_nwords <= 0)
+ return -EINVAL;
+
+ ecmd.req = u->base;
+ ecmd.req.cmd = ETHTOOL_SLINKSETTINGS;
+ offset = 0;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.supported, 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.advertising, 4 * ecmd.req.link_mode_masks_nwords);
+
+ offset += ecmd.req.link_mode_masks_nwords;
+ memcpy(&ecmd.link_mode_data[offset], u->link_modes.lp_advertising, 4 * ecmd.req.link_mode_masks_nwords);
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int set_sset(int fd, struct ifreq *ifr, const struct ethtool_link_usettings *u) {
+ struct ethtool_cmd ecmd = {
+ .cmd = ETHTOOL_SSET,
+ };
+ int r;
+
+ assert(fd >= 0);
+ assert(ifr);
+ assert(u);
+
+ if (u->base.cmd != ETHTOOL_GSET || u->base.link_mode_masks_nwords <= 0)
+ return -EINVAL;
+
+ ecmd.supported = u->link_modes.supported[0];
+ ecmd.advertising = u->link_modes.advertising[0];
+ ecmd.lp_advertising = u->link_modes.lp_advertising[0];
+
+ ethtool_cmd_speed_set(&ecmd, u->base.speed);
+
+ ecmd.duplex = u->base.duplex;
+ ecmd.port = u->base.port;
+ ecmd.phy_address = u->base.phy_address;
+ ecmd.autoneg = u->base.autoneg;
+ ecmd.mdio_support = u->base.mdio_support;
+ ecmd.eth_tp_mdix = u->base.eth_tp_mdix;
+ ecmd.eth_tp_mdix_ctrl = u->base.eth_tp_mdix_ctrl;
+
+ ifr->ifr_data = (void *) &ecmd;
+
+ r = ioctl(fd, SIOCETHTOOL, ifr);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* If autonegotiation is disabled, the speed and duplex represent the fixed link
+ * mode and are writable if the driver supports multiple link modes. If it is
+ * enabled then they are read-only. If the link is up they represent the negotiated
+ * link mode; if the link is down, the speed is 0, %SPEED_UNKNOWN or the highest
+ * enabled speed and @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ */
+int ethtool_set_glinksettings(
+ int *fd,
+ const char *ifname,
+ int autonegotiation,
+ const uint32_t advertise[static N_ADVERTISE],
+ uint64_t speed,
+ Duplex duplex,
+ NetDevPort port) {
+
+ _cleanup_free_ struct ethtool_link_usettings *u = NULL;
+ struct ifreq ifr = {};
+ int r;
+
+ assert(fd);
+ assert(ifname);
+ assert(advertise);
+
+ if (autonegotiation != AUTONEG_DISABLE && memeqzero(advertise, sizeof(uint32_t) * N_ADVERTISE)) {
+ log_info("ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.");
+ return 0;
+ }
+
+ if (*fd < 0) {
+ r = ethtool_connect_or_warn(fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = get_glinksettings(*fd, &ifr, &u);
+ if (r < 0) {
+ r = get_gset(*fd, &ifr, &u);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: Cannot get device settings for %s : %m", ifname);
+ }
+
+ if (speed > 0)
+ u->base.speed = DIV_ROUND_UP(speed, 1000000);
+
+ if (duplex != _DUP_INVALID)
+ u->base.duplex = duplex;
+
+ if (port != _NET_DEV_PORT_INVALID)
+ u->base.port = port;
+
+ if (autonegotiation >= 0)
+ u->base.autoneg = autonegotiation;
+
+ if (!memeqzero(advertise, sizeof(uint32_t) * N_ADVERTISE)) {
+ u->base.autoneg = AUTONEG_ENABLE;
+ memcpy(&u->link_modes.advertising, advertise, sizeof(uint32_t) * N_ADVERTISE);
+ memzero((uint8_t*) &u->link_modes.advertising + sizeof(uint32_t) * N_ADVERTISE,
+ ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES - sizeof(uint32_t) * N_ADVERTISE);
+ }
+
+ if (u->base.cmd == ETHTOOL_GLINKSETTINGS)
+ r = set_slinksettings(*fd, &ifr, u);
+ else
+ r = set_sset(*fd, &ifr, u);
+ if (r < 0)
+ return log_warning_errno(r, "ethtool: Cannot set device settings for %s: %m", ifname);
+
+ return r;
+}
+
+int ethtool_set_channels(int *fd, const char *ifname, const netdev_channels *channels) {
+ struct ethtool_channels ecmd = {
+ .cmd = ETHTOOL_GCHANNELS,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(fd);
+ assert(ifname);
+ assert(channels);
+
+ if (*fd < 0) {
+ r = ethtool_connect_or_warn(fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (channels->rx_count_set && ecmd.rx_count != channels->rx_count) {
+ ecmd.rx_count = channels->rx_count;
+ need_update = true;
+ }
+
+ if (channels->tx_count_set && ecmd.tx_count != channels->tx_count) {
+ ecmd.tx_count = channels->tx_count;
+ need_update = true;
+ }
+
+ if (channels->other_count_set && ecmd.other_count != channels->other_count) {
+ ecmd.other_count = channels->other_count;
+ need_update = true;
+ }
+
+ if (channels->combined_count_set && ecmd.combined_count != channels->combined_count) {
+ ecmd.combined_count = channels->combined_count;
+ need_update = true;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SCHANNELS;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int ethtool_set_flow_control(int *fd, const char *ifname, int rx, int tx, int autoneg) {
+ struct ethtool_pauseparam ecmd = {
+ .cmd = ETHTOOL_GPAUSEPARAM,
+ };
+ struct ifreq ifr = {
+ .ifr_data = (void*) &ecmd,
+ };
+ bool need_update = false;
+ int r;
+
+ assert(fd);
+ assert(ifname);
+
+ if (*fd < 0) {
+ r = ethtool_connect_or_warn(fd, true);
+ if (r < 0)
+ return r;
+ }
+
+ strscpy(ifr.ifr_name, IFNAMSIZ, ifname);
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+
+ if (rx >= 0 && ecmd.rx_pause != (uint32_t) rx) {
+ ecmd.rx_pause = rx;
+ need_update = true;
+ }
+
+ if (tx >= 0 && ecmd.tx_pause != (uint32_t) tx) {
+ ecmd.tx_pause = tx;
+ need_update = true;
+ }
+
+ if (autoneg >= 0 && ecmd.autoneg != (uint32_t) autoneg) {
+ ecmd.autoneg = autoneg;
+ need_update = true;
+ }
+
+ if (need_update) {
+ ecmd.cmd = ETHTOOL_SPAUSEPARAM;
+
+ r = ioctl(*fd, SIOCETHTOOL, &ifr);
+ if (r < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+int config_parse_channel(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ netdev_channels *channels = data;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse channel value for %s=, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+ if (k < 1) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid %s= value, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "RxChannels")) {
+ channels->rx_count = k;
+ channels->rx_count_set = true;
+ } else if (streq(lvalue, "TxChannels")) {
+ channels->tx_count = k;
+ channels->tx_count_set = true;
+ } else if (streq(lvalue, "OtherChannels")) {
+ channels->other_count = k;
+ channels->other_count_set = true;
+ } else if (streq(lvalue, "CombinedChannels")) {
+ channels->combined_count = k;
+ channels->combined_count_set = true;
+ }
+
+ return 0;
+}
+
+int config_parse_advertise(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ uint32_t *advertise = data;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (isempty(rvalue)) {
+ /* Empty string resets the value. */
+ memzero(advertise, sizeof(uint32_t) * N_ADVERTISE);
+ return 0;
+ }
+
+ for (p = rvalue;;) {
+ _cleanup_free_ char *w = NULL;
+ enum ethtool_link_mode_bit_indices mode;
+
+ r = extract_first_word(&p, &w, NULL, 0);
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to split advertise modes '%s', ignoring assignment: %m", rvalue);
+ return 0;
+ }
+ if (r == 0)
+ return 0;
+
+ mode = ethtool_link_mode_bit_from_string(w);
+ /* We reuse the kernel provided enum which does not contain negative value. So, the cast
+ * below is mandatory. Otherwise, the check below always passes and access an invalid address. */
+ if ((int) mode < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Failed to parse advertise mode, ignoring: %s", w);
+ continue;
+ }
+
+ advertise[mode / 32] |= 1UL << (mode % 32);
+ }
+}
+
+int config_parse_nic_buffer_size(const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ netdev_ring_param *ring = data;
+ uint32_t k;
+ int r;
+
+ assert(filename);
+ assert(section);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = safe_atou32(rvalue, &k);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse interface buffer value, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (k < 1) {
+ log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Invalid %s= value, ignoring: %s", lvalue, rvalue);
+ return 0;
+ }
+
+ if (streq(lvalue, "RxBufferSize")) {
+ ring->rx_pending = k;
+ ring->rx_pending_set = true;
+ } else if (streq(lvalue, "RxMiniBufferSize")) {
+ ring->rx_mini_pending = k;
+ ring->rx_mini_pending_set = true;
+ } else if (streq(lvalue, "RxJumboBufferSize")) {
+ ring->rx_jumbo_pending = k;
+ ring->rx_jumbo_pending_set = true;
+ } else if (streq(lvalue, "TxBufferSize")) {
+ ring->tx_pending = k;
+ ring->tx_pending_set = true;
+ }
+
+ return 0;
+}
diff --git a/src/shared/ethtool-util.h b/src/shared/ethtool-util.h
new file mode 100644
index 0000000..f94b3e1
--- /dev/null
+++ b/src/shared/ethtool-util.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <macro.h>
+#include <net/ethernet.h>
+#include <linux/ethtool.h>
+
+#include "conf-parser.h"
+
+#define N_ADVERTISE 3
+
+/* we can't use DUPLEX_ prefix, as it
+ * clashes with <linux/ethtool.h> */
+typedef enum Duplex {
+ DUP_HALF = DUPLEX_HALF,
+ DUP_FULL = DUPLEX_FULL,
+ _DUP_MAX,
+ _DUP_INVALID = -1
+} Duplex;
+
+typedef enum WakeOnLan {
+ WOL_PHY,
+ WOL_UCAST,
+ WOL_MCAST,
+ WOL_BCAST,
+ WOL_ARP,
+ WOL_MAGIC,
+ WOL_MAGICSECURE,
+ WOL_OFF,
+ _WOL_MAX,
+ _WOL_INVALID = -1
+} WakeOnLan;
+
+typedef enum NetDevFeature {
+ NET_DEV_FEAT_RX,
+ NET_DEV_FEAT_TX,
+ NET_DEV_FEAT_GSO,
+ NET_DEV_FEAT_GRO,
+ NET_DEV_FEAT_LRO,
+ NET_DEV_FEAT_TSO,
+ NET_DEV_FEAT_TSO6,
+ _NET_DEV_FEAT_MAX,
+ _NET_DEV_FEAT_INVALID = -1
+} NetDevFeature;
+
+typedef enum NetDevPort {
+ NET_DEV_PORT_TP = PORT_TP,
+ NET_DEV_PORT_AUI = PORT_AUI,
+ NET_DEV_PORT_MII = PORT_MII,
+ NET_DEV_PORT_FIBRE = PORT_FIBRE,
+ NET_DEV_PORT_BNC = PORT_BNC,
+ NET_DEV_PORT_DA = PORT_DA,
+ NET_DEV_PORT_NONE = PORT_NONE,
+ NET_DEV_PORT_OTHER = PORT_OTHER,
+ _NET_DEV_PORT_MAX,
+ _NET_DEV_PORT_INVALID = -1
+} NetDevPort;
+
+#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX)
+#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NBYTES (4 * ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32)
+
+/* layout of the struct passed from/to userland */
+struct ethtool_link_usettings {
+ struct ethtool_link_settings base;
+
+ struct {
+ uint32_t supported[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ uint32_t advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ uint32_t lp_advertising[ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
+ } link_modes;
+};
+
+typedef struct netdev_channels {
+ uint32_t rx_count;
+ uint32_t tx_count;
+ uint32_t other_count;
+ uint32_t combined_count;
+
+ bool rx_count_set;
+ bool tx_count_set;
+ bool other_count_set;
+ bool combined_count_set;
+} netdev_channels;
+
+typedef struct netdev_ring_param {
+ uint32_t rx_pending;
+ uint32_t rx_mini_pending;
+ uint32_t rx_jumbo_pending;
+ uint32_t tx_pending;
+
+ bool rx_pending_set;
+ bool rx_mini_pending_set;
+ bool rx_jumbo_pending_set;
+ bool tx_pending_set;
+} netdev_ring_param;
+
+int ethtool_get_driver(int *ethtool_fd, const char *ifname, char **ret);
+int ethtool_get_link_info(int *ethtool_fd, const char *ifname,
+ int *ret_autonegotiation, uint64_t *ret_speed,
+ Duplex *ret_duplex, NetDevPort *ret_port);
+int ethtool_get_permanent_macaddr(int *ethtool_fd, const char *ifname, struct ether_addr *ret);
+int ethtool_set_speed(int *ethtool_fd, const char *ifname, unsigned speed, Duplex duplex);
+int ethtool_set_wol(int *ethtool_fd, const char *ifname, WakeOnLan wol);
+int ethtool_set_nic_buffer_size(int *ethtool_fd, const char *ifname, const netdev_ring_param *ring);
+int ethtool_set_features(int *ethtool_fd, const char *ifname, const int *features);
+int ethtool_set_glinksettings(int *ethtool_fd, const char *ifname,
+ int autonegotiation, const uint32_t advertise[static N_ADVERTISE],
+ uint64_t speed, Duplex duplex, NetDevPort port);
+int ethtool_set_channels(int *ethtool_fd, const char *ifname, const netdev_channels *channels);
+int ethtool_set_flow_control(int *fd, const char *ifname, int rx, int tx, int autoneg);
+
+const char *duplex_to_string(Duplex d) _const_;
+Duplex duplex_from_string(const char *d) _pure_;
+
+const char *wol_to_string(WakeOnLan wol) _const_;
+WakeOnLan wol_from_string(const char *wol) _pure_;
+
+const char *port_to_string(NetDevPort port) _const_;
+NetDevPort port_from_string(const char *port) _pure_;
+
+const char *ethtool_link_mode_bit_to_string(enum ethtool_link_mode_bit_indices val) _const_;
+enum ethtool_link_mode_bit_indices ethtool_link_mode_bit_from_string(const char *str) _pure_;
+
+CONFIG_PARSER_PROTOTYPE(config_parse_duplex);
+CONFIG_PARSER_PROTOTYPE(config_parse_wol);
+CONFIG_PARSER_PROTOTYPE(config_parse_port);
+CONFIG_PARSER_PROTOTYPE(config_parse_channel);
+CONFIG_PARSER_PROTOTYPE(config_parse_advertise);
+CONFIG_PARSER_PROTOTYPE(config_parse_nic_buffer_size);
diff --git a/src/shared/exec-util.c b/src/shared/exec-util.c
new file mode 100644
index 0000000..61ee3b1
--- /dev/null
+++ b/src/shared/exec-util.c
@@ -0,0 +1,446 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <dirent.h>
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "exec-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
+#include "macro.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "serialize.h"
+#include "set.h"
+#include "signal-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "tmpfile-util.h"
+#include "util.h"
+
+/* Put this test here for a lack of better place */
+assert_cc(EAGAIN == EWOULDBLOCK);
+
+static int do_spawn(const char *path, char *argv[], int stdout_fd, pid_t *pid) {
+
+ pid_t _pid;
+ int r;
+
+ if (null_or_empty_path(path)) {
+ log_debug("%s is empty (a mask).", path);
+ return 0;
+ }
+
+ r = safe_fork("(direxec)", FORK_DEATHSIG|FORK_LOG, &_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char *_argv[2];
+
+ if (stdout_fd >= 0) {
+ r = rearrange_stdio(STDIN_FILENO, stdout_fd, STDERR_FILENO);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+ }
+
+ (void) rlimit_nofile_safe();
+
+ if (!argv) {
+ _argv[0] = (char*) path;
+ _argv[1] = NULL;
+ argv = _argv;
+ } else
+ argv[0] = (char*) path;
+
+ execv(path, argv);
+ log_error_errno(errno, "Failed to execute %s: %m", path);
+ _exit(EXIT_FAILURE);
+ }
+
+ *pid = _pid;
+ return 1;
+}
+
+static int do_execute(
+ char **directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ int output_fd,
+ char *argv[],
+ char *envp[],
+ ExecDirFlags flags) {
+
+ _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
+ _cleanup_strv_free_ char **paths = NULL;
+ char **path, **e;
+ int r;
+ bool parallel_execution;
+
+ /* We fork this all off from a child process so that we can somewhat cleanly make
+ * use of SIGALRM to set a time limit.
+ *
+ * We attempt to perform parallel execution if configured by the user, however
+ * if `callbacks` is nonnull, execution must be serial.
+ */
+ parallel_execution = FLAGS_SET(flags, EXEC_DIR_PARALLEL) && !callbacks;
+
+ r = conf_files_list_strv(&paths, NULL, NULL, CONF_FILES_EXECUTABLE|CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char* const*) directories);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enumerate executables: %m");
+
+ if (parallel_execution) {
+ pids = hashmap_new(NULL);
+ if (!pids)
+ return log_oom();
+ }
+
+ /* Abort execution of this process after the timeout. We simply rely on SIGALRM as
+ * default action terminating the process, and turn on alarm(). */
+
+ if (timeout != USEC_INFINITY)
+ alarm(DIV_ROUND_UP(timeout, USEC_PER_SEC));
+
+ STRV_FOREACH(e, envp)
+ if (putenv(*e) != 0)
+ return log_error_errno(errno, "Failed to set environment variable: %m");
+
+ STRV_FOREACH(path, paths) {
+ _cleanup_free_ char *t = NULL;
+ _cleanup_close_ int fd = -1;
+ pid_t pid;
+
+ t = strdup(*path);
+ if (!t)
+ return log_oom();
+
+ if (callbacks) {
+ fd = open_serialization_fd(basename(*path));
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open serialization file: %m");
+ }
+
+ r = do_spawn(t, argv, fd, &pid);
+ if (r <= 0)
+ continue;
+
+ if (parallel_execution) {
+ r = hashmap_put(pids, PID_TO_PTR(pid), t);
+ if (r < 0)
+ return log_oom();
+ t = NULL;
+ } else {
+ r = wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ if (FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS)) {
+ if (r < 0)
+ continue;
+ } else if (r > 0)
+ return r;
+
+ if (callbacks) {
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to seek on serialization fd: %m");
+
+ r = callbacks[STDOUT_GENERATE](fd, callback_args[STDOUT_GENERATE]);
+ fd = -1;
+ if (r < 0)
+ return log_error_errno(r, "Failed to process output from %s: %m", *path);
+ }
+ }
+ }
+
+ if (callbacks) {
+ r = callbacks[STDOUT_COLLECT](output_fd, callback_args[STDOUT_COLLECT]);
+ if (r < 0)
+ return log_error_errno(r, "Callback two failed: %m");
+ }
+
+ while (!hashmap_isempty(pids)) {
+ _cleanup_free_ char *t = NULL;
+ pid_t pid;
+
+ pid = PTR_TO_PID(hashmap_first_key(pids));
+ assert(pid > 0);
+
+ t = hashmap_remove(pids, PID_TO_PTR(pid));
+ assert(t);
+
+ r = wait_for_terminate_and_check(t, pid, WAIT_LOG);
+ if (!FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS) && r > 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int execute_directories(
+ const char* const* directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ char *argv[],
+ char *envp[],
+ ExecDirFlags flags) {
+
+ char **dirs = (char**) directories;
+ _cleanup_close_ int fd = -1;
+ char *name;
+ int r;
+ pid_t executor_pid;
+
+ assert(!strv_isempty(dirs));
+
+ name = basename(dirs[0]);
+ assert(!isempty(name));
+
+ if (callbacks) {
+ assert(callback_args);
+ assert(callbacks[STDOUT_GENERATE]);
+ assert(callbacks[STDOUT_COLLECT]);
+ assert(callbacks[STDOUT_CONSUME]);
+
+ fd = open_serialization_fd(name);
+ if (fd < 0)
+ return log_error_errno(fd, "Failed to open serialization file: %m");
+ }
+
+ /* Executes all binaries in the directories serially or in parallel and waits for
+ * them to finish. Optionally a timeout is applied. If a file with the same name
+ * exists in more than one directory, the earliest one wins. */
+
+ r = safe_fork("(sd-executor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG, &executor_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ r = do_execute(dirs, timeout, callbacks, callback_args, fd, argv, envp, flags);
+ _exit(r < 0 ? EXIT_FAILURE : r);
+ }
+
+ r = wait_for_terminate_and_check("(sd-executor)", executor_pid, 0);
+ if (r < 0)
+ return r;
+ if (!FLAGS_SET(flags, EXEC_DIR_IGNORE_ERRORS) && r > 0)
+ return r;
+
+ if (!callbacks)
+ return 0;
+
+ if (lseek(fd, 0, SEEK_SET) < 0)
+ return log_error_errno(errno, "Failed to rewind serialization fd: %m");
+
+ r = callbacks[STDOUT_CONSUME](fd, callback_args[STDOUT_CONSUME]);
+ fd = -1;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse returned data: %m");
+ return 0;
+}
+
+static int gather_environment_generate(int fd, void *arg) {
+ char ***env = arg, **x, **y;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_strv_free_ char **new = NULL;
+ int r;
+
+ /* Read a series of VAR=value assignments from fd, use them to update the list of
+ * variables in env. Also update the exported environment.
+ *
+ * fd is always consumed, even on error.
+ */
+
+ assert(env);
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ r = load_env_file_pairs(f, NULL, &new);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(x, y, new) {
+ char *p;
+
+ if (!env_name_is_valid(*x)) {
+ log_warning("Invalid variable assignment \"%s=...\", ignoring.", *x);
+ continue;
+ }
+
+ p = strjoin(*x, "=", *y);
+ if (!p)
+ return -ENOMEM;
+
+ r = strv_env_replace(env, p);
+ if (r < 0)
+ return r;
+
+ if (setenv(*x, *y, true) < 0)
+ return -errno;
+ }
+
+ return r;
+}
+
+static int gather_environment_collect(int fd, void *arg) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char ***env = arg;
+ int r;
+
+ /* Write out a series of env=cescape(VAR=value) assignments to fd. */
+
+ assert(env);
+
+ f = fdopen(fd, "w");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ r = serialize_strv(f, "env", *env);
+ if (r < 0)
+ return r;
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int gather_environment_consume(int fd, void *arg) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char ***env = arg;
+ int r = 0;
+
+ /* Read a series of env=cescape(VAR=value) assignments from fd into env. */
+
+ assert(env);
+
+ f = fdopen(fd, "r");
+ if (!f) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ const char *v;
+ int k;
+
+ k = read_line(f, LONG_LINE_MAX, &line);
+ if (k < 0)
+ return k;
+ if (k == 0)
+ break;
+
+ v = startswith(line, "env=");
+ if (!v) {
+ log_debug("Serialization line \"%s\" unexpectedly didn't start with \"env=\".", line);
+ if (r == 0)
+ r = -EINVAL;
+
+ continue;
+ }
+
+ k = deserialize_environment(v, env);
+ if (k < 0) {
+ log_debug_errno(k, "Invalid serialization line \"%s\": %m", line);
+
+ if (r == 0)
+ r = k;
+ }
+ }
+
+ return r;
+}
+
+int exec_command_flags_from_strv(char **ex_opts, ExecCommandFlags *flags) {
+ ExecCommandFlags ex_flag, ret_flags = 0;
+ char **opt;
+
+ assert(flags);
+
+ STRV_FOREACH(opt, ex_opts) {
+ ex_flag = exec_command_flags_from_string(*opt);
+ if (ex_flag >= 0)
+ ret_flags |= ex_flag;
+ else
+ return -EINVAL;
+ }
+
+ *flags = ret_flags;
+
+ return 0;
+}
+
+int exec_command_flags_to_strv(ExecCommandFlags flags, char ***ex_opts) {
+ _cleanup_strv_free_ char **ret_opts = NULL;
+ ExecCommandFlags it = flags;
+ const char *str;
+ int i, r;
+
+ assert(ex_opts);
+
+ for (i = 0; it != 0; it &= ~(1 << i), i++) {
+ if (FLAGS_SET(flags, (1 << i))) {
+ str = exec_command_flags_to_string(1 << i);
+ if (!str)
+ return -EINVAL;
+
+ r = strv_extend(&ret_opts, str);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *ex_opts = TAKE_PTR(ret_opts);
+
+ return 0;
+}
+
+const gather_stdout_callback_t gather_environment[] = {
+ gather_environment_generate,
+ gather_environment_collect,
+ gather_environment_consume,
+};
+
+static const char* const exec_command_strings[] = {
+ "ignore-failure", /* EXEC_COMMAND_IGNORE_FAILURE */
+ "privileged", /* EXEC_COMMAND_FULLY_PRIVILEGED */
+ "no-setuid", /* EXEC_COMMAND_NO_SETUID */
+ "ambient", /* EXEC_COMMAND_AMBIENT_MAGIC */
+ "no-env-expand", /* EXEC_COMMAND_NO_ENV_EXPAND */
+};
+
+const char* exec_command_flags_to_string(ExecCommandFlags i) {
+ size_t idx;
+
+ for (idx = 0; idx < ELEMENTSOF(exec_command_strings); idx++)
+ if (i == (1 << idx))
+ return exec_command_strings[idx];
+
+ return NULL;
+}
+
+ExecCommandFlags exec_command_flags_from_string(const char *s) {
+ ssize_t idx;
+
+ idx = string_table_lookup(exec_command_strings, ELEMENTSOF(exec_command_strings), s);
+
+ if (idx < 0)
+ return _EXEC_COMMAND_FLAGS_INVALID;
+ else
+ return 1 << idx;
+}
diff --git a/src/shared/exec-util.h b/src/shared/exec-util.h
new file mode 100644
index 0000000..a69d57c
--- /dev/null
+++ b/src/shared/exec-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+
+typedef int (*gather_stdout_callback_t) (int fd, void *arg);
+
+enum {
+ STDOUT_GENERATE, /* from generators to helper process */
+ STDOUT_COLLECT, /* from helper process to main process */
+ STDOUT_CONSUME, /* process data in main process */
+ _STDOUT_CONSUME_MAX,
+};
+
+typedef enum {
+ EXEC_DIR_NONE = 0, /* No execdir flags */
+ EXEC_DIR_PARALLEL = 1 << 0, /* Execute scripts in parallel, if possible */
+ EXEC_DIR_IGNORE_ERRORS = 1 << 1, /* Ignore non-zero exit status of scripts */
+} ExecDirFlags;
+
+typedef enum ExecCommandFlags {
+ EXEC_COMMAND_IGNORE_FAILURE = 1 << 0,
+ EXEC_COMMAND_FULLY_PRIVILEGED = 1 << 1,
+ EXEC_COMMAND_NO_SETUID = 1 << 2,
+ EXEC_COMMAND_AMBIENT_MAGIC = 1 << 3,
+ EXEC_COMMAND_NO_ENV_EXPAND = 1 << 4,
+ _EXEC_COMMAND_FLAGS_INVALID = -1,
+} ExecCommandFlags;
+
+int execute_directories(
+ const char* const* directories,
+ usec_t timeout,
+ gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX],
+ void* const callback_args[_STDOUT_CONSUME_MAX],
+ char *argv[],
+ char *envp[],
+ ExecDirFlags flags);
+
+int exec_command_flags_from_strv(char **ex_opts, ExecCommandFlags *flags);
+int exec_command_flags_to_strv(ExecCommandFlags flags, char ***ex_opts);
+
+extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX];
+
+const char* exec_command_flags_to_string(ExecCommandFlags i);
+ExecCommandFlags exec_command_flags_from_string(const char *s);
diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c
new file mode 100644
index 0000000..b71dd7a
--- /dev/null
+++ b/src/shared/exit-status.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <sysexits.h>
+
+#include "exit-status.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+
+const ExitStatusMapping exit_status_mappings[256] = {
+ /* Exit status ranges:
+ *
+ * 0…1 │ ISO C, EXIT_SUCCESS + EXIT_FAILURE
+ * 2…7 │ LSB exit codes for init scripts
+ * 8…63 │ (Currently unmapped)
+ * 64…78 │ BSD defined exit codes
+ * 79…199 │ (Currently unmapped)
+ * 200…242 │ systemd's private error codes (might be extended to 254 in future development)
+ * 243…254 │ (Currently unmapped, but see above)
+ *
+ * 255 │ EXIT_EXCEPTION (We use this to propagate exit-by-signal events. It's frequently used by others apps (like bash)
+ * │ to indicate exit reason that cannot really be expressed in a single exit status value — such as a propagated
+ * │ signal or such, and we follow that logic here.)
+ */
+
+ [EXIT_SUCCESS] = { "SUCCESS", EXIT_STATUS_LIBC },
+ [EXIT_FAILURE] = { "FAILURE", EXIT_STATUS_LIBC },
+
+ [EXIT_CHDIR] = { "CHDIR", EXIT_STATUS_SYSTEMD },
+ [EXIT_NICE] = { "NICE", EXIT_STATUS_SYSTEMD },
+ [EXIT_FDS] = { "FDS", EXIT_STATUS_SYSTEMD },
+ [EXIT_EXEC] = { "EXEC", EXIT_STATUS_SYSTEMD },
+ [EXIT_MEMORY] = { "MEMORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_LIMITS] = { "LIMITS", EXIT_STATUS_SYSTEMD },
+ [EXIT_OOM_ADJUST] = { "OOM_ADJUST", EXIT_STATUS_SYSTEMD },
+ [EXIT_SIGNAL_MASK] = { "SIGNAL_MASK", EXIT_STATUS_SYSTEMD },
+ [EXIT_STDIN] = { "STDIN", EXIT_STATUS_SYSTEMD },
+ [EXIT_STDOUT] = { "STDOUT", EXIT_STATUS_SYSTEMD },
+ [EXIT_CHROOT] = { "CHROOT", EXIT_STATUS_SYSTEMD },
+ [EXIT_IOPRIO] = { "IOPRIO", EXIT_STATUS_SYSTEMD },
+ [EXIT_TIMERSLACK] = { "TIMERSLACK", EXIT_STATUS_SYSTEMD },
+ [EXIT_SECUREBITS] = { "SECUREBITS", EXIT_STATUS_SYSTEMD },
+ [EXIT_SETSCHEDULER] = { "SETSCHEDULER", EXIT_STATUS_SYSTEMD },
+ [EXIT_CPUAFFINITY] = { "CPUAFFINITY", EXIT_STATUS_SYSTEMD },
+ [EXIT_GROUP] = { "GROUP", EXIT_STATUS_SYSTEMD },
+ [EXIT_USER] = { "USER", EXIT_STATUS_SYSTEMD },
+ [EXIT_CAPABILITIES] = { "CAPABILITIES", EXIT_STATUS_SYSTEMD },
+ [EXIT_CGROUP] = { "CGROUP", EXIT_STATUS_SYSTEMD },
+ [EXIT_SETSID] = { "SETSID", EXIT_STATUS_SYSTEMD },
+ [EXIT_CONFIRM] = { "CONFIRM", EXIT_STATUS_SYSTEMD },
+ [EXIT_STDERR] = { "STDERR", EXIT_STATUS_SYSTEMD },
+ [EXIT_PAM] = { "PAM", EXIT_STATUS_SYSTEMD },
+ [EXIT_NETWORK] = { "NETWORK", EXIT_STATUS_SYSTEMD },
+ [EXIT_NAMESPACE] = { "NAMESPACE", EXIT_STATUS_SYSTEMD },
+ [EXIT_NO_NEW_PRIVILEGES] = { "NO_NEW_PRIVILEGES", EXIT_STATUS_SYSTEMD },
+ [EXIT_SECCOMP] = { "SECCOMP", EXIT_STATUS_SYSTEMD },
+ [EXIT_SELINUX_CONTEXT] = { "SELINUX_CONTEXT", EXIT_STATUS_SYSTEMD },
+ [EXIT_PERSONALITY] = { "PERSONALITY", EXIT_STATUS_SYSTEMD },
+ [EXIT_APPARMOR_PROFILE] = { "APPARMOR", EXIT_STATUS_SYSTEMD },
+ [EXIT_ADDRESS_FAMILIES] = { "ADDRESS_FAMILIES", EXIT_STATUS_SYSTEMD },
+ [EXIT_RUNTIME_DIRECTORY] = { "RUNTIME_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CHOWN] = { "CHOWN", EXIT_STATUS_SYSTEMD },
+ [EXIT_SMACK_PROCESS_LABEL] = { "SMACK_PROCESS_LABEL", EXIT_STATUS_SYSTEMD },
+ [EXIT_KEYRING] = { "KEYRING", EXIT_STATUS_SYSTEMD },
+ [EXIT_STATE_DIRECTORY] = { "STATE_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CACHE_DIRECTORY] = { "CACHE_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_LOGS_DIRECTORY] = { "LOGS_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CONFIGURATION_DIRECTORY] = { "CONFIGURATION_DIRECTORY", EXIT_STATUS_SYSTEMD },
+ [EXIT_NUMA_POLICY] = { "NUMA_POLICY", EXIT_STATUS_SYSTEMD },
+ [EXIT_CREDENTIALS] = { "CREDENTIALS", EXIT_STATUS_SYSTEMD },
+
+ [EXIT_EXCEPTION] = { "EXCEPTION", EXIT_STATUS_SYSTEMD },
+
+ [EXIT_INVALIDARGUMENT] = { "INVALIDARGUMENT", EXIT_STATUS_LSB },
+ [EXIT_NOTIMPLEMENTED] = { "NOTIMPLEMENTED", EXIT_STATUS_LSB },
+ [EXIT_NOPERMISSION] = { "NOPERMISSION", EXIT_STATUS_LSB },
+ [EXIT_NOTINSTALLED] = { "NOTINSTALLED", EXIT_STATUS_LSB },
+ [EXIT_NOTCONFIGURED] = { "NOTCONFIGURED", EXIT_STATUS_LSB },
+ [EXIT_NOTRUNNING] = { "NOTRUNNING", EXIT_STATUS_LSB },
+
+ [EX_USAGE] = { "USAGE", EXIT_STATUS_BSD },
+ [EX_DATAERR] = { "DATAERR", EXIT_STATUS_BSD },
+ [EX_NOINPUT] = { "NOINPUT", EXIT_STATUS_BSD },
+ [EX_NOUSER] = { "NOUSER", EXIT_STATUS_BSD },
+ [EX_NOHOST] = { "NOHOST", EXIT_STATUS_BSD },
+ [EX_UNAVAILABLE] = { "UNAVAILABLE", EXIT_STATUS_BSD },
+ [EX_SOFTWARE] = { "SOFTWARE", EXIT_STATUS_BSD },
+ [EX_OSERR] = { "OSERR", EXIT_STATUS_BSD },
+ [EX_OSFILE] = { "OSFILE", EXIT_STATUS_BSD },
+ [EX_CANTCREAT] = { "CANTCREAT", EXIT_STATUS_BSD },
+ [EX_IOERR] = { "IOERR", EXIT_STATUS_BSD },
+ [EX_TEMPFAIL] = { "TEMPFAIL", EXIT_STATUS_BSD },
+ [EX_PROTOCOL] = { "PROTOCOL", EXIT_STATUS_BSD },
+ [EX_NOPERM] = { "NOPERM", EXIT_STATUS_BSD },
+ [EX_CONFIG] = { "CONFIG", EXIT_STATUS_BSD },
+};
+
+const char* exit_status_to_string(int code, ExitStatusClass class) {
+ if (code < 0 || (size_t) code >= ELEMENTSOF(exit_status_mappings))
+ return NULL;
+ return class & exit_status_mappings[code].class ? exit_status_mappings[code].name : NULL;
+}
+
+const char* exit_status_class(int code) {
+ if (code < 0 || (size_t) code >= ELEMENTSOF(exit_status_mappings))
+ return NULL;
+
+ switch (exit_status_mappings[code].class) {
+ case EXIT_STATUS_LIBC:
+ return "libc";
+ case EXIT_STATUS_SYSTEMD:
+ return "systemd";
+ case EXIT_STATUS_LSB:
+ return "LSB";
+ case EXIT_STATUS_BSD:
+ return "BSD";
+ default: return NULL;
+ }
+}
+
+int exit_status_from_string(const char *s) {
+ uint8_t val;
+ int r;
+
+ for (size_t i = 0; i < ELEMENTSOF(exit_status_mappings); i++)
+ if (streq_ptr(s, exit_status_mappings[i].name))
+ return i;
+
+ r = safe_atou8(s, &val);
+ if (r < 0)
+ return r;
+
+ return val;
+}
+
+bool is_clean_exit(int code, int status, ExitClean clean, const ExitStatusSet *success_status) {
+ if (code == CLD_EXITED)
+ return status == 0 ||
+ (success_status &&
+ bitmap_isset(&success_status->status, status));
+
+ /* If a daemon does not implement handlers for some of the signals, we do not consider this an
+ unclean shutdown */
+ if (code == CLD_KILLED)
+ return
+ (clean == EXIT_CLEAN_DAEMON && IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE)) ||
+ (success_status &&
+ bitmap_isset(&success_status->signal, status));
+
+ return false;
+}
+
+void exit_status_set_free(ExitStatusSet *x) {
+ assert(x);
+
+ bitmap_clear(&x->status);
+ bitmap_clear(&x->signal);
+}
+
+bool exit_status_set_is_empty(const ExitStatusSet *x) {
+ if (!x)
+ return true;
+
+ return bitmap_isclear(&x->status) && bitmap_isclear(&x->signal);
+}
+
+bool exit_status_set_test(const ExitStatusSet *x, int code, int status) {
+ if (code == CLD_EXITED && bitmap_isset(&x->status, status))
+ return true;
+
+ if (IN_SET(code, CLD_KILLED, CLD_DUMPED) && bitmap_isset(&x->signal, status))
+ return true;
+
+ return false;
+}
diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h
new file mode 100644
index 0000000..05707bf
--- /dev/null
+++ b/src/shared/exit-status.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "bitmap.h"
+#include "hashmap.h"
+#include "macro.h"
+
+/* This defines pretty names for the LSB 'start' verb exit codes. Note that they shouldn't be confused with
+ * the LSB 'status' verb exit codes which are defined very differently. For details see:
+ *
+ * https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html
+ */
+
+enum {
+ /* EXIT_SUCCESS defined by libc */
+ /* EXIT_FAILURE defined by libc */
+ EXIT_INVALIDARGUMENT = 2,
+ EXIT_NOTIMPLEMENTED = 3,
+ EXIT_NOPERMISSION = 4,
+ EXIT_NOTINSTALLED = 5,
+ EXIT_NOTCONFIGURED = 6,
+ EXIT_NOTRUNNING = 7,
+
+ /* BSD's sysexits.h defines a couple EX_xyz exit codes in the range 64 … 78 */
+
+ /* The LSB suggests that error codes >= 200 are "reserved". We use them here under the assumption
+ * that they hence are unused by init scripts. */
+ EXIT_CHDIR = 200,
+ EXIT_NICE,
+ EXIT_FDS,
+ EXIT_EXEC,
+ EXIT_MEMORY,
+ EXIT_LIMITS,
+ EXIT_OOM_ADJUST,
+ EXIT_SIGNAL_MASK,
+ EXIT_STDIN,
+ EXIT_STDOUT,
+ EXIT_CHROOT, /* 210 */
+ EXIT_IOPRIO,
+ EXIT_TIMERSLACK,
+ EXIT_SECUREBITS,
+ EXIT_SETSCHEDULER,
+ EXIT_CPUAFFINITY,
+ EXIT_GROUP,
+ EXIT_USER,
+ EXIT_CAPABILITIES,
+ EXIT_CGROUP,
+ EXIT_SETSID, /* 220 */
+ EXIT_CONFIRM,
+ EXIT_STDERR,
+ _EXIT_RESERVED, /* used to be tcpwrap, don't reuse! */
+ EXIT_PAM,
+ EXIT_NETWORK,
+ EXIT_NAMESPACE,
+ EXIT_NO_NEW_PRIVILEGES,
+ EXIT_SECCOMP,
+ EXIT_SELINUX_CONTEXT,
+ EXIT_PERSONALITY, /* 230 */
+ EXIT_APPARMOR_PROFILE,
+ EXIT_ADDRESS_FAMILIES,
+ EXIT_RUNTIME_DIRECTORY,
+ _EXIT_RESERVED2, /* used to be used by kdbus, don't reuse */
+ EXIT_CHOWN,
+ EXIT_SMACK_PROCESS_LABEL,
+ EXIT_KEYRING,
+ EXIT_STATE_DIRECTORY,
+ EXIT_CACHE_DIRECTORY,
+ EXIT_LOGS_DIRECTORY, /* 240 */
+ EXIT_CONFIGURATION_DIRECTORY,
+ EXIT_NUMA_POLICY,
+ EXIT_CREDENTIALS,
+
+ EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */
+};
+
+typedef enum ExitStatusClass {
+ EXIT_STATUS_LIBC = 1 << 0, /* libc EXIT_STATUS/EXIT_FAILURE */
+ EXIT_STATUS_SYSTEMD = 1 << 1, /* systemd's own exit codes */
+ EXIT_STATUS_LSB = 1 << 2, /* LSB exit codes */
+ EXIT_STATUS_BSD = 1 << 3, /* BSD (EX_xyz) exit codes */
+ EXIT_STATUS_FULL = EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD | EXIT_STATUS_LSB | EXIT_STATUS_BSD,
+} ExitStatusClass;
+
+typedef struct ExitStatusSet {
+ Bitmap status;
+ Bitmap signal;
+} ExitStatusSet;
+
+const char* exit_status_to_string(int code, ExitStatusClass class) _const_;
+const char* exit_status_class(int code) _const_;
+int exit_status_from_string(const char *s) _pure_;
+
+typedef struct ExitStatusMapping {
+ const char *name;
+ ExitStatusClass class;
+} ExitStatusMapping;
+
+extern const ExitStatusMapping exit_status_mappings[256];
+
+typedef enum ExitClean {
+ EXIT_CLEAN_DAEMON,
+ EXIT_CLEAN_COMMAND,
+} ExitClean;
+
+bool is_clean_exit(int code, int status, ExitClean clean, const ExitStatusSet *success_status);
+
+void exit_status_set_free(ExitStatusSet *x);
+bool exit_status_set_is_empty(const ExitStatusSet *x);
+bool exit_status_set_test(const ExitStatusSet *x, int code, int status);
diff --git a/src/shared/fdset.c b/src/shared/fdset.c
new file mode 100644
index 0000000..679e4aa
--- /dev/null
+++ b/src/shared/fdset.c
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+
+#include "sd-daemon.h"
+
+#include "alloc-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fdset.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+
+#define MAKE_SET(s) ((Set*) s)
+#define MAKE_FDSET(s) ((FDSet*) s)
+
+FDSet *fdset_new(void) {
+ return MAKE_FDSET(set_new(NULL));
+}
+
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds) {
+ size_t i;
+ FDSet *s;
+ int r;
+
+ assert(ret);
+
+ s = fdset_new();
+ if (!s)
+ return -ENOMEM;
+
+ for (i = 0; i < n_fds; i++) {
+
+ r = fdset_put(s, fds[i]);
+ if (r < 0) {
+ set_free(MAKE_SET(s));
+ return r;
+ }
+ }
+
+ *ret = s;
+ return 0;
+}
+
+void fdset_close(FDSet *s) {
+ void *p;
+
+ while ((p = set_steal_first(MAKE_SET(s)))) {
+ /* Valgrind's fd might have ended up in this set here, due to fdset_new_fill(). We'll ignore
+ * all failures here, so that the EBADFD that valgrind will return us on close() doesn't
+ * influence us */
+
+ /* When reloading duplicates of the private bus connection fds and suchlike are closed here,
+ * which has no effect at all, since they are only duplicates. So don't be surprised about
+ * these log messages. */
+
+ log_debug("Closing set fd %i", PTR_TO_FD(p));
+ (void) close_nointr(PTR_TO_FD(p));
+ }
+}
+
+FDSet* fdset_free(FDSet *s) {
+ fdset_close(s);
+ set_free(MAKE_SET(s));
+ return NULL;
+}
+
+int fdset_put(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return set_put(MAKE_SET(s), FD_TO_PTR(fd));
+}
+
+int fdset_put_dup(FDSet *s, int fd) {
+ int copy, r;
+
+ assert(s);
+ assert(fd >= 0);
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ r = fdset_put(s, copy);
+ if (r < 0) {
+ safe_close(copy);
+ return r;
+ }
+
+ return copy;
+}
+
+bool fdset_contains(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return !!set_get(MAKE_SET(s), FD_TO_PTR(fd));
+}
+
+int fdset_remove(FDSet *s, int fd) {
+ assert(s);
+ assert(fd >= 0);
+
+ return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT;
+}
+
+int fdset_new_fill(FDSet **_s) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+ FDSet *s;
+
+ assert(_s);
+
+ /* Creates an fdset and fills in all currently open file
+ * descriptors. */
+
+ d = opendir("/proc/self/fd");
+ if (!d)
+ return -errno;
+
+ s = fdset_new();
+ if (!s) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ int fd = -1;
+
+ r = safe_atoi(de->d_name, &fd);
+ if (r < 0)
+ goto finish;
+
+ if (fd < 3)
+ continue;
+
+ if (fd == dirfd(d))
+ continue;
+
+ r = fdset_put(s, fd);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = 0;
+ *_s = TAKE_PTR(s);
+
+finish:
+ /* We won't close the fds here! */
+ if (s)
+ set_free(MAKE_SET(s));
+
+ return r;
+}
+
+int fdset_cloexec(FDSet *fds, bool b) {
+ void *p;
+ int r;
+
+ assert(fds);
+
+ SET_FOREACH(p, MAKE_SET(fds)) {
+ r = fd_cloexec(PTR_TO_FD(p), b);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int fdset_new_listen_fds(FDSet **_s, bool unset) {
+ int n, fd, r;
+ FDSet *s;
+
+ assert(_s);
+
+ /* Creates an fdset and fills in all passed file descriptors */
+
+ s = fdset_new();
+ if (!s) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ n = sd_listen_fds(unset);
+ for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) {
+ r = fdset_put(s, fd);
+ if (r < 0)
+ goto fail;
+ }
+
+ *_s = s;
+ return 0;
+
+fail:
+ if (s)
+ set_free(MAKE_SET(s));
+
+ return r;
+}
+
+int fdset_close_others(FDSet *fds) {
+ void *e;
+ int *a = NULL;
+ size_t j = 0, m;
+
+ m = fdset_size(fds);
+
+ if (m > 0) {
+ a = newa(int, m);
+ SET_FOREACH(e, MAKE_SET(fds))
+ a[j++] = PTR_TO_FD(e);
+ }
+
+ assert(j == m);
+
+ return close_all_fds(a, j);
+}
+
+unsigned fdset_size(FDSet *fds) {
+ return set_size(MAKE_SET(fds));
+}
+
+bool fdset_isempty(FDSet *fds) {
+ return set_isempty(MAKE_SET(fds));
+}
+
+int fdset_iterate(FDSet *s, Iterator *i) {
+ void *p;
+
+ if (!set_iterate(MAKE_SET(s), i, &p))
+ return -ENOENT;
+
+ return PTR_TO_FD(p);
+}
+
+int fdset_steal_first(FDSet *fds) {
+ void *p;
+
+ p = set_steal_first(MAKE_SET(fds));
+ if (!p)
+ return -ENOENT;
+
+ return PTR_TO_FD(p);
+}
diff --git a/src/shared/fdset.h b/src/shared/fdset.h
new file mode 100644
index 0000000..39d15ee
--- /dev/null
+++ b/src/shared/fdset.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "set.h"
+
+typedef struct FDSet FDSet;
+
+FDSet* fdset_new(void);
+FDSet* fdset_free(FDSet *s);
+
+int fdset_put(FDSet *s, int fd);
+int fdset_put_dup(FDSet *s, int fd);
+
+bool fdset_contains(FDSet *s, int fd);
+int fdset_remove(FDSet *s, int fd);
+
+int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds);
+int fdset_new_fill(FDSet **ret);
+int fdset_new_listen_fds(FDSet **ret, bool unset);
+
+int fdset_cloexec(FDSet *fds, bool b);
+
+int fdset_close_others(FDSet *fds);
+
+unsigned fdset_size(FDSet *fds);
+bool fdset_isempty(FDSet *fds);
+
+int fdset_iterate(FDSet *s, Iterator *i);
+
+int fdset_steal_first(FDSet *fds);
+
+void fdset_close(FDSet *fds);
+
+#define _FDSET_FOREACH(fd, fds, i) \
+ for (Iterator i = ITERATOR_FIRST; ((fd) = fdset_iterate((fds), &i)) >= 0; )
+#define FDSET_FOREACH(fd, fds) \
+ _FDSET_FOREACH(fd, fds, UNIQ_T(i, UNIQ))
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FDSet*, fdset_free);
+#define _cleanup_fdset_free_ _cleanup_(fdset_freep)
diff --git a/src/shared/fileio-label.c b/src/shared/fileio-label.c
new file mode 100644
index 0000000..d03b054
--- /dev/null
+++ b/src/shared/fileio-label.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#include "fileio-label.h"
+#include "fileio.h"
+#include "selinux-util.h"
+
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts) {
+ int r;
+
+ r = mac_selinux_create_file_prepare(fn, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = write_string_file_ts(fn, line, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC, ts);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
+
+int create_shutdown_run_nologin_or_warn(void) {
+ int r;
+
+ /* This is used twice: once in systemd-user-sessions.service, in order to block logins when we actually go
+ * down, and once in systemd-logind.service when shutdowns are scheduled, and logins are to be turned off a bit
+ * in advance. We use the same wording of the message in both cases. */
+
+ r = write_string_file_atomic_label("/run/nologin",
+ "System is going down. Unprivileged users are not permitted to log in anymore. "
+ "For technical details, see pam_nologin(8).");
+ if (r < 0)
+ return log_error_errno(r, "Failed to create /run/nologin: %m");
+
+ return 0;
+}
diff --git a/src/shared/fileio-label.h b/src/shared/fileio-label.h
new file mode 100644
index 0000000..03b4a16
--- /dev/null
+++ b/src/shared/fileio-label.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to
+ * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not
+ * for all */
+
+int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts);
+static inline int write_string_file_atomic_label(const char *fn, const char *line) {
+ return write_string_file_atomic_label_ts(fn, line, NULL);
+}
+
+int create_shutdown_run_nologin_or_warn(void);
diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c
new file mode 100644
index 0000000..007d2cb
--- /dev/null
+++ b/src/shared/firewall-util.c
@@ -0,0 +1,350 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* Temporary work-around for broken glibc vs. linux kernel header definitions
+ * This is already fixed upstream, remove this when distributions have updated.
+ */
+#define _NET_IF_H 1
+
+#include <arpa/inet.h>
+#include <endian.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <net/if.h>
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#include <linux/if.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/nf_nat.h>
+#include <linux/netfilter/xt_addrtype.h>
+#include <libiptc/libiptc.h>
+
+#include "alloc-util.h"
+#include "firewall-util.h"
+#include "in-addr-util.h"
+#include "macro.h"
+#include "socket-util.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free);
+
+static int entry_fill_basics(
+ struct ipt_entry *entry,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+
+ assert(entry);
+
+ if (out_interface && !ifname_valid(out_interface))
+ return -EINVAL;
+ if (in_interface && !ifname_valid(in_interface))
+ return -EINVAL;
+
+ entry->ip.proto = protocol;
+
+ if (in_interface) {
+ size_t l;
+
+ l = strlen(in_interface);
+ assert(l < sizeof entry->ip.iniface);
+ assert(l < sizeof entry->ip.iniface_mask);
+
+ strcpy(entry->ip.iniface, in_interface);
+ memset(entry->ip.iniface_mask, 0xFF, l + 1);
+ }
+ if (source) {
+ entry->ip.src = source->in;
+ in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
+ }
+
+ if (out_interface) {
+ size_t l = strlen(out_interface);
+ assert(l < sizeof entry->ip.outiface);
+ assert(l < sizeof entry->ip.outiface_mask);
+
+ strcpy(entry->ip.outiface, out_interface);
+ memset(entry->ip.outiface_mask, 0xFF, l + 1);
+ }
+ if (destination) {
+ entry->ip.dst = destination->in;
+ in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
+ }
+
+ return 0;
+}
+
+int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+
+ static const xt_chainlabel chain = "POSTROUTING";
+ _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+ struct ipt_entry *entry, *mask;
+ struct ipt_entry_target *t;
+ size_t sz;
+ struct nf_nat_ipv4_multi_range_compat *mr;
+ int r;
+
+ if (af != AF_INET)
+ return -EOPNOTSUPP;
+
+ if (!IN_SET(protocol, 0, IPPROTO_TCP, IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ h = iptc_init("nat");
+ if (!h)
+ return -errno;
+
+ sz = XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+
+ /* Put together the entry we want to add or remove */
+ entry = alloca0(sz);
+ entry->next_offset = sz;
+ entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry));
+ r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen);
+ if (r < 0)
+ return r;
+
+ /* Fill in target part */
+ t = ipt_get_target(entry);
+ t->u.target_size =
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+ strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name));
+ mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
+ mr->rangesize = 1;
+
+ /* Create a search mask entry */
+ mask = alloca(sz);
+ memset(mask, 0xFF, sz);
+
+ if (add) {
+ if (iptc_check_entry(chain, entry, (unsigned char*) mask, h))
+ return 0;
+ if (errno != ENOENT) /* if other error than not existing yet, fail */
+ return -errno;
+
+ if (!iptc_insert_entry(chain, entry, 0, h))
+ return -errno;
+ } else {
+ if (!iptc_delete_entry(chain, entry, (unsigned char*) mask, h)) {
+ if (errno == ENOENT) /* if it's already gone, all is good! */
+ return 0;
+
+ return -errno;
+ }
+ }
+
+ if (!iptc_commit(h))
+ return -errno;
+
+ return 0;
+}
+
+int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote) {
+
+ static const xt_chainlabel chain_pre = "PREROUTING", chain_output = "OUTPUT";
+ _cleanup_(iptc_freep) struct xtc_handle *h = NULL;
+ struct ipt_entry *entry, *mask;
+ struct ipt_entry_target *t;
+ struct ipt_entry_match *m;
+ struct xt_addrtype_info_v1 *at;
+ struct nf_nat_ipv4_multi_range_compat *mr;
+ size_t sz, msz;
+ int r;
+
+ assert(add || !previous_remote);
+
+ if (af != AF_INET)
+ return -EOPNOTSUPP;
+
+ if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ if (local_port <= 0)
+ return -EINVAL;
+
+ if (remote_port <= 0)
+ return -EINVAL;
+
+ h = iptc_init("nat");
+ if (!h)
+ return -errno;
+
+ sz = XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+
+ if (protocol == IPPROTO_TCP)
+ msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_tcp));
+ else
+ msz = XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_udp));
+
+ sz += msz;
+
+ /* Fill in basic part */
+ entry = alloca0(sz);
+ entry->next_offset = sz;
+ entry->target_offset =
+ XT_ALIGN(sizeof(struct ipt_entry)) +
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) +
+ msz;
+ r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen);
+ if (r < 0)
+ return r;
+
+ /* Fill in first match */
+ m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)));
+ m->u.match_size = msz;
+ if (protocol == IPPROTO_TCP) {
+ struct xt_tcp *tcp;
+
+ strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name));
+ tcp = (struct xt_tcp*) m->data;
+ tcp->dpts[0] = tcp->dpts[1] = local_port;
+ tcp->spts[0] = 0;
+ tcp->spts[1] = 0xFFFF;
+
+ } else {
+ struct xt_udp *udp;
+
+ strncpy(m->u.user.name, "udp", sizeof(m->u.user.name));
+ udp = (struct xt_udp*) m->data;
+ udp->dpts[0] = udp->dpts[1] = local_port;
+ udp->spts[0] = 0;
+ udp->spts[1] = 0xFFFF;
+ }
+
+ /* Fill in second match */
+ m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz);
+ m->u.match_size =
+ XT_ALIGN(sizeof(struct ipt_entry_match)) +
+ XT_ALIGN(sizeof(struct xt_addrtype_info_v1));
+ strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name));
+ m->u.user.revision = 1;
+ at = (struct xt_addrtype_info_v1*) m->data;
+ at->dest = XT_ADDRTYPE_LOCAL;
+
+ /* Fill in target part */
+ t = ipt_get_target(entry);
+ t->u.target_size =
+ XT_ALIGN(sizeof(struct ipt_entry_target)) +
+ XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat));
+ strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name));
+ mr = (struct nf_nat_ipv4_multi_range_compat*) t->data;
+ mr->rangesize = 1;
+ mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS;
+ mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+ if (protocol == IPPROTO_TCP)
+ mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htobe16(remote_port);
+ else
+ mr->range[0].min.udp.port = mr->range[0].max.udp.port = htobe16(remote_port);
+
+ mask = alloca0(sz);
+ memset(mask, 0xFF, sz);
+
+ if (add) {
+ /* Add the PREROUTING rule, if it is missing so far */
+ if (!iptc_check_entry(chain_pre, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -EINVAL;
+
+ if (!iptc_insert_entry(chain_pre, entry, 0, h))
+ return -errno;
+ }
+
+ /* If a previous remote is set, remove its entry */
+ if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+ mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+ if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr;
+ }
+
+ /* Add the OUTPUT rule, if it is missing so far */
+ if (!in_interface) {
+
+ /* Don't apply onto loopback addresses */
+ if (!destination) {
+ entry->ip.dst.s_addr = htobe32(0x7F000000);
+ entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+ entry->ip.invflags = IPT_INV_DSTIP;
+ }
+
+ if (!iptc_check_entry(chain_output, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (!iptc_insert_entry(chain_output, entry, 0, h))
+ return -errno;
+ }
+
+ /* If a previous remote is set, remove its entry */
+ if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) {
+ mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr;
+
+ if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+ }
+ }
+ } else {
+ if (!iptc_delete_entry(chain_pre, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+
+ if (!in_interface) {
+ if (!destination) {
+ entry->ip.dst.s_addr = htobe32(0x7F000000);
+ entry->ip.dmsk.s_addr = htobe32(0xFF000000);
+ entry->ip.invflags = IPT_INV_DSTIP;
+ }
+
+ if (!iptc_delete_entry(chain_output, entry, (unsigned char*) mask, h)) {
+ if (errno != ENOENT)
+ return -errno;
+ }
+ }
+ }
+
+ if (!iptc_commit(h))
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h
new file mode 100644
index 0000000..0a51a3c
--- /dev/null
+++ b/src/shared/firewall-util.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "in-addr-util.h"
+
+#if HAVE_LIBIPTC
+
+int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen);
+
+int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote);
+
+#else
+
+static inline int fw_add_masquerade(
+ bool add,
+ int af,
+ int protocol,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const char *out_interface,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen) {
+ return -EOPNOTSUPP;
+}
+
+static inline int fw_add_local_dnat(
+ bool add,
+ int af,
+ int protocol,
+ const char *in_interface,
+ const union in_addr_union *source,
+ unsigned source_prefixlen,
+ const union in_addr_union *destination,
+ unsigned destination_prefixlen,
+ uint16_t local_port,
+ const union in_addr_union *remote,
+ uint16_t remote_port,
+ const union in_addr_union *previous_remote) {
+ return -EOPNOTSUPP;
+}
+
+#endif
diff --git a/src/shared/format-table.c b/src/shared/format-table.c
new file mode 100644
index 0000000..a13a198
--- /dev/null
+++ b/src/shared/format-table.c
@@ -0,0 +1,2549 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <ctype.h>
+#include <net/if.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "format-table.h"
+#include "format-util.h"
+#include "gunicode.h"
+#include "id128-util.h"
+#include "in-addr-util.h"
+#include "locale-util.h"
+#include "memory-util.h"
+#include "pager.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "sort-util.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+
+#define DEFAULT_WEIGHT 100
+
+/*
+ A few notes on implementation details:
+
+ - TableCell is a 'fake' structure, it's just used as data type to pass references to specific cell positions in the
+ table. It can be easily converted to an index number and back.
+
+ - TableData is where the actual data is stored: it encapsulates the data and formatting for a specific cell. It's
+ 'pseudo-immutable' and ref-counted. When a cell's data's formatting is to be changed, we duplicate the object if the
+ ref-counting is larger than 1. Note that TableData and its ref-counting is mostly not visible to the outside. The
+ outside only sees Table and TableCell.
+
+ - The Table object stores a simple one-dimensional array of references to TableData objects, one row after the
+ previous one.
+
+ - There's no special concept of a "row" or "column" in the table, and no special concept of the "header" row. It's all
+ derived from the cell index: we know how many cells are to be stored in a row, and can determine the rest from
+ that. The first row is always the header row. If header display is turned off we simply skip outputting the first
+ row. Also, when sorting rows we always leave the first row where it is, as the header shouldn't move.
+
+ - Note because there's no row and no column object some properties that might be appropriate as row/column properties
+ are exposed as cell properties instead. For example, the "weight" of a column (which is used to determine where to
+ add/remove space preferable when expanding/compressing tables horizontally) is actually made the "weight" of a
+ cell. Given that we usually need it per-column though we will calculate the average across every cell of the column
+ instead.
+
+ - To make things easy, when cells are added without any explicit configured formatting, then we'll copy the formatting
+ from the same cell in the previous cell. This is particularly useful for the "weight" of the cell (see above), as
+ this means setting the weight of the cells of the header row will nicely propagate to all cells in the other rows.
+*/
+
+typedef struct TableData {
+ unsigned n_ref;
+ TableDataType type;
+
+ size_t minimum_width; /* minimum width for the column */
+ size_t maximum_width; /* maximum width for the column */
+ size_t formatted_for_width; /* the width we tried to format for */
+ unsigned weight; /* the horizontal weight for this column, in case the table is expanded/compressed */
+ unsigned ellipsize_percent; /* 0 … 100, where to place the ellipsis when compression is needed */
+ unsigned align_percent; /* 0 … 100, where to pad with spaces when expanding is needed. 0: left-aligned, 100: right-aligned */
+
+ bool uppercase; /* Uppercase string on display */
+
+ const char *color; /* ANSI color string to use for this cell. When written to terminal should not move cursor. Will automatically be reset after the cell */
+ const char *rgap_color; /* The ANSI color to use for the gap right of this cell. Usually used to underline entire rows in a gapless fashion */
+ char *url; /* A URL to use for a clickable hyperlink */
+ char *formatted; /* A cached textual representation of the cell data, before ellipsation/alignment */
+
+ union {
+ uint8_t data[0]; /* data is generic array */
+ bool boolean;
+ usec_t timestamp;
+ usec_t timespan;
+ uint64_t size;
+ char string[0];
+ char **strv;
+ int int_val;
+ int8_t int8;
+ int16_t int16;
+ int32_t int32;
+ int64_t int64;
+ unsigned uint_val;
+ uint8_t uint8;
+ uint16_t uint16;
+ uint32_t uint32;
+ uint64_t uint64;
+ int percent; /* we use 'int' as datatype for percent values in order to match the result of parse_percent() */
+ int ifindex;
+ union in_addr_union address;
+ sd_id128_t id128;
+ /* … add more here as we start supporting more cell data types … */
+ };
+} TableData;
+
+static size_t TABLE_CELL_TO_INDEX(TableCell *cell) {
+ size_t i;
+
+ assert(cell);
+
+ i = PTR_TO_SIZE(cell);
+ assert(i > 0);
+
+ return i-1;
+}
+
+static TableCell* TABLE_INDEX_TO_CELL(size_t index) {
+ assert(index != (size_t) -1);
+ return SIZE_TO_PTR(index + 1);
+}
+
+struct Table {
+ size_t n_columns;
+ size_t n_cells;
+
+ bool header; /* Whether to show the header row? */
+ size_t width; /* If == 0 format this as wide as necessary. If (size_t) -1 format this to console
+ * width or less wide, but not wider. Otherwise the width to format this table in. */
+ size_t cell_height_max; /* Maximum number of lines per cell. (If there are more, ellipsis is shown. If (size_t) -1 then no limit is set, the default. == 0 is not allowed.) */
+
+ TableData **data;
+ size_t n_allocated;
+
+ size_t *display_map; /* List of columns to show (by their index). It's fine if columns are listed multiple times or not at all */
+ size_t n_display_map;
+
+ size_t *sort_map; /* The columns to order rows by, in order of preference. */
+ size_t n_sort_map;
+
+ bool *reverse_map;
+
+ char *empty_string;
+};
+
+Table *table_new_raw(size_t n_columns) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+
+ assert(n_columns > 0);
+
+ t = new(Table, 1);
+ if (!t)
+ return NULL;
+
+ *t = (struct Table) {
+ .n_columns = n_columns,
+ .header = true,
+ .width = (size_t) -1,
+ .cell_height_max = (size_t) -1,
+ };
+
+ return TAKE_PTR(t);
+}
+
+Table *table_new_internal(const char *first_header, ...) {
+ _cleanup_(table_unrefp) Table *t = NULL;
+ size_t n_columns = 1;
+ va_list ap;
+ int r;
+
+ assert(first_header);
+
+ va_start(ap, first_header);
+ for (;;) {
+ if (!va_arg(ap, const char*))
+ break;
+
+ n_columns++;
+ }
+ va_end(ap);
+
+ t = table_new_raw(n_columns);
+ if (!t)
+ return NULL;
+
+ va_start(ap, first_header);
+ for (const char *h = first_header; h; h = va_arg(ap, const char*)) {
+ TableCell *cell;
+
+ r = table_add_cell(t, &cell, TABLE_STRING, h);
+ if (r < 0) {
+ va_end(ap);
+ return NULL;
+ }
+
+ /* Make the table header uppercase */
+ r = table_set_uppercase(t, cell, true);
+ if (r < 0) {
+ va_end(ap);
+ return NULL;
+ }
+ }
+ va_end(ap);
+
+ assert(t->n_columns == t->n_cells);
+ return TAKE_PTR(t);
+}
+
+static TableData *table_data_free(TableData *d) {
+ assert(d);
+
+ free(d->formatted);
+ free(d->url);
+
+ if (IN_SET(d->type, TABLE_STRV, TABLE_STRV_WRAPPED))
+ strv_free(d->strv);
+
+ return mfree(d);
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(TableData, table_data, table_data_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(TableData*, table_data_unref);
+
+Table *table_unref(Table *t) {
+ if (!t)
+ return NULL;
+
+ for (size_t i = 0; i < t->n_cells; i++)
+ table_data_unref(t->data[i]);
+
+ free(t->data);
+ free(t->display_map);
+ free(t->sort_map);
+ free(t->reverse_map);
+ free(t->empty_string);
+
+ return mfree(t);
+}
+
+static size_t table_data_size(TableDataType type, const void *data) {
+
+ switch (type) {
+
+ case TABLE_EMPTY:
+ return 0;
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ return strlen(data) + 1;
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ return sizeof(char **);
+
+ case TABLE_BOOLEAN:
+ return sizeof(bool);
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ return sizeof(usec_t);
+
+ case TABLE_SIZE:
+ case TABLE_INT64:
+ case TABLE_UINT64:
+ case TABLE_BPS:
+ return sizeof(uint64_t);
+
+ case TABLE_INT32:
+ case TABLE_UINT32:
+ return sizeof(uint32_t);
+
+ case TABLE_INT16:
+ case TABLE_UINT16:
+ return sizeof(uint16_t);
+
+ case TABLE_INT8:
+ case TABLE_UINT8:
+ return sizeof(uint8_t);
+
+ case TABLE_INT:
+ case TABLE_UINT:
+ case TABLE_PERCENT:
+ case TABLE_IFINDEX:
+ return sizeof(int);
+
+ case TABLE_IN_ADDR:
+ return sizeof(struct in_addr);
+
+ case TABLE_IN6_ADDR:
+ return sizeof(struct in6_addr);
+
+ case TABLE_UUID:
+ case TABLE_ID128:
+ return sizeof(sd_id128_t);
+
+ default:
+ assert_not_reached("Uh? Unexpected cell type");
+ }
+}
+
+static bool table_data_matches(
+ TableData *d,
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ size_t k, l;
+ assert(d);
+
+ if (d->type != type)
+ return false;
+
+ if (d->minimum_width != minimum_width)
+ return false;
+
+ if (d->maximum_width != maximum_width)
+ return false;
+
+ if (d->weight != weight)
+ return false;
+
+ if (d->align_percent != align_percent)
+ return false;
+
+ if (d->ellipsize_percent != ellipsize_percent)
+ return false;
+
+ /* If a color/url/uppercase flag is set, refuse to merge */
+ if (d->color || d->rgap_color)
+ return false;
+ if (d->url)
+ return false;
+ if (d->uppercase)
+ return false;
+
+ k = table_data_size(type, data);
+ l = table_data_size(d->type, d->data);
+ if (k != l)
+ return false;
+
+ return memcmp_safe(data, d->data, l) == 0;
+}
+
+static TableData *table_data_new(
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ _cleanup_free_ TableData *d = NULL;
+ size_t data_size;
+
+ data_size = table_data_size(type, data);
+
+ d = malloc0(offsetof(TableData, data) + data_size);
+ if (!d)
+ return NULL;
+
+ d->n_ref = 1;
+ d->type = type;
+ d->minimum_width = minimum_width;
+ d->maximum_width = maximum_width;
+ d->weight = weight;
+ d->align_percent = align_percent;
+ d->ellipsize_percent = ellipsize_percent;
+
+ if (IN_SET(type, TABLE_STRV, TABLE_STRV_WRAPPED)) {
+ d->strv = strv_copy(data);
+ if (!d->strv)
+ return NULL;
+ } else
+ memcpy_safe(d->data, data, data_size);
+
+ return TAKE_PTR(d);
+}
+
+int table_add_cell_full(
+ Table *t,
+ TableCell **ret_cell,
+ TableDataType type,
+ const void *data,
+ size_t minimum_width,
+ size_t maximum_width,
+ unsigned weight,
+ unsigned align_percent,
+ unsigned ellipsize_percent) {
+
+ _cleanup_(table_data_unrefp) TableData *d = NULL;
+ TableData *p;
+
+ assert(t);
+ assert(type >= 0);
+ assert(type < _TABLE_DATA_TYPE_MAX);
+
+ /* Special rule: patch NULL data fields to the empty field */
+ if (!data)
+ type = TABLE_EMPTY;
+
+ /* Determine the cell adjacent to the current one, but one row up */
+ if (t->n_cells >= t->n_columns)
+ assert_se(p = t->data[t->n_cells - t->n_columns]);
+ else
+ p = NULL;
+
+ /* If formatting parameters are left unspecified, copy from the previous row */
+ if (minimum_width == (size_t) -1)
+ minimum_width = p ? p->minimum_width : 1;
+
+ if (weight == (unsigned) -1)
+ weight = p ? p->weight : DEFAULT_WEIGHT;
+
+ if (align_percent == (unsigned) -1)
+ align_percent = p ? p->align_percent : 0;
+
+ if (ellipsize_percent == (unsigned) -1)
+ ellipsize_percent = p ? p->ellipsize_percent : 100;
+
+ assert(align_percent <= 100);
+ assert(ellipsize_percent <= 100);
+
+ /* Small optimization: Pretty often adjacent cells in two subsequent lines have the same data and
+ * formatting. Let's see if we can reuse the cell data and ref it once more. */
+
+ if (p && table_data_matches(p, type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent))
+ d = table_data_ref(p);
+ else {
+ d = table_data_new(type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent);
+ if (!d)
+ return -ENOMEM;
+ }
+
+ if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+ return -ENOMEM;
+
+ if (ret_cell)
+ *ret_cell = TABLE_INDEX_TO_CELL(t->n_cells);
+
+ t->data[t->n_cells++] = TAKE_PTR(d);
+
+ return 0;
+}
+
+int table_add_cell_stringf(Table *t, TableCell **ret_cell, const char *format, ...) {
+ _cleanup_free_ char *buffer = NULL;
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&buffer, format, ap);
+ va_end(ap);
+ if (r < 0)
+ return -ENOMEM;
+
+ return table_add_cell(t, ret_cell, TABLE_STRING, buffer);
+}
+
+int table_fill_empty(Table *t, size_t until_column) {
+ int r;
+
+ assert(t);
+
+ /* Fill the rest of the current line with empty cells until we reach the specified column. Will add
+ * at least one cell. Pass 0 in order to fill a line to the end or insert an empty line. */
+
+ if (until_column >= t->n_columns)
+ return -EINVAL;
+
+ do {
+ r = table_add_cell(t, NULL, TABLE_EMPTY, NULL);
+ if (r < 0)
+ return r;
+
+ } while ((t->n_cells % t->n_columns) != until_column);
+
+ return 0;
+}
+
+int table_dup_cell(Table *t, TableCell *cell) {
+ size_t i;
+
+ assert(t);
+
+ /* Add the data of the specified cell a second time as a new cell to the end. */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns)))
+ return -ENOMEM;
+
+ t->data[t->n_cells++] = table_data_ref(t->data[i]);
+ return 0;
+}
+
+static int table_dedup_cell(Table *t, TableCell *cell) {
+ _cleanup_free_ char *curl = NULL;
+ TableData *nd, *od;
+ size_t i;
+
+ assert(t);
+
+ /* Helper call that ensures the specified cell's data object has a ref count of 1, which we can use before
+ * changing a cell's formatting without effecting every other cell's formatting that shares the same data */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ assert_se(od = t->data[i]);
+ if (od->n_ref == 1)
+ return 0;
+
+ assert(od->n_ref > 1);
+
+ if (od->url) {
+ curl = strdup(od->url);
+ if (!curl)
+ return -ENOMEM;
+ }
+
+ nd = table_data_new(
+ od->type,
+ od->data,
+ od->minimum_width,
+ od->maximum_width,
+ od->weight,
+ od->align_percent,
+ od->ellipsize_percent);
+ if (!nd)
+ return -ENOMEM;
+
+ nd->color = od->color;
+ nd->rgap_color = od->rgap_color;
+ nd->url = TAKE_PTR(curl);
+ nd->uppercase = od->uppercase;
+
+ table_data_unref(od);
+ t->data[i] = nd;
+
+ assert(nd->n_ref == 1);
+
+ return 1;
+}
+
+static TableData *table_get_data(Table *t, TableCell *cell) {
+ size_t i;
+
+ assert(t);
+ assert(cell);
+
+ /* Get the data object of the specified cell, or NULL if it doesn't exist */
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return NULL;
+
+ assert(t->data[i]);
+ assert(t->data[i]->n_ref > 0);
+
+ return t->data[i];
+}
+
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (minimum_width == (size_t) -1)
+ minimum_width = 1;
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->minimum_width = minimum_width;
+ return 0;
+}
+
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->maximum_width = maximum_width;
+ return 0;
+}
+
+int table_set_weight(Table *t, TableCell *cell, unsigned weight) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (weight == (unsigned) -1)
+ weight = DEFAULT_WEIGHT;
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->weight = weight;
+ return 0;
+}
+
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (percent == (unsigned) -1)
+ percent = 0;
+
+ assert(percent <= 100);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->align_percent = percent;
+ return 0;
+}
+
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (percent == (unsigned) -1)
+ percent = 100;
+
+ assert(percent <= 100);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->ellipsize_percent = percent;
+ return 0;
+}
+
+int table_set_color(Table *t, TableCell *cell, const char *color) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->color = empty_to_null(color);
+ return 0;
+}
+
+int table_set_rgap_color(Table *t, TableCell *cell, const char *color) {
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ table_get_data(t, cell)->rgap_color = empty_to_null(color);
+ return 0;
+}
+
+int table_set_url(Table *t, TableCell *cell, const char *url) {
+ _cleanup_free_ char *copy = NULL;
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ if (url) {
+ copy = strdup(url);
+ if (!copy)
+ return -ENOMEM;
+ }
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ return free_and_replace(table_get_data(t, cell)->url, copy);
+}
+
+int table_set_uppercase(Table *t, TableCell *cell, bool b) {
+ TableData *d;
+ int r;
+
+ assert(t);
+ assert(cell);
+
+ r = table_dedup_cell(t, cell);
+ if (r < 0)
+ return r;
+
+ assert_se(d = table_get_data(t, cell));
+
+ if (d->uppercase == b)
+ return 0;
+
+ d->formatted = mfree(d->formatted);
+ d->uppercase = b;
+ return 1;
+}
+
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data) {
+ _cleanup_free_ char *curl = NULL;
+ TableData *nd, *od;
+ size_t i;
+
+ assert(t);
+ assert(cell);
+
+ i = TABLE_CELL_TO_INDEX(cell);
+ if (i >= t->n_cells)
+ return -ENXIO;
+
+ assert_se(od = t->data[i]);
+
+ if (od->url) {
+ curl = strdup(od->url);
+ if (!curl)
+ return -ENOMEM;
+ }
+
+ nd = table_data_new(
+ type,
+ data,
+ od->minimum_width,
+ od->maximum_width,
+ od->weight,
+ od->align_percent,
+ od->ellipsize_percent);
+ if (!nd)
+ return -ENOMEM;
+
+ nd->color = od->color;
+ nd->rgap_color = od->rgap_color;
+ nd->url = TAKE_PTR(curl);
+ nd->uppercase = od->uppercase;
+
+ table_data_unref(od);
+ t->data[i] = nd;
+
+ return 0;
+}
+
+int table_add_many_internal(Table *t, TableDataType first_type, ...) {
+ TableDataType type;
+ va_list ap;
+ TableCell *last_cell = NULL;
+ int r;
+
+ assert(t);
+ assert(first_type >= 0);
+ assert(first_type < _TABLE_DATA_TYPE_MAX);
+
+ type = first_type;
+
+ va_start(ap, first_type);
+ for (;;) {
+ const void *data;
+ union {
+ uint64_t size;
+ usec_t usec;
+ int int_val;
+ int8_t int8;
+ int16_t int16;
+ int32_t int32;
+ int64_t int64;
+ unsigned uint_val;
+ uint8_t uint8;
+ uint16_t uint16;
+ uint32_t uint32;
+ uint64_t uint64;
+ int percent;
+ int ifindex;
+ bool b;
+ union in_addr_union address;
+ sd_id128_t id128;
+ } buffer;
+
+ switch (type) {
+
+ case TABLE_EMPTY:
+ data = NULL;
+ break;
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ data = va_arg(ap, const char *);
+ break;
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ data = va_arg(ap, char * const *);
+ break;
+
+ case TABLE_BOOLEAN:
+ buffer.b = va_arg(ap, int);
+ data = &buffer.b;
+ break;
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ buffer.usec = va_arg(ap, usec_t);
+ data = &buffer.usec;
+ break;
+
+ case TABLE_SIZE:
+ case TABLE_BPS:
+ buffer.size = va_arg(ap, uint64_t);
+ data = &buffer.size;
+ break;
+
+ case TABLE_INT:
+ buffer.int_val = va_arg(ap, int);
+ data = &buffer.int_val;
+ break;
+
+ case TABLE_INT8: {
+ int x = va_arg(ap, int);
+ assert(x >= INT8_MIN && x <= INT8_MAX);
+
+ buffer.int8 = x;
+ data = &buffer.int8;
+ break;
+ }
+
+ case TABLE_INT16: {
+ int x = va_arg(ap, int);
+ assert(x >= INT16_MIN && x <= INT16_MAX);
+
+ buffer.int16 = x;
+ data = &buffer.int16;
+ break;
+ }
+
+ case TABLE_INT32:
+ buffer.int32 = va_arg(ap, int32_t);
+ data = &buffer.int32;
+ break;
+
+ case TABLE_INT64:
+ buffer.int64 = va_arg(ap, int64_t);
+ data = &buffer.int64;
+ break;
+
+ case TABLE_UINT:
+ buffer.uint_val = va_arg(ap, unsigned);
+ data = &buffer.uint_val;
+ break;
+
+ case TABLE_UINT8: {
+ unsigned x = va_arg(ap, unsigned);
+ assert(x <= UINT8_MAX);
+
+ buffer.uint8 = x;
+ data = &buffer.uint8;
+ break;
+ }
+
+ case TABLE_UINT16: {
+ unsigned x = va_arg(ap, unsigned);
+ assert(x <= UINT16_MAX);
+
+ buffer.uint16 = x;
+ data = &buffer.uint16;
+ break;
+ }
+
+ case TABLE_UINT32:
+ buffer.uint32 = va_arg(ap, uint32_t);
+ data = &buffer.uint32;
+ break;
+
+ case TABLE_UINT64:
+ buffer.uint64 = va_arg(ap, uint64_t);
+ data = &buffer.uint64;
+ break;
+
+ case TABLE_PERCENT:
+ buffer.percent = va_arg(ap, int);
+ data = &buffer.percent;
+ break;
+
+ case TABLE_IFINDEX:
+ buffer.ifindex = va_arg(ap, int);
+ data = &buffer.ifindex;
+ break;
+
+ case TABLE_IN_ADDR:
+ buffer.address = *va_arg(ap, union in_addr_union *);
+ data = &buffer.address.in;
+ break;
+
+ case TABLE_IN6_ADDR:
+ buffer.address = *va_arg(ap, union in_addr_union *);
+ data = &buffer.address.in6;
+ break;
+
+ case TABLE_UUID:
+ case TABLE_ID128:
+ buffer.id128 = va_arg(ap, sd_id128_t);
+ data = &buffer.id128;
+ break;
+
+ case TABLE_SET_MINIMUM_WIDTH: {
+ size_t w = va_arg(ap, size_t);
+
+ r = table_set_minimum_width(t, last_cell, w);
+ break;
+ }
+
+ case TABLE_SET_MAXIMUM_WIDTH: {
+ size_t w = va_arg(ap, size_t);
+ r = table_set_maximum_width(t, last_cell, w);
+ break;
+ }
+
+ case TABLE_SET_WEIGHT: {
+ unsigned w = va_arg(ap, unsigned);
+ r = table_set_weight(t, last_cell, w);
+ break;
+ }
+
+ case TABLE_SET_ALIGN_PERCENT: {
+ unsigned p = va_arg(ap, unsigned);
+ r = table_set_align_percent(t, last_cell, p);
+ break;
+ }
+
+ case TABLE_SET_ELLIPSIZE_PERCENT: {
+ unsigned p = va_arg(ap, unsigned);
+ r = table_set_ellipsize_percent(t, last_cell, p);
+ break;
+ }
+
+ case TABLE_SET_COLOR: {
+ const char *c = va_arg(ap, const char*);
+ r = table_set_color(t, last_cell, c);
+ break;
+ }
+
+ case TABLE_SET_RGAP_COLOR: {
+ const char *c = va_arg(ap, const char*);
+ r = table_set_rgap_color(t, last_cell, c);
+ break;
+ }
+
+ case TABLE_SET_BOTH_COLORS: {
+ const char *c = va_arg(ap, const char*);
+
+ r = table_set_color(t, last_cell, c);
+ if (r < 0) {
+ va_end(ap);
+ return r;
+ }
+
+ r = table_set_rgap_color(t, last_cell, c);
+ break;
+ }
+
+ case TABLE_SET_URL: {
+ const char *u = va_arg(ap, const char*);
+ r = table_set_url(t, last_cell, u);
+ break;
+ }
+
+ case TABLE_SET_UPPERCASE: {
+ int u = va_arg(ap, int);
+ r = table_set_uppercase(t, last_cell, u);
+ break;
+ }
+
+ case _TABLE_DATA_TYPE_MAX:
+ /* Used as end marker */
+ va_end(ap);
+ return 0;
+
+ default:
+ assert_not_reached("Uh? Unexpected data type.");
+ }
+
+ if (type < _TABLE_DATA_TYPE_MAX)
+ r = table_add_cell(t, &last_cell, type, data);
+
+ if (r < 0) {
+ va_end(ap);
+ return r;
+ }
+
+ type = va_arg(ap, TableDataType);
+ }
+}
+
+void table_set_header(Table *t, bool b) {
+ assert(t);
+
+ t->header = b;
+}
+
+void table_set_width(Table *t, size_t width) {
+ assert(t);
+
+ t->width = width;
+}
+
+void table_set_cell_height_max(Table *t, size_t height) {
+ assert(t);
+ assert(height >= 1 || height == (size_t) -1);
+
+ t->cell_height_max = height;
+}
+
+int table_set_empty_string(Table *t, const char *empty) {
+ assert(t);
+
+ return free_and_strdup(&t->empty_string, empty);
+}
+
+int table_set_display_all(Table *t) {
+ assert(t);
+
+ size_t allocated = t->n_display_map;
+
+ if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, allocated)))
+ return -ENOMEM;
+
+ for (size_t i = 0; i < t->n_columns; i++)
+ t->display_map[i] = i;
+
+ t->n_display_map = t->n_columns;
+
+ return 0;
+}
+
+int table_set_display(Table *t, size_t first_column, ...) {
+ size_t allocated, column;
+ va_list ap;
+
+ assert(t);
+
+ allocated = t->n_display_map;
+ column = first_column;
+
+ va_start(ap, first_column);
+ for (;;) {
+ assert(column < t->n_columns);
+
+ if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, t->n_display_map+1))) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+
+ t->display_map[t->n_display_map++] = column;
+
+ column = va_arg(ap, size_t);
+ if (column == (size_t) -1)
+ break;
+
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+int table_set_sort(Table *t, size_t first_column, ...) {
+ size_t allocated, column;
+ va_list ap;
+
+ assert(t);
+
+ allocated = t->n_sort_map;
+ column = first_column;
+
+ va_start(ap, first_column);
+ for (;;) {
+ assert(column < t->n_columns);
+
+ if (!GREEDY_REALLOC(t->sort_map, allocated, MAX(t->n_columns, t->n_sort_map+1))) {
+ va_end(ap);
+ return -ENOMEM;
+ }
+
+ t->sort_map[t->n_sort_map++] = column;
+
+ column = va_arg(ap, size_t);
+ if (column == (size_t) -1)
+ break;
+ }
+ va_end(ap);
+
+ return 0;
+}
+
+int table_hide_column_from_display(Table *t, size_t column) {
+ int r;
+
+ assert(t);
+ assert(column < t->n_columns);
+
+ /* If the display map is empty, initialize it with all available columns */
+ if (!t->display_map) {
+ r = table_set_display_all(t);
+ if (r < 0)
+ return r;
+ }
+
+ size_t allocated = t->n_display_map, cur = 0;
+
+ for (size_t i = 0; i < allocated; i++) {
+ if (t->display_map[i] == column)
+ continue;
+
+ t->display_map[cur++] = t->display_map[i];
+ }
+
+ t->n_display_map = cur;
+
+ return 0;
+}
+
+static int cell_data_compare(TableData *a, size_t index_a, TableData *b, size_t index_b) {
+ assert(a);
+ assert(b);
+
+ if (a->type == b->type) {
+
+ /* We only define ordering for cells of the same data type. If cells with different data types are
+ * compared we follow the order the cells were originally added in */
+
+ switch (a->type) {
+
+ case TABLE_STRING:
+ return strcmp(a->string, b->string);
+
+ case TABLE_PATH:
+ return path_compare(a->string, b->string);
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ return strv_compare(a->strv, b->strv);
+
+ case TABLE_BOOLEAN:
+ if (!a->boolean && b->boolean)
+ return -1;
+ if (a->boolean && !b->boolean)
+ return 1;
+ return 0;
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ return CMP(a->timestamp, b->timestamp);
+
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ return CMP(a->timespan, b->timespan);
+
+ case TABLE_SIZE:
+ case TABLE_BPS:
+ return CMP(a->size, b->size);
+
+ case TABLE_INT:
+ return CMP(a->int_val, b->int_val);
+
+ case TABLE_INT8:
+ return CMP(a->int8, b->int8);
+
+ case TABLE_INT16:
+ return CMP(a->int16, b->int16);
+
+ case TABLE_INT32:
+ return CMP(a->int32, b->int32);
+
+ case TABLE_INT64:
+ return CMP(a->int64, b->int64);
+
+ case TABLE_UINT:
+ return CMP(a->uint_val, b->uint_val);
+
+ case TABLE_UINT8:
+ return CMP(a->uint8, b->uint8);
+
+ case TABLE_UINT16:
+ return CMP(a->uint16, b->uint16);
+
+ case TABLE_UINT32:
+ return CMP(a->uint32, b->uint32);
+
+ case TABLE_UINT64:
+ return CMP(a->uint64, b->uint64);
+
+ case TABLE_PERCENT:
+ return CMP(a->percent, b->percent);
+
+ case TABLE_IFINDEX:
+ return CMP(a->ifindex, b->ifindex);
+
+ case TABLE_IN_ADDR:
+ return CMP(a->address.in.s_addr, b->address.in.s_addr);
+
+ case TABLE_IN6_ADDR:
+ return memcmp(&a->address.in6, &b->address.in6, FAMILY_ADDRESS_SIZE(AF_INET6));
+
+ case TABLE_UUID:
+ case TABLE_ID128:
+ return memcmp(&a->id128, &b->id128, sizeof(sd_id128_t));
+
+ default:
+ ;
+ }
+ }
+
+ /* Generic fallback using the original order in which the cells where added. */
+ return CMP(index_a, index_b);
+}
+
+static int table_data_compare(const size_t *a, const size_t *b, Table *t) {
+ int r;
+
+ assert(t);
+ assert(t->sort_map);
+
+ /* Make sure the header stays at the beginning */
+ if (*a < t->n_columns && *b < t->n_columns)
+ return 0;
+ if (*a < t->n_columns)
+ return -1;
+ if (*b < t->n_columns)
+ return 1;
+
+ /* Order other lines by the sorting map */
+ for (size_t i = 0; i < t->n_sort_map; i++) {
+ TableData *d, *dd;
+
+ d = t->data[*a + t->sort_map[i]];
+ dd = t->data[*b + t->sort_map[i]];
+
+ r = cell_data_compare(d, *a, dd, *b);
+ if (r != 0)
+ return t->reverse_map && t->reverse_map[t->sort_map[i]] ? -r : r;
+ }
+
+ /* Order identical lines by the order there were originally added in */
+ return CMP(*a, *b);
+}
+
+static char* format_strv_width(char **strv, size_t column_width) {
+ _cleanup_fclose_ FILE *f = NULL;
+ size_t sz = 0;
+ _cleanup_free_ char *buf = NULL;
+
+ f = open_memstream_unlocked(&buf, &sz);
+ if (!f)
+ return NULL;
+
+ size_t position = 0;
+ char **p;
+ STRV_FOREACH(p, strv) {
+ size_t our_len = utf8_console_width(*p); /* This returns -1 on invalid utf-8 (which shouldn't happen).
+ * If that happens, we'll just print one item per line. */
+
+ if (position == 0) {
+ fputs(*p, f);
+ position = our_len;
+ } else if (size_add(size_add(position, 1), our_len) <= column_width) {
+ fprintf(f, " %s", *p);
+ position = size_add(size_add(position, 1), our_len);
+ } else {
+ fprintf(f, "\n%s", *p);
+ position = our_len;
+ }
+ }
+
+ if (fflush_and_check(f) < 0)
+ return NULL;
+
+ f = safe_fclose(f);
+ return TAKE_PTR(buf);
+}
+
+static const char *table_data_format(Table *t, TableData *d, bool avoid_uppercasing, size_t column_width, bool *have_soft) {
+ assert(d);
+
+ if (d->formatted &&
+ /* Only TABLE_STRV_WRAPPED adjust based on column_width so far… */
+ (d->type != TABLE_STRV_WRAPPED || d->formatted_for_width == column_width))
+ return d->formatted;
+
+ switch (d->type) {
+ case TABLE_EMPTY:
+ return strempty(t->empty_string);
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ if (d->uppercase && !avoid_uppercasing) {
+ d->formatted = new(char, strlen(d->string) + 1);
+ if (!d->formatted)
+ return NULL;
+
+ char *q = d->formatted;
+ for (char *p = d->string; *p; p++, q++)
+ *q = (char) toupper((unsigned char) *p);
+ *q = 0;
+
+ return d->formatted;
+ }
+
+ return d->string;
+
+ case TABLE_STRV:
+ if (strv_isempty(d->strv))
+ return strempty(t->empty_string);
+
+ d->formatted = strv_join(d->strv, "\n");
+ if (!d->formatted)
+ return NULL;
+ break;
+
+ case TABLE_STRV_WRAPPED: {
+ if (strv_isempty(d->strv))
+ return strempty(t->empty_string);
+
+ char *buf = format_strv_width(d->strv, column_width);
+ if (!buf)
+ return NULL;
+
+ free_and_replace(d->formatted, buf);
+ d->formatted_for_width = column_width;
+ if (have_soft)
+ *have_soft = true;
+
+ break;
+ }
+
+ case TABLE_BOOLEAN:
+ return yes_no(d->boolean);
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE: {
+ _cleanup_free_ char *p;
+ char *ret;
+
+ p = new(char, FORMAT_TIMESTAMP_MAX);
+ if (!p)
+ return NULL;
+
+ if (d->type == TABLE_TIMESTAMP)
+ ret = format_timestamp(p, FORMAT_TIMESTAMP_MAX, d->timestamp);
+ else if (d->type == TABLE_TIMESTAMP_UTC)
+ ret = format_timestamp_style(p, FORMAT_TIMESTAMP_MAX, d->timestamp, TIMESTAMP_UTC);
+ else
+ ret = format_timestamp_relative(p, FORMAT_TIMESTAMP_MAX, d->timestamp);
+ if (!ret)
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC: {
+ _cleanup_free_ char *p;
+
+ p = new(char, FORMAT_TIMESPAN_MAX);
+ if (!p)
+ return NULL;
+
+ if (!format_timespan(p, FORMAT_TIMESPAN_MAX, d->timespan,
+ d->type == TABLE_TIMESPAN ? 0 : USEC_PER_MSEC))
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_SIZE: {
+ _cleanup_free_ char *p;
+
+ p = new(char, FORMAT_BYTES_MAX);
+ if (!p)
+ return NULL;
+
+ if (!format_bytes(p, FORMAT_BYTES_MAX, d->size))
+ return "n/a";
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_BPS: {
+ _cleanup_free_ char *p;
+ size_t n;
+
+ p = new(char, FORMAT_BYTES_MAX+2);
+ if (!p)
+ return NULL;
+
+ if (!format_bytes_full(p, FORMAT_BYTES_MAX, d->size, 0))
+ return "n/a";
+
+ n = strlen(p);
+ strscpy(p + n, FORMAT_BYTES_MAX + 2 - n, "bps");
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int_val) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%i", d->int_val);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT8: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int8) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi8, d->int8);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT16: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int16) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi16, d->int16);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT32: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int32) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi32, d->int32);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_INT64: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->int64) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIi64, d->int64);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint_val) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%u", d->uint_val);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT8: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint8) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu8, d->uint8);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT16: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint16) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu16, d->uint16);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT32: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint32) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu32, d->uint32);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_UINT64: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->uint64) + 1);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%" PRIu64, d->uint64);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_PERCENT: {
+ _cleanup_free_ char *p;
+
+ p = new(char, DECIMAL_STR_WIDTH(d->percent) + 2);
+ if (!p)
+ return NULL;
+
+ sprintf(p, "%i%%" , d->percent);
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_IFINDEX: {
+ _cleanup_free_ char *p = NULL;
+ char name[IF_NAMESIZE + 1];
+
+ if (format_ifname(d->ifindex, name)) {
+ p = strdup(name);
+ if (!p)
+ return NULL;
+ } else {
+ if (asprintf(&p, "%i" , d->ifindex) < 0)
+ return NULL;
+ }
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_IN_ADDR:
+ case TABLE_IN6_ADDR: {
+ _cleanup_free_ char *p = NULL;
+
+ if (in_addr_to_string(d->type == TABLE_IN_ADDR ? AF_INET : AF_INET6,
+ &d->address, &p) < 0)
+ return NULL;
+
+ d->formatted = TAKE_PTR(p);
+ break;
+ }
+
+ case TABLE_ID128: {
+ char *p;
+
+ p = new(char, SD_ID128_STRING_MAX);
+ if (!p)
+ return NULL;
+
+ d->formatted = sd_id128_to_string(d->id128, p);
+ break;
+ }
+
+ case TABLE_UUID: {
+ char *p;
+
+ p = new(char, ID128_UUID_STRING_MAX);
+ if (!p)
+ return NULL;
+
+ d->formatted = id128_to_uuid_string(d->id128, p);
+ break;
+ }
+
+ default:
+ assert_not_reached("Unexpected type?");
+ }
+
+ return d->formatted;
+}
+
+static int console_width_height(
+ const char *s,
+ size_t *ret_width,
+ size_t *ret_height) {
+
+ size_t max_width = 0, height = 0;
+ const char *p;
+
+ assert(s);
+
+ /* Determine the width and height in console character cells the specified string needs. */
+
+ do {
+ size_t k;
+
+ p = strchr(s, '\n');
+ if (p) {
+ _cleanup_free_ char *c = NULL;
+
+ c = strndup(s, p - s);
+ if (!c)
+ return -ENOMEM;
+
+ k = utf8_console_width(c);
+ s = p + 1;
+ } else {
+ k = utf8_console_width(s);
+ s = NULL;
+ }
+ if (k == (size_t) -1)
+ return -EINVAL;
+ if (k > max_width)
+ max_width = k;
+
+ height++;
+ } while (!isempty(s));
+
+ if (ret_width)
+ *ret_width = max_width;
+
+ if (ret_height)
+ *ret_height = height;
+
+ return 0;
+}
+
+static int table_data_requested_width_height(
+ Table *table,
+ TableData *d,
+ size_t available_width,
+ size_t *ret_width,
+ size_t *ret_height,
+ bool *have_soft) {
+
+ _cleanup_free_ char *truncated = NULL;
+ bool truncation_applied = false;
+ size_t width, height;
+ const char *t;
+ int r;
+ bool soft = false;
+
+ t = table_data_format(table, d, false, available_width, &soft);
+ if (!t)
+ return -ENOMEM;
+
+ if (table->cell_height_max != (size_t) -1) {
+ r = string_truncate_lines(t, table->cell_height_max, &truncated);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ truncation_applied = true;
+
+ t = truncated;
+ }
+
+ r = console_width_height(t, &width, &height);
+ if (r < 0)
+ return r;
+
+ if (d->maximum_width != (size_t) -1 && width > d->maximum_width)
+ width = d->maximum_width;
+
+ if (width < d->minimum_width)
+ width = d->minimum_width;
+
+ if (ret_width)
+ *ret_width = width;
+ if (ret_height)
+ *ret_height = height;
+ if (have_soft && soft)
+ *have_soft = true;
+
+ return truncation_applied;
+}
+
+static char *align_string_mem(const char *str, const char *url, size_t new_length, unsigned percent) {
+ size_t w = 0, space, lspace, old_length, clickable_length;
+ _cleanup_free_ char *clickable = NULL;
+ const char *p;
+ char *ret;
+ int r;
+
+ /* As with ellipsize_mem(), 'old_length' is a byte size while 'new_length' is a width in character cells */
+
+ assert(str);
+ assert(percent <= 100);
+
+ old_length = strlen(str);
+
+ if (url) {
+ r = terminal_urlify(url, str, &clickable);
+ if (r < 0)
+ return NULL;
+
+ clickable_length = strlen(clickable);
+ } else
+ clickable_length = old_length;
+
+ /* Determine current width on screen */
+ p = str;
+ while (p < str + old_length) {
+ char32_t c;
+
+ if (utf8_encoded_to_unichar(p, &c) < 0) {
+ p++, w++; /* count invalid chars as 1 */
+ continue;
+ }
+
+ p = utf8_next_char(p);
+ w += unichar_iswide(c) ? 2 : 1;
+ }
+
+ /* Already wider than the target, if so, don't do anything */
+ if (w >= new_length)
+ return clickable ? TAKE_PTR(clickable) : strdup(str);
+
+ /* How much spaces shall we add? An how much on the left side? */
+ space = new_length - w;
+ lspace = space * percent / 100U;
+
+ ret = new(char, space + clickable_length + 1);
+ if (!ret)
+ return NULL;
+
+ for (size_t i = 0; i < lspace; i++)
+ ret[i] = ' ';
+ memcpy(ret + lspace, clickable ?: str, clickable_length);
+ for (size_t i = lspace + clickable_length; i < space + clickable_length; i++)
+ ret[i] = ' ';
+
+ ret[space + clickable_length] = 0;
+ return ret;
+}
+
+static bool table_data_isempty(TableData *d) {
+ assert(d);
+
+ if (d->type == TABLE_EMPTY)
+ return true;
+
+ /* Let's also consider an empty strv as truly empty. */
+ if (IN_SET(d->type, TABLE_STRV, TABLE_STRV_WRAPPED))
+ return strv_isempty(d->strv);
+
+ /* Note that an empty string we do not consider empty here! */
+ return false;
+}
+
+static const char* table_data_color(TableData *d) {
+ assert(d);
+
+ if (d->color)
+ return d->color;
+
+ /* Let's implicitly color all "empty" cells in grey, in case an "empty_string" is set that is not empty */
+ if (table_data_isempty(d))
+ return ansi_grey();
+
+ return NULL;
+}
+
+static const char* table_data_rgap_color(TableData *d) {
+ assert(d);
+
+ if (d->rgap_color)
+ return d->rgap_color;
+
+ return NULL;
+}
+
+int table_print(Table *t, FILE *f) {
+ size_t n_rows, *minimum_width, *maximum_width, display_columns, *requested_width,
+ table_minimum_width, table_maximum_width, table_requested_width, table_effective_width,
+ *width = NULL;
+ _cleanup_free_ size_t *sorted = NULL;
+ uint64_t *column_weight, weight_sum;
+ int r;
+
+ assert(t);
+
+ if (!f)
+ f = stdout;
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted)
+ return -ENOMEM;
+
+ for (size_t i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+
+ assert(display_columns > 0);
+
+ minimum_width = newa(size_t, display_columns);
+ maximum_width = newa(size_t, display_columns);
+ requested_width = newa(size_t, display_columns);
+ column_weight = newa0(uint64_t, display_columns);
+
+ for (size_t j = 0; j < display_columns; j++) {
+ minimum_width[j] = 1;
+ maximum_width[j] = (size_t) -1;
+ }
+
+ for (unsigned pass = 0; pass < 2; pass++) {
+ /* First pass: determine column sizes */
+
+ for (size_t j = 0; j < display_columns; j++)
+ requested_width[j] = (size_t) -1;
+
+ bool any_soft = false;
+
+ for (size_t i = t->header ? 0 : 1; i < n_rows; i++) {
+ TableData **row;
+
+ /* Note that we don't care about ordering at this time, as we just want to determine column sizes,
+ * hence we don't care for sorted[] during the first pass. */
+ row = t->data + i * t->n_columns;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t req_width, req_height;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ r = table_data_requested_width_height(t, d,
+ width ? width[j] : SIZE_MAX,
+ &req_width, &req_height, &any_soft);
+ if (r < 0)
+ return r;
+ if (r > 0) { /* Truncated because too many lines? */
+ _cleanup_free_ char *last = NULL;
+ const char *field;
+
+ /* If we are going to show only the first few lines of a cell that has
+ * multiple make sure that we have enough space horizontally to show an
+ * ellipsis. Hence, let's figure out the last line, and account for its
+ * length plus ellipsis. */
+
+ field = table_data_format(t, d, false,
+ width ? width[j] : SIZE_MAX,
+ &any_soft);
+ if (!field)
+ return -ENOMEM;
+
+ assert_se(t->cell_height_max > 0);
+ r = string_extract_line(field, t->cell_height_max-1, &last);
+ if (r < 0)
+ return r;
+
+ req_width = MAX(req_width,
+ utf8_console_width(last) +
+ utf8_console_width(special_glyph(SPECIAL_GLYPH_ELLIPSIS)));
+ }
+
+ /* Determine the biggest width that any cell in this column would like to have */
+ if (requested_width[j] == (size_t) -1 ||
+ requested_width[j] < req_width)
+ requested_width[j] = req_width;
+
+ /* Determine the minimum width any cell in this column needs */
+ if (minimum_width[j] < d->minimum_width)
+ minimum_width[j] = d->minimum_width;
+
+ /* Determine the maximum width any cell in this column needs */
+ if (d->maximum_width != (size_t) -1 &&
+ (maximum_width[j] == (size_t) -1 ||
+ maximum_width[j] > d->maximum_width))
+ maximum_width[j] = d->maximum_width;
+
+ /* Determine the full columns weight */
+ column_weight[j] += d->weight;
+ }
+ }
+
+ /* One space between each column */
+ table_requested_width = table_minimum_width = table_maximum_width = display_columns - 1;
+
+ /* Calculate the total weight for all columns, plus the minimum, maximum and requested width for the table. */
+ weight_sum = 0;
+ for (size_t j = 0; j < display_columns; j++) {
+ weight_sum += column_weight[j];
+
+ table_minimum_width += minimum_width[j];
+
+ if (maximum_width[j] == (size_t) -1)
+ table_maximum_width = (size_t) -1;
+ else
+ table_maximum_width += maximum_width[j];
+
+ table_requested_width += requested_width[j];
+ }
+
+ /* Calculate effective table width */
+ if (t->width != 0 && t->width != (size_t) -1)
+ table_effective_width = t->width;
+ else if (t->width == 0 ||
+ ((pass > 0 || !any_soft) && (pager_have() || !isatty(STDOUT_FILENO))))
+ table_effective_width = table_requested_width;
+ else
+ table_effective_width = MIN(table_requested_width, columns());
+
+ if (table_maximum_width != (size_t) -1 && table_effective_width > table_maximum_width)
+ table_effective_width = table_maximum_width;
+
+ if (table_effective_width < table_minimum_width)
+ table_effective_width = table_minimum_width;
+
+ if (!width)
+ width = newa(size_t, display_columns);
+
+ if (table_effective_width >= table_requested_width) {
+ size_t extra;
+
+ /* We have extra room, let's distribute it among columns according to their weights. We first provide
+ * each column with what it asked for and the distribute the rest. */
+
+ extra = table_effective_width - table_requested_width;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ size_t delta;
+
+ if (weight_sum == 0)
+ width[j] = requested_width[j] + extra / (display_columns - j); /* Avoid division by zero */
+ else
+ width[j] = requested_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (maximum_width[j] != (size_t) -1 && width[j] > maximum_width[j])
+ width[j] = maximum_width[j];
+
+ if (width[j] < minimum_width[j])
+ width[j] = minimum_width[j];
+
+ assert(width[j] >= requested_width[j]);
+ delta = width[j] - requested_width[j];
+
+ /* Subtract what we just added from the rest */
+ if (extra > delta)
+ extra -= delta;
+ else
+ extra = 0;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+ }
+
+ break; /* Every column should be happy, no need to repeat calculations. */
+ } else {
+ /* We need to compress the table, columns can't get what they asked for. We first provide each column
+ * with the minimum they need, and then distribute anything left. */
+ bool finalize = false;
+ size_t extra;
+
+ extra = table_effective_width - table_minimum_width;
+
+ for (size_t j = 0; j < display_columns; j++)
+ width[j] = (size_t) -1;
+
+ for (;;) {
+ bool restart = false;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ size_t delta, w;
+
+ /* Did this column already get something assigned? If so, let's skip to the next */
+ if (width[j] != (size_t) -1)
+ continue;
+
+ if (weight_sum == 0)
+ w = minimum_width[j] + extra / (display_columns - j); /* avoid division by zero */
+ else
+ w = minimum_width[j] + (extra * column_weight[j]) / weight_sum;
+
+ if (w >= requested_width[j]) {
+ /* Never give more than requested. If we hit a column like this, there's more
+ * space to allocate to other columns which means we need to restart the
+ * iteration. However, if we hit a column like this, let's assign it the space
+ * it wanted for good early.*/
+
+ w = requested_width[j];
+ restart = true;
+
+ } else if (!finalize)
+ continue;
+
+ width[j] = w;
+
+ assert(w >= minimum_width[j]);
+ delta = w - minimum_width[j];
+
+ assert(delta <= extra);
+ extra -= delta;
+
+ assert(weight_sum >= column_weight[j]);
+ weight_sum -= column_weight[j];
+
+ if (restart && !finalize)
+ break;
+ }
+
+ if (finalize)
+ break;
+
+ if (!restart)
+ finalize = true;
+ }
+
+ if (!any_soft) /* Some columns got less than requested. If some cells were "soft",
+ * let's try to reformat them with the new widths. Otherwise, let's
+ * move on. */
+ break;
+ }
+ }
+
+ /* Second pass: show output */
+ for (size_t i = t->header ? 0 : 1; i < n_rows; i++) {
+ size_t n_subline = 0;
+ bool more_sublines;
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ do {
+ const char *gap_color = NULL;
+ more_sublines = false;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ _cleanup_free_ char *buffer = NULL, *extracted = NULL;
+ bool lines_truncated = false;
+ const char *field, *color = NULL;
+ TableData *d;
+ size_t l;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ field = table_data_format(t, d, false, width[j], NULL);
+ if (!field)
+ return -ENOMEM;
+
+ r = string_extract_line(field, n_subline, &extracted);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* There are more lines to come */
+ if ((t->cell_height_max == (size_t) -1 || n_subline + 1 < t->cell_height_max))
+ more_sublines = true; /* There are more lines to come */
+ else
+ lines_truncated = true;
+ }
+ if (extracted)
+ field = extracted;
+
+ l = utf8_console_width(field);
+ if (l > width[j]) {
+ /* Field is wider than allocated space. Let's ellipsize */
+
+ buffer = ellipsize(field, width[j], /* ellipsize at the end if we truncated coming lines, otherwise honour configuration */
+ lines_truncated ? 100 : d->ellipsize_percent);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+ } else {
+ if (lines_truncated) {
+ _cleanup_free_ char *padded = NULL;
+
+ /* We truncated more lines of this cell, let's add an
+ * ellipsis. We first append it, but that might make our
+ * string grow above what we have space for, hence ellipsize
+ * right after. This will truncate the ellipsis and add a new
+ * one. */
+
+ padded = strjoin(field, special_glyph(SPECIAL_GLYPH_ELLIPSIS));
+ if (!padded)
+ return -ENOMEM;
+
+ buffer = ellipsize(padded, width[j], 100);
+ if (!buffer)
+ return -ENOMEM;
+
+ field = buffer;
+ l = utf8_console_width(field);
+ }
+
+ if (l < width[j]) {
+ _cleanup_free_ char *aligned = NULL;
+ /* Field is shorter than allocated space. Let's align with spaces */
+
+ aligned = align_string_mem(field, d->url, width[j], d->align_percent);
+ if (!aligned)
+ return -ENOMEM;
+
+ free_and_replace(buffer, aligned);
+ field = buffer;
+ }
+ }
+
+ if (l >= width[j] && d->url) {
+ _cleanup_free_ char *clickable = NULL;
+
+ r = terminal_urlify(d->url, field, &clickable);
+ if (r < 0)
+ return r;
+
+ free_and_replace(buffer, clickable);
+ field = buffer;
+ }
+
+ if (colors_enabled()) {
+ if (gap_color)
+ fputs(gap_color, f);
+ else if (row == t->data) /* underline header line fully, including the column separator */
+ fputs(ansi_underline(), f);
+ }
+
+ if (j > 0)
+ fputc(' ', f); /* column separator left of cell */
+
+ if (colors_enabled()) {
+ color = table_data_color(d);
+
+ /* Undo gap color */
+ if (gap_color || (color && row == t->data))
+ fputs(ANSI_NORMAL, f);
+
+ if (color)
+ fputs(color, f);
+ else if (gap_color && row == t->data) /* underline header line cell */
+ fputs(ansi_underline(), f);
+ }
+
+ fputs(field, f);
+
+ if (colors_enabled() && (color || row == t->data))
+ fputs(ANSI_NORMAL, f);
+
+ gap_color = table_data_rgap_color(d);
+ }
+
+ fputc('\n', f);
+ n_subline ++;
+ } while (more_sublines);
+ }
+
+ return fflush_and_check(f);
+}
+
+int table_format(Table *t, char **ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ char *buf = NULL;
+ size_t sz = 0;
+ int r;
+
+ f = open_memstream_unlocked(&buf, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ r = table_print(t, f);
+ if (r < 0)
+ return r;
+
+ f = safe_fclose(f);
+
+ *ret = buf;
+
+ return 0;
+}
+
+size_t table_get_rows(Table *t) {
+ if (!t)
+ return 0;
+
+ assert(t->n_columns > 0);
+ return t->n_cells / t->n_columns;
+}
+
+size_t table_get_columns(Table *t) {
+ if (!t)
+ return 0;
+
+ assert(t->n_columns > 0);
+ return t->n_columns;
+}
+
+int table_set_reverse(Table *t, size_t column, bool b) {
+ assert(t);
+ assert(column < t->n_columns);
+
+ if (!t->reverse_map) {
+ if (!b)
+ return 0;
+
+ t->reverse_map = new0(bool, t->n_columns);
+ if (!t->reverse_map)
+ return -ENOMEM;
+ }
+
+ t->reverse_map[column] = b;
+ return 0;
+}
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column) {
+ size_t i;
+
+ assert(t);
+
+ if (column >= t->n_columns)
+ return NULL;
+
+ i = row * t->n_columns + column;
+ if (i >= t->n_cells)
+ return NULL;
+
+ return TABLE_INDEX_TO_CELL(i);
+}
+
+const void *table_get(Table *t, TableCell *cell) {
+ TableData *d;
+
+ assert(t);
+
+ d = table_get_data(t, cell);
+ if (!d)
+ return NULL;
+
+ return d->data;
+}
+
+const void* table_get_at(Table *t, size_t row, size_t column) {
+ TableCell *cell;
+
+ cell = table_get_cell(t, row, column);
+ if (!cell)
+ return NULL;
+
+ return table_get(t, cell);
+}
+
+static int table_data_to_json(TableData *d, JsonVariant **ret) {
+
+ switch (d->type) {
+
+ case TABLE_EMPTY:
+ return json_variant_new_null(ret);
+
+ case TABLE_STRING:
+ case TABLE_PATH:
+ return json_variant_new_string(ret, d->string);
+
+ case TABLE_STRV:
+ case TABLE_STRV_WRAPPED:
+ return json_variant_new_array_strv(ret, d->strv);
+
+ case TABLE_BOOLEAN:
+ return json_variant_new_boolean(ret, d->boolean);
+
+ case TABLE_TIMESTAMP:
+ case TABLE_TIMESTAMP_UTC:
+ case TABLE_TIMESTAMP_RELATIVE:
+ if (d->timestamp == USEC_INFINITY)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->timestamp);
+
+ case TABLE_TIMESPAN:
+ case TABLE_TIMESPAN_MSEC:
+ if (d->timespan == USEC_INFINITY)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->timespan);
+
+ case TABLE_SIZE:
+ case TABLE_BPS:
+ if (d->size == (uint64_t) -1)
+ return json_variant_new_null(ret);
+
+ return json_variant_new_unsigned(ret, d->size);
+
+ case TABLE_INT:
+ return json_variant_new_integer(ret, d->int_val);
+
+ case TABLE_INT8:
+ return json_variant_new_integer(ret, d->int8);
+
+ case TABLE_INT16:
+ return json_variant_new_integer(ret, d->int16);
+
+ case TABLE_INT32:
+ return json_variant_new_integer(ret, d->int32);
+
+ case TABLE_INT64:
+ return json_variant_new_integer(ret, d->int64);
+
+ case TABLE_UINT:
+ return json_variant_new_unsigned(ret, d->uint_val);
+
+ case TABLE_UINT8:
+ return json_variant_new_unsigned(ret, d->uint8);
+
+ case TABLE_UINT16:
+ return json_variant_new_unsigned(ret, d->uint16);
+
+ case TABLE_UINT32:
+ return json_variant_new_unsigned(ret, d->uint32);
+
+ case TABLE_UINT64:
+ return json_variant_new_unsigned(ret, d->uint64);
+
+ case TABLE_PERCENT:
+ return json_variant_new_integer(ret, d->percent);
+
+ case TABLE_IFINDEX:
+ return json_variant_new_integer(ret, d->ifindex);
+
+ case TABLE_IN_ADDR:
+ return json_variant_new_array_bytes(ret, &d->address, FAMILY_ADDRESS_SIZE(AF_INET));
+
+ case TABLE_IN6_ADDR:
+ return json_variant_new_array_bytes(ret, &d->address, FAMILY_ADDRESS_SIZE(AF_INET6));
+
+ case TABLE_ID128: {
+ char buf[SD_ID128_STRING_MAX];
+ return json_variant_new_string(ret, sd_id128_to_string(d->id128, buf));
+ }
+
+ case TABLE_UUID: {
+ char buf[ID128_UUID_STRING_MAX];
+ return json_variant_new_string(ret, id128_to_uuid_string(d->id128, buf));
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static char* string_to_json_field_name(const char *f) {
+ /* Tries to make a string more suitable as JSON field name. There are no strict rules defined what a
+ * field name can be hence this is a bit vague and black magic. Right now we only convert spaces to
+ * underscores and leave everything as is. */
+
+ char *c = strdup(f);
+ if (!c)
+ return NULL;
+
+ for (char *x = c; *x; x++)
+ if (isspace(*x))
+ *x = '_';
+
+ return c;
+}
+
+int table_to_json(Table *t, JsonVariant **ret) {
+ JsonVariant **rows = NULL, **elements = NULL;
+ _cleanup_free_ size_t *sorted = NULL;
+ size_t n_rows, display_columns;
+ int r;
+
+ assert(t);
+
+ /* Ensure we have no incomplete rows */
+ assert(t->n_cells % t->n_columns == 0);
+
+ n_rows = t->n_cells / t->n_columns;
+ assert(n_rows > 0); /* at least the header row must be complete */
+
+ if (t->sort_map) {
+ /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */
+
+ sorted = new(size_t, n_rows);
+ if (!sorted) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (size_t i = 0; i < n_rows; i++)
+ sorted[i] = i * t->n_columns;
+
+ typesafe_qsort_r(sorted, n_rows, table_data_compare, t);
+ }
+
+ if (t->display_map)
+ display_columns = t->n_display_map;
+ else
+ display_columns = t->n_columns;
+ assert(display_columns > 0);
+
+ elements = new0(JsonVariant*, display_columns * 2);
+ if (!elements) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (size_t j = 0; j < display_columns; j++) {
+ _cleanup_free_ char *mangled = NULL;
+ const char *formatted;
+ TableData *d;
+
+ assert_se(d = t->data[t->display_map ? t->display_map[j] : j]);
+
+ /* Field names must be strings, hence format whatever we got here as a string first */
+ formatted = table_data_format(t, d, true, SIZE_MAX, NULL);
+ if (!formatted) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ /* Arbitrary strings suck as field names, try to mangle them into something more suitable hence */
+ mangled = string_to_json_field_name(formatted);
+ if (!mangled) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = json_variant_new_string(elements + j*2, mangled);
+ if (r < 0)
+ goto finish;
+ }
+
+ rows = new0(JsonVariant*, n_rows-1);
+ if (!rows) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ for (size_t i = 1; i < n_rows; i++) {
+ TableData **row;
+
+ if (sorted)
+ row = t->data + sorted[i];
+ else
+ row = t->data + i * t->n_columns;
+
+ for (size_t j = 0; j < display_columns; j++) {
+ TableData *d;
+ size_t k;
+
+ assert_se(d = row[t->display_map ? t->display_map[j] : j]);
+
+ k = j*2+1;
+ elements[k] = json_variant_unref(elements[k]);
+
+ r = table_data_to_json(d, elements + k);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_object(rows + i - 1, elements, display_columns * 2);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = json_variant_new_array(ret, rows, n_rows - 1);
+
+finish:
+ if (rows) {
+ json_variant_unref_many(rows, n_rows-1);
+ free(rows);
+ }
+
+ if (elements) {
+ json_variant_unref_many(elements, display_columns*2);
+ free(elements);
+ }
+
+ return r;
+}
+
+int table_print_json(Table *t, FILE *f, JsonFormatFlags flags) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ int r;
+
+ assert(t);
+
+ if (!f)
+ f = stdout;
+
+ r = table_to_json(t, &v);
+ if (r < 0)
+ return r;
+
+ json_variant_dump(v, flags, f, NULL);
+
+ return fflush_and_check(f);
+}
diff --git a/src/shared/format-table.h b/src/shared/format-table.h
new file mode 100644
index 0000000..965549b
--- /dev/null
+++ b/src/shared/format-table.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum TableDataType {
+ TABLE_EMPTY,
+ TABLE_STRING,
+ TABLE_STRV,
+ TABLE_STRV_WRAPPED,
+ TABLE_PATH,
+ TABLE_BOOLEAN,
+ TABLE_TIMESTAMP,
+ TABLE_TIMESTAMP_UTC,
+ TABLE_TIMESTAMP_RELATIVE,
+ TABLE_TIMESPAN,
+ TABLE_TIMESPAN_MSEC,
+ TABLE_SIZE,
+ TABLE_BPS,
+ TABLE_INT,
+ TABLE_INT8,
+ TABLE_INT16,
+ TABLE_INT32,
+ TABLE_INT64,
+ TABLE_UINT,
+ TABLE_UINT8,
+ TABLE_UINT16,
+ TABLE_UINT32,
+ TABLE_UINT64,
+ TABLE_PERCENT,
+ TABLE_IFINDEX,
+ TABLE_IN_ADDR, /* Takes a union in_addr_union (or a struct in_addr) */
+ TABLE_IN6_ADDR, /* Takes a union in_addr_union (or a struct in6_addr) */
+ TABLE_ID128,
+ TABLE_UUID,
+ _TABLE_DATA_TYPE_MAX,
+
+ /* The following are not really data types, but commands for table_add_cell_many() to make changes to
+ * a cell just added. */
+ TABLE_SET_MINIMUM_WIDTH,
+ TABLE_SET_MAXIMUM_WIDTH,
+ TABLE_SET_WEIGHT,
+ TABLE_SET_ALIGN_PERCENT,
+ TABLE_SET_ELLIPSIZE_PERCENT,
+ TABLE_SET_COLOR,
+ TABLE_SET_RGAP_COLOR,
+ TABLE_SET_BOTH_COLORS,
+ TABLE_SET_URL,
+ TABLE_SET_UPPERCASE,
+
+ _TABLE_DATA_TYPE_INVALID = -1,
+} TableDataType;
+
+/* PIDs are just 32bit signed integers on Linux */
+#define TABLE_PID TABLE_INT32
+assert_cc(sizeof(pid_t) == sizeof(int32_t));
+
+/* UIDs/GIDs are just 32bit unsigned integers on Linux */
+#define TABLE_UID TABLE_UINT32
+#define TABLE_GID TABLE_UINT32
+assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+
+typedef struct Table Table;
+typedef struct TableCell TableCell;
+
+Table *table_new_internal(const char *first_header, ...) _sentinel_;
+#define table_new(...) table_new_internal(__VA_ARGS__, NULL)
+Table *table_new_raw(size_t n_columns);
+Table *table_unref(Table *t);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Table*, table_unref);
+
+int table_add_cell_full(Table *t, TableCell **ret_cell, TableDataType type, const void *data, size_t minimum_width, size_t maximum_width, unsigned weight, unsigned align_percent, unsigned ellipsize_percent);
+static inline int table_add_cell(Table *t, TableCell **ret_cell, TableDataType type, const void *data) {
+ return table_add_cell_full(t, ret_cell, type, data, (size_t) -1, (size_t) -1, (unsigned) -1, (unsigned) -1, (unsigned) -1);
+}
+int table_add_cell_stringf(Table *t, TableCell **ret_cell, const char *format, ...) _printf_(3, 4);
+
+int table_fill_empty(Table *t, size_t until_column);
+
+int table_dup_cell(Table *t, TableCell *cell);
+
+int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width);
+int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width);
+int table_set_weight(Table *t, TableCell *cell, unsigned weight);
+int table_set_align_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent);
+int table_set_color(Table *t, TableCell *cell, const char *color);
+int table_set_rgap_color(Table *t, TableCell *cell, const char *color);
+int table_set_url(Table *t, TableCell *cell, const char *url);
+int table_set_uppercase(Table *t, TableCell *cell, bool b);
+
+int table_update(Table *t, TableCell *cell, TableDataType type, const void *data);
+
+int table_add_many_internal(Table *t, TableDataType first_type, ...);
+#define table_add_many(t, ...) table_add_many_internal(t, __VA_ARGS__, _TABLE_DATA_TYPE_MAX)
+
+void table_set_header(Table *table, bool b);
+void table_set_width(Table *t, size_t width);
+void table_set_cell_height_max(Table *t, size_t height);
+int table_set_empty_string(Table *t, const char *empty);
+int table_set_display_all(Table *t);
+int table_set_display(Table *t, size_t first_column, ...);
+int table_set_sort(Table *t, size_t first_column, ...);
+int table_set_reverse(Table *t, size_t column, bool b);
+int table_hide_column_from_display(Table *t, size_t column);
+
+int table_print(Table *t, FILE *f);
+int table_format(Table *t, char **ret);
+
+static inline TableCell* TABLE_HEADER_CELL(size_t i) {
+ return SIZE_TO_PTR(i + 1);
+}
+
+size_t table_get_rows(Table *t);
+size_t table_get_columns(Table *t);
+
+TableCell *table_get_cell(Table *t, size_t row, size_t column);
+
+const void *table_get(Table *t, TableCell *cell);
+const void *table_get_at(Table *t, size_t row, size_t column);
+
+int table_to_json(Table *t, JsonVariant **ret);
+int table_print_json(Table *t, FILE *f, JsonFormatFlags json_flags);
+
+#define table_log_add_error(r) \
+ log_error_errno(r, "Failed to add cell(s) to table: %m")
+
+#define table_log_print_error(r) \
+ log_error_errno(r, "Failed to print table: %m")
+
+#define table_log_sort_error(r) \
+ log_error_errno(r, "Failed to sort table: %m")
diff --git a/src/shared/fsck-util.h b/src/shared/fsck-util.h
new file mode 100644
index 0000000..855137c
--- /dev/null
+++ b/src/shared/fsck-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+/* exit codes as defined in fsck(8) */
+enum {
+ FSCK_SUCCESS = 0,
+ FSCK_ERROR_CORRECTED = 1 << 0,
+ FSCK_SYSTEM_SHOULD_REBOOT = 1 << 1,
+ FSCK_ERRORS_LEFT_UNCORRECTED = 1 << 2,
+ FSCK_OPERATIONAL_ERROR = 1 << 3,
+ FSCK_USAGE_OR_SYNTAX_ERROR = 1 << 4,
+ FSCK_USER_CANCELLED = 1 << 5,
+ FSCK_SHARED_LIB_ERROR = 1 << 7,
+};
diff --git a/src/shared/fstab-util.c b/src/shared/fstab-util.c
new file mode 100644
index 0000000..292b97c
--- /dev/null
+++ b/src/shared/fstab-util.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "device-nodes.h"
+#include "fstab-util.h"
+#include "macro.h"
+#include "mount-util.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int fstab_has_fstype(const char *fstype) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ f = setmntent(fstab_path(), "re");
+ if (!f)
+ return errno == ENOENT ? false : -errno;
+
+ for (;;) {
+ errno = 0;
+ m = getmntent(f);
+ if (!m)
+ return errno != 0 ? -errno : false;
+
+ if (streq(m->mnt_type, fstype))
+ return true;
+ }
+ return false;
+}
+
+bool fstab_is_extrinsic(const char *mount, const char *opts) {
+
+ /* Don't bother with the OS data itself */
+ if (PATH_IN_SET(mount,
+ "/",
+ "/usr",
+ "/etc"))
+ return true;
+
+ if (PATH_STARTSWITH_SET(mount,
+ "/run/initramfs", /* This should stay around from before we boot until after we shutdown */
+ "/proc", /* All of this is API VFS */
+ "/sys", /* … dito … */
+ "/dev")) /* … dito … */
+ return true;
+
+ /* If this is an initrd mount, and we are not in the initrd, then leave
+ * this around forever, too. */
+ if (opts && fstab_test_option(opts, "x-initrd.mount\0") && !in_initrd())
+ return true;
+
+ return false;
+}
+
+int fstab_is_mount_point(const char *mount) {
+ _cleanup_endmntent_ FILE *f = NULL;
+ struct mntent *m;
+
+ f = setmntent(fstab_path(), "re");
+ if (!f)
+ return errno == ENOENT ? false : -errno;
+
+ for (;;) {
+ errno = 0;
+ m = getmntent(f);
+ if (!m)
+ return errno != 0 ? -errno : false;
+
+ if (path_equal(m->mnt_dir, mount))
+ return true;
+ }
+ return false;
+}
+
+int fstab_filter_options(const char *opts, const char *names,
+ const char **ret_namefound, char **ret_value, char **ret_filtered) {
+ const char *name, *namefound = NULL, *x;
+ _cleanup_strv_free_ char **stor = NULL;
+ _cleanup_free_ char *v = NULL, **strv = NULL;
+ int r;
+
+ assert(names && *names);
+
+ if (!opts)
+ goto answer;
+
+ /* If !ret_value and !ret_filtered, this function is not allowed to fail. */
+
+ if (!ret_filtered) {
+ for (const char *word = opts;;) {
+ const char *end = word;
+
+ /* Look for an *non-escaped* comma separator. Only commas can be escaped, so "\," is
+ * the only valid escape sequence, so we can do a very simple test here. */
+ for (;;) {
+ size_t n = strcspn(end, ",");
+
+ end += n;
+ if (n > 0 && end[-1] == '\\')
+ end++;
+ else
+ break;
+ }
+
+ NULSTR_FOREACH(name, names) {
+ if (end < word + strlen(name))
+ continue;
+ if (!strneq(word, name, strlen(name)))
+ continue;
+
+ /* We know that the string is NUL terminated, so *x is valid */
+ x = word + strlen(name);
+ if (IN_SET(*x, '\0', '=', ',')) {
+ namefound = name;
+ if (ret_value) {
+ bool eq = *x == '=';
+ assert(eq || IN_SET(*x, ',', '\0'));
+
+ r = free_and_strndup(&v,
+ eq ? x + 1 : NULL,
+ eq ? end - x - 1 : 0);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+ }
+ }
+
+ if (*end)
+ word = end + 1;
+ else
+ break;
+ }
+ } else {
+ r = strv_split_full(&stor, opts, ",", EXTRACT_DONT_COALESCE_SEPARATORS | EXTRACT_UNESCAPE_SEPARATORS);
+ if (r < 0)
+ return r;
+
+ strv = memdup(stor, sizeof(char*) * (strv_length(stor) + 1));
+ if (!strv)
+ return -ENOMEM;
+
+ char **t = strv;
+ for (char **s = strv; *s; s++) {
+ NULSTR_FOREACH(name, names) {
+ x = startswith(*s, name);
+ if (x && IN_SET(*x, '\0', '='))
+ goto found;
+ }
+
+ *t = *s;
+ t++;
+ continue;
+ found:
+ /* Keep the last occurrence found */
+ namefound = name;
+ if (ret_value) {
+ assert(IN_SET(*x, '=', '\0'));
+ r = free_and_strdup(&v, *x == '=' ? x + 1 : NULL);
+ if (r < 0)
+ return r;
+ }
+ }
+ *t = NULL;
+ }
+
+answer:
+ if (ret_namefound)
+ *ret_namefound = namefound;
+ if (ret_filtered) {
+ char *f;
+
+ f = strv_join_full(strv, ",", NULL, true);
+ if (!f)
+ return -ENOMEM;
+
+ *ret_filtered = f;
+ }
+ if (ret_value)
+ *ret_value = TAKE_PTR(v);
+
+ return !!namefound;
+}
+
+int fstab_extract_values(const char *opts, const char *name, char ***values) {
+ _cleanup_strv_free_ char **optsv = NULL, **res = NULL;
+ char **s;
+
+ assert(opts);
+ assert(name);
+ assert(values);
+
+ optsv = strv_split(opts, ",");
+ if (!optsv)
+ return -ENOMEM;
+
+ STRV_FOREACH(s, optsv) {
+ char *arg;
+ int r;
+
+ arg = startswith(*s, name);
+ if (!arg || *arg != '=')
+ continue;
+ r = strv_extend(&res, arg + 1);
+ if (r < 0)
+ return r;
+ }
+
+ *values = TAKE_PTR(res);
+
+ return !!*values;
+}
+
+int fstab_find_pri(const char *options, int *ret) {
+ _cleanup_free_ char *opt = NULL;
+ int r, pri;
+
+ assert(ret);
+
+ r = fstab_filter_options(options, "pri\0", NULL, &opt, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0 || !opt)
+ return 0;
+
+ r = safe_atoi(opt, &pri);
+ if (r < 0)
+ return r;
+
+ *ret = pri;
+ return 1;
+}
+
+static char *unquote(const char *s, const char* quotes) {
+ size_t l;
+ assert(s);
+
+ /* This is rather stupid, simply removes the heading and
+ * trailing quotes if there is one. Doesn't care about
+ * escaping or anything.
+ *
+ * DON'T USE THIS FOR NEW CODE ANYMORE! */
+
+ l = strlen(s);
+ if (l < 2)
+ return strdup(s);
+
+ if (strchr(quotes, s[0]) && s[l-1] == s[0])
+ return strndup(s+1, l-2);
+
+ return strdup(s);
+}
+
+static char *tag_to_udev_node(const char *tagvalue, const char *by) {
+ _cleanup_free_ char *t = NULL, *u = NULL;
+ size_t enc_len;
+
+ u = unquote(tagvalue, QUOTES);
+ if (!u)
+ return NULL;
+
+ enc_len = strlen(u) * 4 + 1;
+ t = new(char, enc_len);
+ if (!t)
+ return NULL;
+
+ if (encode_devnode_name(u, t, enc_len) < 0)
+ return NULL;
+
+ return strjoin("/dev/disk/by-", by, "/", t);
+}
+
+char *fstab_node_to_udev_node(const char *p) {
+ assert(p);
+
+ if (startswith(p, "LABEL="))
+ return tag_to_udev_node(p+6, "label");
+
+ if (startswith(p, "UUID="))
+ return tag_to_udev_node(p+5, "uuid");
+
+ if (startswith(p, "PARTUUID="))
+ return tag_to_udev_node(p+9, "partuuid");
+
+ if (startswith(p, "PARTLABEL="))
+ return tag_to_udev_node(p+10, "partlabel");
+
+ return strdup(p);
+}
diff --git a/src/shared/fstab-util.h b/src/shared/fstab-util.h
new file mode 100644
index 0000000..1a602cb
--- /dev/null
+++ b/src/shared/fstab-util.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "macro.h"
+
+bool fstab_is_extrinsic(const char *mount, const char *opts);
+int fstab_is_mount_point(const char *mount);
+int fstab_has_fstype(const char *fstype);
+
+int fstab_filter_options(const char *opts, const char *names, const char **namefound, char **value, char **filtered);
+
+int fstab_extract_values(const char *opts, const char *name, char ***values);
+
+static inline bool fstab_test_option(const char *opts, const char *names) {
+ return !!fstab_filter_options(opts, names, NULL, NULL, NULL);
+}
+
+int fstab_find_pri(const char *options, int *ret);
+
+static inline bool fstab_test_yes_no_option(const char *opts, const char *yes_no) {
+ const char *opt;
+
+ /* If first name given is last, return 1.
+ * If second name given is last or neither is found, return 0. */
+
+ assert_se(fstab_filter_options(opts, yes_no, &opt, NULL, NULL) >= 0);
+
+ return opt == yes_no;
+}
+
+char *fstab_node_to_udev_node(const char *p);
+
+static inline const char* fstab_path(void) {
+ return secure_getenv("SYSTEMD_FSTAB") ?: "/etc/fstab";
+}
diff --git a/src/shared/generate-ip-protocol-list.sh b/src/shared/generate-ip-protocol-list.sh
new file mode 100755
index 0000000..3f91979
--- /dev/null
+++ b/src/shared/generate-ip-protocol-list.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+set -eu
+
+$1 -dM -include netinet/in.h - </dev/null | \
+ awk '/^#define[ \t]+IPPROTO_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \
+ sed -e 's/IPPROTO_//'
diff --git a/src/shared/generate-syscall-list.py b/src/shared/generate-syscall-list.py
new file mode 100755
index 0000000..030c3fe
--- /dev/null
+++ b/src/shared/generate-syscall-list.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+import sys
+import os
+
+s390 = 's390' in os.uname().machine
+arm = 'arm' in os.uname().machine
+
+for line in open(sys.argv[1]):
+ if line.startswith('s390_') and not s390:
+ continue
+ if line.startswith('arm_') and not arm:
+ continue
+
+ print('"{}\\0"'.format(line.strip()))
diff --git a/src/shared/generator.c b/src/shared/generator.c
new file mode 100644
index 0000000..1eccc5a
--- /dev/null
+++ b/src/shared/generator.c
@@ -0,0 +1,631 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "dropin.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fstab-util.h"
+#include "generator.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-util.h"
+#include "special.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "unit-name.h"
+#include "util.h"
+
+int generator_open_unit_file(
+ const char *dest,
+ const char *source,
+ const char *name,
+ FILE **file) {
+
+ const char *unit;
+ FILE *f;
+ int r;
+
+ unit = prefix_roota(dest, name);
+
+ r = fopen_unlocked(unit, "wxe", &f);
+ if (r < 0) {
+ if (source && r == -EEXIST)
+ return log_error_errno(r,
+ "Failed to create unit file %s, as it already exists. Duplicate entry in %s?",
+ unit, source);
+ else
+ return log_error_errno(r,
+ "Failed to create unit file %s: %m",
+ unit);
+ }
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n",
+ program_invocation_short_name);
+
+ *file = f;
+ return 0;
+}
+
+int generator_add_symlink(const char *dir, const char *dst, const char *dep_type, const char *src) {
+ /* Adds a symlink from <dst>.<dep_type>/ to <src> (if src is absolute)
+ * or ../<src> (otherwise). */
+
+ const char *from, *to;
+
+ from = path_is_absolute(src) ? src : strjoina("../", src);
+ to = strjoina(dir, "/", dst, ".", dep_type, "/", basename(src));
+
+ mkdir_parents_label(to, 0755);
+ if (symlink(from, to) < 0)
+ if (errno != EEXIST)
+ return log_error_errno(errno, "Failed to create symlink \"%s\": %m", to);
+
+ return 0;
+}
+
+static int write_fsck_sysroot_service(const char *dir, const char *what) {
+ _cleanup_free_ char *device = NULL, *escaped = NULL, *escaped2 = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit;
+ int r;
+
+ escaped = specifier_escape(what);
+ if (!escaped)
+ return log_oom();
+
+ escaped2 = cescape(escaped);
+ if (!escaped2)
+ return log_oom();
+
+ unit = strjoina(dir, "/"SPECIAL_FSCK_ROOT_SERVICE);
+ log_debug("Creating %s", unit);
+
+ r = unit_name_from_path(what, ".device", &device);
+ if (r < 0)
+ return log_error_errno(r, "Failed to convert device \"%s\" to unit name: %m", what);
+
+ f = fopen(unit, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m", unit);
+
+ fprintf(f,
+ "# Automatically generated by %1$s\n\n"
+ "[Unit]\n"
+ "Description=File System Check on %2$s\n"
+ "Documentation=man:systemd-fsck-root.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%3$s\n"
+ "Conflicts=shutdown.target\n"
+ "After=initrd-root-device.target local-fs-pre.target %3$s\n"
+ "Before=shutdown.target\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart=" SYSTEMD_FSCK_PATH " %4$s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ escaped,
+ device,
+ escaped2);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit);
+
+ return 0;
+}
+
+int generator_write_fsck_deps(
+ FILE *f,
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *fstype) {
+
+ int r;
+
+ assert(f);
+ assert(dir);
+ assert(what);
+ assert(where);
+
+ if (!is_device_path(what)) {
+ log_warning("Checking was requested for \"%s\", but it is not a device.", what);
+ return 0;
+ }
+
+ if (!isempty(fstype) && !streq(fstype, "auto")) {
+ r = fsck_exists(fstype);
+ if (r < 0)
+ log_warning_errno(r, "Checking was requested for %s, but couldn't detect if fsck.%s may be used, proceeding: %m", what, fstype);
+ else if (r == 0) {
+ /* treat missing check as essentially OK */
+ log_debug("Checking was requested for %s, but fsck.%s does not exist.", what, fstype);
+ return 0;
+ }
+ }
+
+ if (path_equal(where, "/")) {
+ const char *lnk;
+
+ lnk = strjoina(dir, "/" SPECIAL_LOCAL_FS_TARGET ".wants/" SPECIAL_FSCK_ROOT_SERVICE);
+
+ (void) mkdir_parents(lnk, 0755);
+ if (symlink(SYSTEM_DATA_UNIT_PATH "/" SPECIAL_FSCK_ROOT_SERVICE, lnk) < 0)
+ return log_error_errno(errno, "Failed to create symlink %s: %m", lnk);
+
+ } else {
+ _cleanup_free_ char *_fsck = NULL;
+ const char *fsck, *dep;
+
+ if (in_initrd() && path_equal(where, "/sysroot")) {
+ r = write_fsck_sysroot_service(dir, what);
+ if (r < 0)
+ return r;
+
+ fsck = SPECIAL_FSCK_ROOT_SERVICE;
+ dep = "Requires";
+ } else {
+ /* When this is /usr, then let's add a Wants= dependency, otherwise a Requires=
+ * dependency. Why? We can't possibly unmount /usr during shutdown, but if we have a
+ * Requires= from /usr onto a fsck@.service unit and that unit is shut down, then
+ * we'd have to unmount /usr too. */
+
+ dep = !in_initrd() && path_equal(where, "/usr") ? "Wants" : "Requires";
+
+ r = unit_name_from_path_instance("systemd-fsck", what, ".service", &_fsck);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create fsck service name: %m");
+
+ fsck = _fsck;
+ }
+
+ fprintf(f,
+ "%1$s=%2$s\n"
+ "After=%2$s\n",
+ dep, fsck);
+ }
+
+ return 0;
+}
+
+int generator_write_timeouts(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts,
+ char **filtered) {
+
+ /* Configure how long we wait for a device that backs a mount point or a
+ * swap partition to show up. This is useful to support endless device timeouts
+ * for devices that show up only after user input, like crypto devices. */
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *timeout = NULL;
+ usec_t u;
+ int r;
+
+ r = fstab_filter_options(opts, "comment=systemd.device-timeout\0"
+ "x-systemd.device-timeout\0",
+ NULL, &timeout, filtered);
+ if (r <= 0)
+ return r;
+
+ r = parse_sec_fix_0(timeout, &u);
+ if (r < 0) {
+ log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout);
+ return 0;
+ }
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+ if (!is_device_path(node)) {
+ log_warning("x-systemd.device-timeout ignored for %s", what);
+ return 0;
+ }
+
+ r = unit_name_from_path(node, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path: %m");
+
+ return write_drop_in_format(dir, unit, 50, "device-timeout",
+ "# Automatically generated by %s\n"
+ "# from supplied options \"%s\"\n\n"
+ "[Unit]\n"
+ "JobRunningTimeoutSec=%s",
+ program_invocation_short_name,
+ opts,
+ timeout);
+}
+
+int generator_write_device_deps(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts) {
+
+ /* fstab records that specify _netdev option should apply the network
+ * ordering on the actual device depending on network connection. If we
+ * are not mounting real device (NFS, CIFS), we rely on _netdev effect
+ * on the mount unit itself. */
+
+ _cleanup_free_ char *node = NULL, *unit = NULL;
+ int r;
+
+ if (fstab_is_extrinsic(where, opts))
+ return 0;
+
+ if (!fstab_test_option(opts, "_netdev\0"))
+ return 0;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to apply dependencies to. */
+ if (!is_device_path(node))
+ return 0;
+
+ r = unit_name_from_path(node, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ node);
+
+ /* See mount_add_default_dependencies for explanation why we create such
+ * dependencies. */
+ return write_drop_in_format(dir, unit, 50, "netdev-dependencies",
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "After=" SPECIAL_NETWORK_ONLINE_TARGET " " SPECIAL_NETWORK_TARGET "\n"
+ "Wants=" SPECIAL_NETWORK_ONLINE_TARGET "\n",
+ program_invocation_short_name);
+}
+
+int generator_write_initrd_root_device_deps(const char *dir, const char *what) {
+ _cleanup_free_ char *unit = NULL;
+ int r;
+
+ r = unit_name_from_path(what, ".device", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ what);
+
+ return write_drop_in_format(dir, SPECIAL_INITRD_ROOT_DEVICE_TARGET, 50, "root-device",
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Requires=%s\n"
+ "After=%s",
+ program_invocation_short_name,
+ unit,
+ unit);
+}
+
+int generator_hook_up_mkswap(
+ const char *dir,
+ const char *what) {
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to work on. */
+ if (!is_device_path(node))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format something that is not a device node: %s",
+ node);
+
+ r = unit_name_from_path_instance("systemd-mkswap", node, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ node);
+
+ unit_file = prefix_roota(dir, unit);
+ log_debug("Creating %s", unit_file);
+
+ escaped = cescape(node);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path(what, ".swap", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ what);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Make Swap on %%f\n"
+ "Documentation=man:systemd-mkswap@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.device\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.device\n"
+ "Before=shutdown.target %s\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_MAKEFS_PATH " swap %s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ where_unit,
+ escaped);
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit_file);
+
+ return generator_add_symlink(dir, where_unit, "requires", unit);
+}
+
+int generator_hook_up_mkfs(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type) {
+
+ _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ node = fstab_node_to_udev_node(what);
+ if (!node)
+ return log_oom();
+
+ /* Nothing to work on. */
+ if (!is_device_path(node))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format something that is not a device node: %s",
+ node);
+
+ if (!type || streq(type, "auto"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Cannot format partition %s, filesystem type is not specified",
+ node);
+
+ r = unit_name_from_path_instance("systemd-makefs", node, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ node);
+
+ unit_file = prefix_roota(dir, unit);
+ log_debug("Creating %s", unit_file);
+
+ escaped = cescape(node);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path(where, ".mount", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ where);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Make File System on %%f\n"
+ "Documentation=man:systemd-makefs@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.device\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.device\n"
+ /* fsck might or might not be used, so let's be safe and order
+ * ourselves before both systemd-fsck@.service and the mount unit. */
+ "Before=shutdown.target systemd-fsck@%%i.service %s\n"
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_MAKEFS_PATH " %s %s\n"
+ "TimeoutSec=0\n",
+ program_invocation_short_name,
+ where_unit,
+ type,
+ escaped);
+ // XXX: what about local-fs-pre.target?
+
+ r = fflush_and_check(f);
+ if (r < 0)
+ return log_error_errno(r, "Failed to write unit file %s: %m", unit_file);
+
+ return generator_add_symlink(dir, where_unit, "requires", unit);
+}
+
+int generator_hook_up_growfs(
+ const char *dir,
+ const char *where,
+ const char *target) {
+
+ _cleanup_free_ char *unit = NULL, *escaped = NULL, *where_unit = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ const char *unit_file;
+ int r;
+
+ escaped = cescape(where);
+ if (!escaped)
+ return log_oom();
+
+ r = unit_name_from_path_instance("systemd-growfs", where, ".service", &unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m",
+ where);
+
+ r = unit_name_from_path(where, ".mount", &where_unit);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make unit name from path \"%s\": %m",
+ where);
+
+ unit_file = prefix_roota(dir, unit);
+ log_debug("Creating %s", unit_file);
+
+ f = fopen(unit_file, "wxe");
+ if (!f)
+ return log_error_errno(errno, "Failed to create unit file %s: %m",
+ unit_file);
+
+ fprintf(f,
+ "# Automatically generated by %s\n\n"
+ "[Unit]\n"
+ "Description=Grow File System on %%f\n"
+ "Documentation=man:systemd-growfs@.service(8)\n"
+ "DefaultDependencies=no\n"
+ "BindsTo=%%i.mount\n"
+ "Conflicts=shutdown.target\n"
+ "After=%%i.mount\n"
+ "Before=shutdown.target %s\n",
+ program_invocation_short_name,
+ target);
+
+ if (empty_or_root(where)) /* Make sure the root fs is actually writable before we resize it */
+ fprintf(f,
+ "After=systemd-remount-fs.service\n");
+
+ fprintf(f,
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "ExecStart="SYSTEMD_GROWFS_PATH " %s\n"
+ "TimeoutSec=0\n",
+ escaped);
+
+ return generator_add_symlink(dir, where_unit, "wants", unit);
+}
+
+int generator_enable_remount_fs_service(const char *dir) {
+ /* Pull in systemd-remount-fs.service */
+ return generator_add_symlink(dir, SPECIAL_LOCAL_FS_TARGET, "wants",
+ SYSTEM_DATA_UNIT_PATH "/" SPECIAL_REMOUNT_FS_SERVICE);
+}
+
+int generator_write_blockdev_dependency(
+ FILE *f,
+ const char *what) {
+
+ _cleanup_free_ char *escaped = NULL;
+ int r;
+
+ assert(f);
+ assert(what);
+
+ if (!path_startswith(what, "/dev/"))
+ return 0;
+
+ r = unit_name_path_escape(what, &escaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to escape device node path %s: %m", what);
+
+ fprintf(f,
+ "After=blockdev@%s.target\n",
+ escaped);
+
+ return 0;
+}
+
+int generator_write_cryptsetup_unit_section(
+ FILE *f,
+ const char *source) {
+
+ assert(f);
+
+ fprintf(f,
+ "[Unit]\n"
+ "Description=Cryptography Setup for %%I\n"
+ "Documentation=man:crypttab(5) man:systemd-cryptsetup-generator(8) man:systemd-cryptsetup@.service(8)\n");
+
+ if (source)
+ fprintf(f, "SourcePath=%s\n", source);
+
+ fprintf(f,
+ "DefaultDependencies=no\n"
+ "IgnoreOnIsolate=true\n"
+ "After=cryptsetup-pre.target systemd-udevd-kernel.socket\n"
+ "Before=blockdev@dev-mapper-%%i.target\n"
+ "Wants=blockdev@dev-mapper-%%i.target\n");
+
+ return 0;
+}
+
+int generator_write_cryptsetup_service_section(
+ FILE *f,
+ const char *name,
+ const char *what,
+ const char *password,
+ const char *options) {
+
+ _cleanup_free_ char *name_escaped = NULL, *what_escaped = NULL, *password_escaped = NULL, *options_escaped = NULL;
+
+ assert(f);
+ assert(name);
+ assert(what);
+
+ name_escaped = specifier_escape(name);
+ if (!name_escaped)
+ return log_oom();
+
+ what_escaped = specifier_escape(what);
+ if (!what_escaped)
+ return log_oom();
+
+ if (password) {
+ password_escaped = specifier_escape(password);
+ if (!password_escaped)
+ return log_oom();
+ }
+
+ if (options) {
+ options_escaped = specifier_escape(options);
+ if (!options_escaped)
+ return log_oom();
+ }
+
+ fprintf(f,
+ "\n"
+ "[Service]\n"
+ "Type=oneshot\n"
+ "RemainAfterExit=yes\n"
+ "TimeoutSec=0\n" /* The binary handles timeouts on its own */
+ "KeyringMode=shared\n" /* Make sure we can share cached keys among instances */
+ "OOMScoreAdjust=500\n" /* Unlocking can allocate a lot of memory if Argon2 is used */
+ "ExecStart=" SYSTEMD_CRYPTSETUP_PATH " attach '%s' '%s' '%s' '%s'\n"
+ "ExecStop=" SYSTEMD_CRYPTSETUP_PATH " detach '%s'\n",
+ name_escaped, what_escaped, strempty(password_escaped), strempty(options_escaped),
+ name_escaped);
+
+ return 0;
+}
+
+void log_setup_generator(void) {
+ /* Disable talking to syslog/journal (i.e. the two IPC-based loggers) if we run in system context. */
+ if (cg_pid_get_owner_uid(0, NULL) == -ENXIO /* not running in a per-user slice */)
+ log_set_prohibit_ipc(true);
+
+ log_set_target(LOG_TARGET_JOURNAL_OR_KMSG); /* This effectively means: journal for per-user generators, kmsg otherwise */
+ log_parse_environment();
+ (void) log_open();
+}
diff --git a/src/shared/generator.h b/src/shared/generator.h
new file mode 100644
index 0000000..ff6072f
--- /dev/null
+++ b/src/shared/generator.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "main-func.h"
+
+int generator_open_unit_file(
+ const char *dest,
+ const char *source,
+ const char *name,
+ FILE **file);
+
+int generator_add_symlink(const char *dir, const char *dst, const char *dep_type, const char *src);
+
+int generator_write_fsck_deps(
+ FILE *f,
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+
+int generator_write_timeouts(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts,
+ char **filtered);
+
+int generator_write_blockdev_dependency(
+ FILE *f,
+ const char *what);
+
+int generator_write_cryptsetup_unit_section(
+ FILE *f,
+ const char *source);
+
+int generator_write_cryptsetup_service_section(
+ FILE *f,
+ const char *name,
+ const char *what,
+ const char *password,
+ const char *options);
+
+int generator_write_device_deps(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *opts);
+
+int generator_write_initrd_root_device_deps(
+ const char *dir,
+ const char *what);
+
+int generator_hook_up_mkswap(
+ const char *dir,
+ const char *what);
+int generator_hook_up_mkfs(
+ const char *dir,
+ const char *what,
+ const char *where,
+ const char *type);
+int generator_hook_up_growfs(
+ const char *dir,
+ const char *where,
+ const char *target);
+
+int generator_enable_remount_fs_service(const char *dir);
+
+void log_setup_generator(void);
+
+/* Similar to DEFINE_MAIN_FUNCTION, but initializes logging and assigns positional arguments. */
+#define DEFINE_MAIN_GENERATOR_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION( \
+ ({ \
+ log_setup_generator(); \
+ if (argc > 1 && argc != 4) \
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), \
+ "This program takes zero or three arguments."); \
+ }), \
+ impl(argc > 1 ? argv[1] : "/tmp", \
+ argc > 1 ? argv[2] : "/tmp", \
+ argc > 1 ? argv[3] : "/tmp"), \
+ r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
diff --git a/src/shared/geneve-util.c b/src/shared/geneve-util.c
new file mode 100644
index 0000000..36ef9c8
--- /dev/null
+++ b/src/shared/geneve-util.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "geneve-util.h"
+#include "string-table.h"
+
+static const char* const geneve_df_table[_NETDEV_GENEVE_DF_MAX] = {
+ [NETDEV_GENEVE_DF_UNSET] = "unset",
+ [NETDEV_GENEVE_DF_SET] = "set",
+ [NETDEV_GENEVE_DF_INHERIT] = "inherit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(geneve_df, GeneveDF);
diff --git a/src/shared/geneve-util.h b/src/shared/geneve-util.h
new file mode 100644
index 0000000..3865f80
--- /dev/null
+++ b/src/shared/geneve-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/if_link.h>
+
+#include "conf-parser.h"
+
+typedef enum GeneveDF {
+ NETDEV_GENEVE_DF_UNSET = GENEVE_DF_UNSET,
+ NETDEV_GENEVE_DF_SET = GENEVE_DF_SET,
+ NETDEV_GENEVE_DF_INHERIT = GENEVE_DF_INHERIT,
+ _NETDEV_GENEVE_DF_MAX,
+ _NETDEV_GENEVE_DF_INVALID = -1,
+} GeneveDF;
+
+const char *geneve_df_to_string(GeneveDF d) _const_;
+GeneveDF geneve_df_from_string(const char *d) _pure_;
diff --git a/src/shared/gpt.c b/src/shared/gpt.c
new file mode 100644
index 0000000..15ea2f0
--- /dev/null
+++ b/src/shared/gpt.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "gpt.h"
+#include "string-util.h"
+
+const GptPartitionType gpt_partition_type_table[] = {
+ { GPT_ROOT_X86, "root-x86" },
+ { GPT_ROOT_X86_VERITY, "root-x86-verity" },
+ { GPT_ROOT_X86_64, "root-x86-64" },
+ { GPT_ROOT_X86_64_VERITY, "root-x86-64-verity" },
+ { GPT_ROOT_ARM, "root-arm" },
+ { GPT_ROOT_ARM_VERITY, "root-arm-verity" },
+ { GPT_ROOT_ARM_64, "root-arm64" },
+ { GPT_ROOT_ARM_64_VERITY, "root-arm64-verity" },
+ { GPT_ROOT_IA64, "root-ia64" },
+ { GPT_ROOT_IA64_VERITY, "root-ia64-verity" },
+ { GPT_ROOT_RISCV32, "root-riscv32" },
+ { GPT_ROOT_RISCV32_VERITY, "root-riscv32-verity" },
+ { GPT_ROOT_RISCV64, "root-riscv64" },
+ { GPT_ROOT_RISCV64_VERITY, "root-riscv64-verity" },
+#ifdef GPT_ROOT_NATIVE
+ { GPT_ROOT_NATIVE, "root" },
+ { GPT_ROOT_NATIVE_VERITY, "root-verity" },
+#endif
+#ifdef GPT_ROOT_SECONDARY
+ { GPT_ROOT_SECONDARY, "root-secondary" },
+ { GPT_ROOT_SECONDARY_VERITY, "root-secondary-verity" },
+#endif
+ { GPT_USR_X86, "usr-x86" },
+ { GPT_USR_X86_VERITY, "usr-x86-verity" },
+ { GPT_USR_X86_64, "usr-x86-64" },
+ { GPT_USR_X86_64_VERITY, "usr-x86-64-verity" },
+ { GPT_USR_ARM, "usr-arm" },
+ { GPT_USR_ARM_VERITY, "usr-arm-verity" },
+ { GPT_USR_ARM_64, "usr-arm64" },
+ { GPT_USR_ARM_64_VERITY, "usr-arm64-verity" },
+ { GPT_USR_IA64, "usr-ia64" },
+ { GPT_USR_IA64_VERITY, "usr-ia64-verity" },
+ { GPT_USR_RISCV32, "usr-riscv32" },
+ { GPT_USR_RISCV32_VERITY, "usr-riscv32-verity" },
+ { GPT_USR_RISCV64, "usr-riscv64" },
+ { GPT_USR_RISCV64_VERITY, "usr-riscv64-verity" },
+#ifdef GPT_USR_NATIVE
+ { GPT_USR_NATIVE, "usr" },
+ { GPT_USR_NATIVE_VERITY, "usr-verity" },
+#endif
+#ifdef GPT_USR_SECONDARY
+ { GPT_USR_SECONDARY, "usr-secondary" },
+ { GPT_USR_SECONDARY_VERITY, "usr-secondary-verity" },
+#endif
+ { GPT_ESP, "esp" },
+ { GPT_XBOOTLDR, "xbootldr" },
+ { GPT_SWAP, "swap" },
+ { GPT_HOME, "home" },
+ { GPT_SRV, "srv" },
+ { GPT_VAR, "var" },
+ { GPT_TMP, "tmp" },
+ { GPT_USER_HOME, "user-home" },
+ { GPT_LINUX_GENERIC, "linux-generic" },
+ {}
+};
+
+const char *gpt_partition_type_uuid_to_string(sd_id128_t id) {
+ for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table) - 1; i++)
+ if (sd_id128_equal(id, gpt_partition_type_table[i].uuid))
+ return gpt_partition_type_table[i].name;
+
+ return NULL;
+}
+
+const char *gpt_partition_type_uuid_to_string_harder(
+ sd_id128_t id,
+ char buffer[static ID128_UUID_STRING_MAX]) {
+
+ const char *s;
+
+ assert(buffer);
+
+ s = gpt_partition_type_uuid_to_string(id);
+ if (s)
+ return s;
+
+ return id128_to_uuid_string(id, buffer);
+}
+
+int gpt_partition_type_uuid_from_string(const char *s, sd_id128_t *ret) {
+ assert(s);
+ assert(ret);
+
+ for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table) - 1; i++)
+ if (streq(s, gpt_partition_type_table[i].name)) {
+ *ret = gpt_partition_type_table[i].uuid;
+ return 0;
+ }
+
+ return sd_id128_from_string(s, ret);
+}
diff --git a/src/shared/gpt.h b/src/shared/gpt.h
new file mode 100644
index 0000000..241ff03
--- /dev/null
+++ b/src/shared/gpt.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <endian.h>
+
+#include "sd-id128.h"
+
+#include "id128-util.h"
+
+/* We only support root disk discovery for x86, x86-64, Itanium and ARM for now, since EFI for anything else
+ * doesn't really exist, and we only care for root partitions on the same disk as the EFI ESP. */
+
+#define GPT_ROOT_X86 SD_ID128_MAKE(44,47,95,40,f2,97,41,b2,9a,f7,d1,31,d5,f0,45,8a)
+#define GPT_ROOT_X86_64 SD_ID128_MAKE(4f,68,bc,e3,e8,cd,4d,b1,96,e7,fb,ca,f9,84,b7,09)
+#define GPT_ROOT_ARM SD_ID128_MAKE(69,da,d7,10,2c,e4,4e,3c,b1,6c,21,a1,d4,9a,be,d3)
+#define GPT_ROOT_ARM_64 SD_ID128_MAKE(b9,21,b0,45,1d,f0,41,c3,af,44,4c,6f,28,0d,3f,ae)
+#define GPT_ROOT_IA64 SD_ID128_MAKE(99,3d,8d,3d,f8,0e,42,25,85,5a,9d,af,8e,d7,ea,97)
+#define GPT_ROOT_RISCV32 SD_ID128_MAKE(60,d5,a7,fe,8e,7d,43,5c,b7,14,3d,d8,16,21,44,e1)
+#define GPT_ROOT_RISCV64 SD_ID128_MAKE(72,ec,70,a6,cf,74,40,e6,bd,49,4b,da,08,e8,f2,24)
+#define GPT_USR_X86 SD_ID128_MAKE(75,25,0d,76,8c,c6,45,8e,bd,66,bd,47,cc,81,a8,12)
+#define GPT_USR_X86_64 SD_ID128_MAKE(84,84,68,0c,95,21,48,c6,9c,11,b0,72,06,56,f6,9e)
+#define GPT_USR_ARM SD_ID128_MAKE(7d,03,59,a3,02,b3,4f,0a,86,5c,65,44,03,e7,06,25)
+#define GPT_USR_ARM_64 SD_ID128_MAKE(b0,e0,10,50,ee,5f,43,90,94,9a,91,01,b1,71,04,e9)
+#define GPT_USR_IA64 SD_ID128_MAKE(43,01,d2,a6,4e,3b,4b,2a,bb,94,9e,0b,2c,42,25,ea)
+#define GPT_USR_RISCV32 SD_ID128_MAKE(b9,33,fb,22,5c,3f,4f,91,af,90,e2,bb,0f,a5,07,02)
+#define GPT_USR_RISCV64 SD_ID128_MAKE(be,ae,c3,4b,84,42,43,9b,a4,0b,98,43,81,ed,09,7d)
+#define GPT_ESP SD_ID128_MAKE(c1,2a,73,28,f8,1f,11,d2,ba,4b,00,a0,c9,3e,c9,3b)
+#define GPT_XBOOTLDR SD_ID128_MAKE(bc,13,c2,ff,59,e6,42,62,a3,52,b2,75,fd,6f,71,72)
+#define GPT_SWAP SD_ID128_MAKE(06,57,fd,6d,a4,ab,43,c4,84,e5,09,33,c8,4b,4f,4f)
+#define GPT_HOME SD_ID128_MAKE(93,3a,c7,e1,2e,b4,4f,13,b8,44,0e,14,e2,ae,f9,15)
+#define GPT_SRV SD_ID128_MAKE(3b,8f,84,25,20,e0,4f,3b,90,7f,1a,25,a7,6f,98,e8)
+#define GPT_VAR SD_ID128_MAKE(4d,21,b0,16,b5,34,45,c2,a9,fb,5c,16,e0,91,fd,2d)
+#define GPT_TMP SD_ID128_MAKE(7e,c6,f5,57,3b,c5,4a,ca,b2,93,16,ef,5d,f6,39,d1)
+#define GPT_USER_HOME SD_ID128_MAKE(77,3f,91,ef,66,d4,49,b5,bd,83,d6,83,bf,40,ad,16)
+#define GPT_LINUX_GENERIC SD_ID128_MAKE(0f,c6,3d,af,84,83,47,72,8e,79,3d,69,d8,47,7d,e4)
+
+/* Verity partitions for the root partitions above (we only define them for the root and /usr partitions,
+ * because only they are are commonly read-only and hence suitable for verity). */
+#define GPT_ROOT_X86_VERITY SD_ID128_MAKE(d1,3c,5d,3b,b5,d1,42,2a,b2,9f,94,54,fd,c8,9d,76)
+#define GPT_ROOT_X86_64_VERITY SD_ID128_MAKE(2c,73,57,ed,eb,d2,46,d9,ae,c1,23,d4,37,ec,2b,f5)
+#define GPT_ROOT_ARM_VERITY SD_ID128_MAKE(73,86,cd,f2,20,3c,47,a9,a4,98,f2,ec,ce,45,a2,d6)
+#define GPT_ROOT_ARM_64_VERITY SD_ID128_MAKE(df,33,00,ce,d6,9f,4c,92,97,8c,9b,fb,0f,38,d8,20)
+#define GPT_ROOT_IA64_VERITY SD_ID128_MAKE(86,ed,10,d5,b6,07,45,bb,89,57,d3,50,f2,3d,05,71)
+#define GPT_ROOT_RISCV32_VERITY SD_ID128_MAKE(ae,02,53,be,11,67,40,07,ac,68,43,92,6c,14,c5,de)
+#define GPT_ROOT_RISCV64_VERITY SD_ID128_MAKE(b6,ed,55,82,44,0b,42,09,b8,da,5f,f7,c4,19,ea,3d)
+#define GPT_USR_X86_VERITY SD_ID128_MAKE(8f,46,1b,0d,14,ee,4e,81,9a,a9,04,9b,6f,b9,7a,bd)
+#define GPT_USR_X86_64_VERITY SD_ID128_MAKE(77,ff,5f,63,e7,b6,46,33,ac,f4,15,65,b8,64,c0,e6)
+#define GPT_USR_ARM_VERITY SD_ID128_MAKE(c2,15,d7,51,7b,cd,46,49,be,90,66,27,49,0a,4c,05)
+#define GPT_USR_ARM_64_VERITY SD_ID128_MAKE(6e,11,a4,e7,fb,ca,4d,ed,b9,e9,e1,a5,12,bb,66,4e)
+#define GPT_USR_IA64_VERITY SD_ID128_MAKE(6a,49,1e,03,3b,e7,45,45,8e,38,83,32,0e,0e,a8,80)
+#define GPT_USR_RISCV32_VERITY SD_ID128_MAKE(cb,1e,e4,e3,8c,d0,41,36,a0,a4,aa,61,a3,2e,87,30)
+#define GPT_USR_RISCV64_VERITY SD_ID128_MAKE(8f,10,56,be,9b,05,47,c4,81,d6,be,53,12,8e,5b,54)
+
+#if defined(__x86_64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_X86_VERITY
+# define GPT_USR_NATIVE GPT_USR_X86_64
+# define GPT_USR_SECONDARY GPT_USR_X86
+# define GPT_USR_NATIVE_VERITY GPT_USR_X86_64_VERITY
+# define GPT_USR_SECONDARY_VERITY GPT_USR_X86_VERITY
+#elif defined(__i386__)
+# define GPT_ROOT_NATIVE GPT_ROOT_X86
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_VERITY
+# define GPT_USR_NATIVE GPT_USR_X86
+# define GPT_USR_NATIVE_VERITY GPT_USR_X86_VERITY
+#endif
+
+#if defined(__ia64__)
+# define GPT_ROOT_NATIVE GPT_ROOT_IA64
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_IA64_VERITY
+# define GPT_USR_NATIVE GPT_USR_IA64
+# define GPT_USR_NATIVE_VERITY GPT_USR_IA64_VERITY
+#endif
+
+#if defined(__aarch64__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM_64
+# define GPT_ROOT_SECONDARY GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_64_VERITY
+# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_ARM_VERITY
+# define GPT_USR_NATIVE GPT_USR_ARM_64
+# define GPT_USR_SECONDARY GPT_USR_ARM
+# define GPT_USR_NATIVE_VERITY GPT_USR_ARM_64_VERITY
+# define GPT_USR_SECONDARY_VERITY GPT_USR_ARM_VERITY
+#elif defined(__arm__) && (__BYTE_ORDER != __BIG_ENDIAN)
+# define GPT_ROOT_NATIVE GPT_ROOT_ARM
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY
+# define GPT_USR_NATIVE GPT_USR_ARM
+# define GPT_USR_NATIVE_VERITY GPT_USR_ARM_VERITY
+#endif
+
+#if defined(__riscv)
+#if (__riscv_xlen == 32)
+# define GPT_ROOT_NATIVE GPT_ROOT_RISCV32
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_RISCV32_VERITY
+# define GPT_USR_NATIVE GPT_USR_RISCV32
+# define GPT_USR_NATIVE_VERITY GPT_USR_RISCV32_VERITY
+#elif (__riscv_xlen == 64)
+# define GPT_ROOT_NATIVE GPT_ROOT_RISCV64
+# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_RISCV64_VERITY
+# define GPT_USR_NATIVE GPT_USR_RISCV64
+# define GPT_USR_NATIVE_VERITY GPT_USR_RISCV64_VERITY
+#endif
+#endif
+
+#define GPT_FLAG_REQUIRED_PARTITION (1ULL << 0)
+#define GPT_FLAG_NO_BLOCK_IO_PROTOCOL (1ULL << 1)
+#define GPT_FLAG_LEGACY_BIOS_BOOTABLE (1ULL << 2)
+
+/* Flags we recognize on the root, usr, xbootldr, swap, home, srv, var, tmp partitions when doing
+ * auto-discovery. These happen to be identical to what Microsoft defines for its own Basic Data Partitions,
+ * but that's just because we saw no point in defining any other values here. */
+#define GPT_FLAG_READ_ONLY (1ULL << 60)
+#define GPT_FLAG_NO_AUTO (1ULL << 63)
+
+const char *gpt_partition_type_uuid_to_string(sd_id128_t id);
+const char *gpt_partition_type_uuid_to_string_harder(
+ sd_id128_t id,
+ char buffer[static ID128_UUID_STRING_MAX]);
+int gpt_partition_type_uuid_from_string(const char *s, sd_id128_t *ret);
+
+typedef struct GptPartitionType {
+ sd_id128_t uuid;
+ const char *name;
+} GptPartitionType;
+
+extern const GptPartitionType gpt_partition_type_table[];
diff --git a/src/shared/group-record.c b/src/shared/group-record.c
new file mode 100644
index 0000000..da3ed0a
--- /dev/null
+++ b/src/shared/group-record.c
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "group-record.h"
+#include "strv.h"
+#include "user-util.h"
+
+GroupRecord* group_record_new(void) {
+ GroupRecord *h;
+
+ h = new(GroupRecord, 1);
+ if (!h)
+ return NULL;
+
+ *h = (GroupRecord) {
+ .n_ref = 1,
+ .disposition = _USER_DISPOSITION_INVALID,
+ .last_change_usec = UINT64_MAX,
+ .gid = GID_INVALID,
+ };
+
+ return h;
+}
+
+static GroupRecord *group_record_free(GroupRecord *g) {
+ if (!g)
+ return NULL;
+
+ free(g->group_name);
+ free(g->realm);
+ free(g->group_name_and_realm_auto);
+ free(g->description);
+
+ strv_free(g->members);
+ free(g->service);
+ strv_free(g->administrators);
+ strv_free_erase(g->hashed_password);
+
+ json_variant_unref(g->json);
+
+ return mfree(g);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(GroupRecord, group_record, group_record_free);
+
+static int dispatch_privileged(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch privileged_dispatch_table[] = {
+ { "hashedPassword", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(GroupRecord, hashed_password), JSON_SAFE },
+ {},
+ };
+
+ return json_dispatch(variant, privileged_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_binding(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch binding_dispatch_table[] = {
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, binding_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_per_machine(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch per_machine_dispatch_table[] = {
+ { "matchMachineId", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "matchHostname", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 },
+ { "members", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, members), JSON_RELAX},
+ { "administrators", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, administrators), JSON_RELAX},
+ {},
+ };
+
+ JsonVariant *e;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ bool matching = false;
+ JsonVariant *m;
+
+ if (!json_variant_is_object(e))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ m = json_variant_by_key(e, "matchMachineId");
+ if (m) {
+ r = per_machine_id_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+
+ if (!matching) {
+ m = json_variant_by_key(e, "matchHostname");
+ if (m) {
+ r = per_machine_hostname_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+ }
+
+ if (!matching)
+ continue;
+
+ r = json_dispatch(e, per_machine_dispatch_table, NULL, flags, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dispatch_status(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch status_dispatch_table[] = {
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(GroupRecord, service), JSON_SAFE },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, status_dispatch_table, NULL, flags, userdata);
+}
+
+static int group_record_augment(GroupRecord *h, JsonDispatchFlags json_flags) {
+ assert(h);
+
+ if (!FLAGS_SET(h->mask, USER_RECORD_REGULAR))
+ return 0;
+
+ assert(h->group_name);
+
+ if (!h->group_name_and_realm_auto && h->realm) {
+ h->group_name_and_realm_auto = strjoin(h->group_name, "@", h->realm);
+ if (!h->group_name_and_realm_auto)
+ return json_log_oom(h->json, json_flags);
+ }
+
+ return 0;
+}
+
+int group_record_load(
+ GroupRecord *h,
+ JsonVariant *v,
+ UserRecordLoadFlags load_flags) {
+
+ static const JsonDispatch group_dispatch_table[] = {
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_user_group_name, offsetof(GroupRecord, group_name), JSON_RELAX},
+ { "realm", JSON_VARIANT_STRING, json_dispatch_realm, offsetof(GroupRecord, realm), 0 },
+ { "description", JSON_VARIANT_STRING, json_dispatch_gecos, offsetof(GroupRecord, description), 0 },
+ { "disposition", JSON_VARIANT_STRING, json_dispatch_user_disposition, offsetof(GroupRecord, disposition), 0 },
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(GroupRecord, service), JSON_SAFE },
+ { "lastChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(GroupRecord, last_change_usec), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(GroupRecord, gid), 0 },
+ { "members", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, members), JSON_RELAX},
+ { "administrators", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(GroupRecord, administrators), JSON_RELAX},
+
+ { "privileged", JSON_VARIANT_OBJECT, dispatch_privileged, 0, 0 },
+
+ /* Not defined for now, for groups, but let's at least generate sensible errors about it */
+ { "secret", JSON_VARIANT_OBJECT, json_dispatch_unsupported, 0, 0 },
+
+ /* Ignore the perMachine, binding and status stuff here, and process it later, so that it overrides whatever is set above */
+ { "perMachine", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ { "binding", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+ { "status", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+
+ /* Ignore 'signature', we check it with explicit accessors instead */
+ { "signature", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ {},
+ };
+
+ JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags);
+ int r;
+
+ assert(h);
+ assert(!h->json);
+
+ /* Note that this call will leave a half-initialized record around on failure! */
+
+ if ((USER_RECORD_REQUIRE_MASK(load_flags) & (USER_RECORD_SECRET|USER_RECORD_PRIVILEGED)))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Secret and privileged section currently not available for groups, refusing.");
+
+ r = user_group_record_mangle(v, load_flags, &h->json, &h->mask);
+ if (r < 0)
+ return r;
+
+ r = json_dispatch(h->json, group_dispatch_table, NULL, json_flags, h);
+ if (r < 0)
+ return r;
+
+ /* During the parsing operation above we ignored the 'perMachine', 'binding' and 'status' fields, since we want
+ * them to override the global options. Let's process them now. */
+
+ r = dispatch_per_machine("perMachine", json_variant_by_key(h->json, "perMachine"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_binding("binding", json_variant_by_key(h->json, "binding"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_status("status", json_variant_by_key(h->json, "status"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(h->mask, USER_RECORD_REGULAR) && !h->group_name)
+ return json_log(h->json, json_flags, SYNTHETIC_ERRNO(EINVAL), "Group name field missing, refusing.");
+
+ r = group_record_augment(h, json_flags);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int group_record_build(GroupRecord **ret, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ va_list ap;
+ int r;
+
+ assert(ret);
+
+ va_start(ap, ret);
+ r = json_buildv(&v, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ g = group_record_new();
+ if (!g)
+ return -ENOMEM;
+
+ r = group_record_load(g, v, USER_RECORD_LOAD_FULL);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(g);
+ return 0;
+}
+
+const char *group_record_group_name_and_realm(GroupRecord *h) {
+ assert(h);
+
+ /* Return the pre-initialized joined string if it is defined */
+ if (h->group_name_and_realm_auto)
+ return h->group_name_and_realm_auto;
+
+ /* If it's not defined then we cannot have a realm */
+ assert(!h->realm);
+ return h->group_name;
+}
+
+UserDisposition group_record_disposition(GroupRecord *h) {
+ assert(h);
+
+ if (h->disposition >= 0)
+ return h->disposition;
+
+ /* If not declared, derive from GID */
+
+ if (!gid_is_valid(h->gid))
+ return _USER_DISPOSITION_INVALID;
+
+ if (h->gid == 0 || h->gid == GID_NOBODY)
+ return USER_INTRINSIC;
+
+ if (gid_is_system(h->gid))
+ return USER_SYSTEM;
+
+ if (gid_is_dynamic(h->gid))
+ return USER_DYNAMIC;
+
+ if (gid_is_container(h->gid))
+ return USER_CONTAINER;
+
+ if (h->gid > INT32_MAX)
+ return USER_RESERVED;
+
+ return USER_REGULAR;
+}
+
+int group_record_clone(GroupRecord *h, UserRecordLoadFlags flags, GroupRecord **ret) {
+ _cleanup_(group_record_unrefp) GroupRecord *c = NULL;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ c = group_record_new();
+ if (!c)
+ return -ENOMEM;
+
+ r = group_record_load(c, h->json, flags);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
diff --git a/src/shared/group-record.h b/src/shared/group-record.h
new file mode 100644
index 0000000..f810204
--- /dev/null
+++ b/src/shared/group-record.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "json.h"
+#include "user-record.h"
+
+typedef struct GroupRecord {
+ unsigned n_ref;
+ UserRecordMask mask;
+ bool incomplete;
+
+ char *group_name;
+ char *realm;
+ char *group_name_and_realm_auto;
+
+ char *description;
+
+ UserDisposition disposition;
+ uint64_t last_change_usec;
+
+ gid_t gid;
+
+ char **members;
+
+ char *service;
+
+ /* The following exist mostly so that we can cover the full /etc/gshadow set of fields, we currently
+ * do not actually make use of these */
+ char **administrators; /* maps to 'struct sgrp' .sg_adm field */
+ char **hashed_password; /* maps to 'struct sgrp' .sg_passwd field */
+
+ JsonVariant *json;
+} GroupRecord;
+
+GroupRecord* group_record_new(void);
+GroupRecord* group_record_ref(GroupRecord *g);
+GroupRecord* group_record_unref(GroupRecord *g);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(GroupRecord*, group_record_unref);
+
+int group_record_load(GroupRecord *h, JsonVariant *v, UserRecordLoadFlags flags);
+int group_record_build(GroupRecord **ret, ...);
+int group_record_clone(GroupRecord *g, UserRecordLoadFlags flags, GroupRecord **ret);
+
+const char *group_record_group_name_and_realm(GroupRecord *h);
+UserDisposition group_record_disposition(GroupRecord *h);
diff --git a/src/shared/id128-print.c b/src/shared/id128-print.c
new file mode 100644
index 0000000..f232767
--- /dev/null
+++ b/src/shared/id128-print.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "id128-print.h"
+#include "log.h"
+#include "pretty-print.h"
+#include "terminal-util.h"
+
+int id128_pretty_print_sample(const char *name, sd_id128_t id) {
+ _cleanup_free_ char *man_link = NULL, *mod_link = NULL;
+ const char *on, *off;
+ unsigned i;
+
+ on = ansi_highlight();
+ off = ansi_normal();
+
+ if (terminal_urlify("man:systemd-id128(1)", "systemd-id128(1)", &man_link) < 0)
+ return log_oom();
+
+ if (terminal_urlify("https://docs.python.org/3/library/uuid.html", "uuid", &mod_link) < 0)
+ return log_oom();
+
+ printf("As string:\n"
+ "%s" SD_ID128_FORMAT_STR "%s\n\n"
+ "As UUID:\n"
+ "%s" SD_ID128_UUID_FORMAT_STR "%s\n\n"
+ "As %s macro:\n"
+ "%s#define %s SD_ID128_MAKE(",
+ on, SD_ID128_FORMAT_VAL(id), off,
+ on, SD_ID128_FORMAT_VAL(id), off,
+ man_link,
+ on, name);
+ for (i = 0; i < 16; i++)
+ printf("%02x%s", id.bytes[i], i != 15 ? "," : "");
+ printf(")%s\n\n", off);
+
+ printf("As Python constant:\n"
+ ">>> import %s\n"
+ ">>> %s%s = uuid.UUID('" SD_ID128_FORMAT_STR "')%s\n",
+ mod_link,
+ on, name, SD_ID128_FORMAT_VAL(id), off);
+
+ return 0;
+}
+
+
+int id128_pretty_print(sd_id128_t id, Id128PrettyPrintMode mode) {
+ assert(mode >= 0);
+ assert(mode < _ID128_PRETTY_PRINT_MODE_MAX);
+
+ if (mode == ID128_PRINT_ID128) {
+ printf(SD_ID128_FORMAT_STR "\n",
+ SD_ID128_FORMAT_VAL(id));
+ return 0;
+ } else if (mode == ID128_PRINT_UUID) {
+ printf(SD_ID128_UUID_FORMAT_STR "\n",
+ SD_ID128_FORMAT_VAL(id));
+ return 0;
+ } else
+ return id128_pretty_print_sample("XYZ", id);
+}
+
+int id128_print_new(Id128PrettyPrintMode mode) {
+ sd_id128_t id;
+ int r;
+
+ r = sd_id128_randomize(&id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to generate ID: %m");
+
+ return id128_pretty_print(id, mode);
+}
diff --git a/src/shared/id128-print.h b/src/shared/id128-print.h
new file mode 100644
index 0000000..d69cb9b
--- /dev/null
+++ b/src/shared/id128-print.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+typedef enum Id128PrettyPrintMode {
+ ID128_PRINT_ID128,
+ ID128_PRINT_UUID,
+ ID128_PRINT_PRETTY,
+ _ID128_PRETTY_PRINT_MODE_MAX,
+ _ID128_PRETTY_PRINT_MODE_INVALID = -1
+} Id128PrettyPrintMode;
+
+int id128_pretty_print_sample(const char *name, sd_id128_t id);
+int id128_pretty_print(sd_id128_t id, Id128PrettyPrintMode mode);
+int id128_print_new(Id128PrettyPrintMode mode);
diff --git a/src/shared/idn-util.c b/src/shared/idn-util.c
new file mode 100644
index 0000000..83c4b3c
--- /dev/null
+++ b/src/shared/idn-util.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_LIBIDN2
+# include <idn2.h>
+#elif HAVE_LIBIDN
+# include <idna.h>
+# include <stringprep.h>
+#endif
+
+#include "alloc-util.h"
+#include "dlfcn-util.h"
+#include "idn-util.h"
+
+#if HAVE_LIBIDN || HAVE_LIBIDN2
+static void* idn_dl = NULL;
+#endif
+
+#if HAVE_LIBIDN2
+int (*sym_idn2_lookup_u8)(const uint8_t* src, uint8_t** lookupname, int flags) = NULL;
+const char *(*sym_idn2_strerror)(int rc) = NULL;
+int (*sym_idn2_to_unicode_8z8z)(const char * input, char ** output, int flags) = NULL;
+
+int dlopen_idn(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (idn_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libidn2.so.0", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libidn2 support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_idn2_lookup_u8, "idn2_lookup_u8",
+ &sym_idn2_strerror, "idn2_strerror",
+ &sym_idn2_to_unicode_8z8z, "idn2_to_unicode_8z8z",
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* Note that we never release the reference here, because there's no real reason to, after all this
+ * was traditionally a regular shared library dependency which lives forever too. */
+ idn_dl = TAKE_PTR(dl);
+
+ return 1;
+}
+#endif
+
+#if HAVE_LIBIDN
+int (*sym_idna_to_ascii_4i)(const uint32_t * in, size_t inlen, char *out, int flags);
+int (*sym_idna_to_unicode_44i)(const uint32_t * in, size_t inlen,uint32_t * out, size_t * outlen, int flags);
+char* (*sym_stringprep_ucs4_to_utf8)(const uint32_t * str, ssize_t len, size_t * items_read, size_t * items_written);
+uint32_t* (*sym_stringprep_utf8_to_ucs4)(const char *str, ssize_t len, size_t *items_written);
+
+int dlopen_idn(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (idn_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libidn.so.12", RTLD_LAZY);
+ if (!dl) {
+ /* libidn broke ABI in 1.34, but not in a way we care about (a new field got added to an
+ * open-coded struct we do not use), hence support both versions. */
+ dl = dlopen("libidn.so.11", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libidn support is not installed: %s", dlerror());
+ }
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_idna_to_ascii_4i, "idna_to_ascii_4i",
+ &sym_idna_to_unicode_44i, "idna_to_unicode_44i",
+ &sym_stringprep_ucs4_to_utf8, "stringprep_ucs4_to_utf8",
+ &sym_stringprep_utf8_to_ucs4, "stringprep_utf8_to_ucs4",
+ NULL);
+ if (r < 0)
+ return r;
+
+ idn_dl = TAKE_PTR(dl);
+
+ return 1;
+}
+#endif
diff --git a/src/shared/idn-util.h b/src/shared/idn-util.h
new file mode 100644
index 0000000..4698eed
--- /dev/null
+++ b/src/shared/idn-util.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_LIBIDN2
+# include <idn2.h>
+#elif HAVE_LIBIDN
+# include <idna.h>
+# include <stringprep.h>
+#endif
+
+#include <inttypes.h>
+
+#if HAVE_LIBIDN2 || HAVE_LIBIDN
+int dlopen_idn(void);
+#else
+static inline int dlopen_idn(void) {
+ return -EOPNOTSUPP;
+}
+#endif
+
+#if HAVE_LIBIDN2
+extern int (*sym_idn2_lookup_u8)(const uint8_t* src, uint8_t** lookupname, int flags);
+extern const char *(*sym_idn2_strerror)(int rc);
+extern int (*sym_idn2_to_unicode_8z8z)(const char * input, char ** output, int flags);
+#endif
+
+#if HAVE_LIBIDN
+extern int (*sym_idna_to_ascii_4i)(const uint32_t * in, size_t inlen, char *out, int flags);
+extern int (*sym_idna_to_unicode_44i)(const uint32_t * in, size_t inlen,uint32_t * out, size_t * outlen, int flags);
+extern char* (*sym_stringprep_ucs4_to_utf8)(const uint32_t * str, ssize_t len, size_t * items_read, size_t * items_written);
+extern uint32_t* (*sym_stringprep_utf8_to_ucs4)(const char *str, ssize_t len, size_t *items_written);
+#endif
diff --git a/src/shared/ima-util.c b/src/shared/ima-util.c
new file mode 100644
index 0000000..e37c9ad
--- /dev/null
+++ b/src/shared/ima-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "ima-util.h"
+
+static int use_ima_cached = -1;
+
+bool use_ima(void) {
+
+ if (use_ima_cached < 0)
+ use_ima_cached = access("/sys/kernel/security/ima/", F_OK) >= 0;
+
+ return use_ima_cached;
+}
diff --git a/src/shared/ima-util.h b/src/shared/ima-util.h
new file mode 100644
index 0000000..922db78
--- /dev/null
+++ b/src/shared/ima-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool use_ima(void);
diff --git a/src/shared/import-util.c b/src/shared/import-util.c
new file mode 100644
index 0000000..298c066
--- /dev/null
+++ b/src/shared/import-util.c
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "errno-util.h"
+#include "import-util.h"
+#include "log.h"
+#include "macro.h"
+#include "nulstr-util.h"
+#include "path-util.h"
+#include "string-table.h"
+#include "string-util.h"
+
+int import_url_last_component(const char *url, char **ret) {
+ const char *e, *p;
+ char *s;
+
+ e = strchrnul(url, '?');
+
+ while (e > url && e[-1] == '/')
+ e--;
+
+ p = e;
+ while (p > url && p[-1] != '/')
+ p--;
+
+ if (e <= p)
+ return -EINVAL;
+
+ s = strndup(p, e - p);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
+ const char *e;
+ char *s;
+
+ assert(url);
+ assert(ret);
+
+ e = strchrnul(url, '?');
+
+ while (e > url && e[-1] == '/')
+ e--;
+
+ while (e > url && e[-1] != '/')
+ e--;
+
+ if (e <= url)
+ return -EINVAL;
+
+ s = new(char, (e - url) + strlen(suffix) + 1);
+ if (!s)
+ return -ENOMEM;
+
+ strcpy(mempcpy(s, url, e - url), suffix);
+ *ret = s;
+ return 0;
+}
+
+static const char* const import_verify_table[_IMPORT_VERIFY_MAX] = {
+ [IMPORT_VERIFY_NO] = "no",
+ [IMPORT_VERIFY_CHECKSUM] = "checksum",
+ [IMPORT_VERIFY_SIGNATURE] = "signature",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(import_verify, ImportVerify);
+
+int tar_strip_suffixes(const char *name, char **ret) {
+ const char *e;
+ char *s;
+
+ e = endswith(name, ".tar");
+ if (!e)
+ e = endswith(name, ".tar.xz");
+ if (!e)
+ e = endswith(name, ".tar.gz");
+ if (!e)
+ e = endswith(name, ".tar.bz2");
+ if (!e)
+ e = endswith(name, ".tgz");
+ if (!e)
+ e = strchr(name, 0);
+
+ if (e <= name)
+ return -EINVAL;
+
+ s = strndup(name, e - name);
+ if (!s)
+ return -ENOMEM;
+
+ *ret = s;
+ return 0;
+}
+
+int raw_strip_suffixes(const char *p, char **ret) {
+
+ static const char suffixes[] =
+ ".xz\0"
+ ".gz\0"
+ ".bz2\0"
+ ".raw\0"
+ ".qcow2\0"
+ ".img\0"
+ ".bin\0";
+
+ _cleanup_free_ char *q = NULL;
+
+ q = strdup(p);
+ if (!q)
+ return -ENOMEM;
+
+ for (;;) {
+ const char *sfx;
+ bool changed = false;
+
+ NULSTR_FOREACH(sfx, suffixes) {
+ char *e;
+
+ e = endswith(q, sfx);
+ if (e) {
+ *e = 0;
+ changed = true;
+ }
+ }
+
+ if (!changed)
+ break;
+ }
+
+ *ret = TAKE_PTR(q);
+
+ return 0;
+}
+
+int import_assign_pool_quota_and_warn(const char *path) {
+ int r;
+
+ r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+ if (r == -ENOTTY) {
+ log_debug_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, as directory is not on btrfs or not a subvolume. Ignoring.");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines: %m");
+ if (r > 0)
+ log_info("Set up default quota hierarchy for /var/lib/machines.");
+
+ r = btrfs_subvol_auto_qgroup(path, 0, true);
+ if (r == -ENOTTY) {
+ log_debug_errno(r, "Failed to set up quota hierarchy for %s, as directory is not on btrfs or not a subvolume. Ignoring.", path);
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to set up default quota hierarchy for %s: %m", path);
+ if (r > 0)
+ log_debug("Set up default quota hierarchy for %s.", path);
+
+ return 0;
+}
+
+int import_set_nocow_and_log(int fd, const char *path) {
+ int r;
+
+ r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+ if (r < 0)
+ return log_full_errno(
+ ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set file attributes on %s: %m", path);
+
+ return 0;
+}
diff --git a/src/shared/import-util.h b/src/shared/import-util.h
new file mode 100644
index 0000000..8d017f6
--- /dev/null
+++ b/src/shared/import-util.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum ImportVerify {
+ IMPORT_VERIFY_NO,
+ IMPORT_VERIFY_CHECKSUM,
+ IMPORT_VERIFY_SIGNATURE,
+ _IMPORT_VERIFY_MAX,
+ _IMPORT_VERIFY_INVALID = -1,
+} ImportVerify;
+
+int import_url_last_component(const char *url, char **ret);
+int import_url_change_last_component(const char *url, const char *suffix, char **ret);
+
+const char* import_verify_to_string(ImportVerify v) _const_;
+ImportVerify import_verify_from_string(const char *s) _pure_;
+
+int tar_strip_suffixes(const char *name, char **ret);
+int raw_strip_suffixes(const char *name, char **ret);
+
+int import_assign_pool_quota_and_warn(const char *path);
+
+int import_set_nocow_and_log(int fd, const char *path);
diff --git a/src/shared/initreq.h b/src/shared/initreq.h
new file mode 100644
index 0000000..1bf5b8e
--- /dev/null
+++ b/src/shared/initreq.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: LGPL-2+ */
+/*
+ * initreq.h Interface to talk to init through /dev/initctl.
+ *
+ * Copyright (C) 1995-2004 Miquel van Smoorenburg
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * Version: @(#)initreq.h 1.28 31-Mar-2004 MvS
+ */
+
+#pragma once
+
+#include <sys/param.h>
+
+#if defined(__FreeBSD_kernel__)
+# define INIT_FIFO "/etc/.initctl"
+#else
+# define INIT_FIFO "/dev/initctl"
+#endif
+
+#define INIT_MAGIC 0x03091969
+#define INIT_CMD_START 0
+#define INIT_CMD_RUNLVL 1
+#define INIT_CMD_POWERFAIL 2
+#define INIT_CMD_POWERFAILNOW 3
+#define INIT_CMD_POWEROK 4
+#define INIT_CMD_BSD 5
+#define INIT_CMD_SETENV 6
+#define INIT_CMD_UNSETENV 7
+
+#define INIT_CMD_CHANGECONS 12345
+
+#ifdef MAXHOSTNAMELEN
+# define INITRQ_HLEN MAXHOSTNAMELEN
+#else
+# define INITRQ_HLEN 64
+#endif
+
+/*
+ * This is what BSD 4.4 uses when talking to init.
+ * Linux doesn't use this right now.
+ */
+struct init_request_bsd {
+ char gen_id[8]; /* Beats me.. telnetd uses "fe" */
+ char tty_id[16]; /* Tty name minus /dev/tty */
+ char host[INITRQ_HLEN]; /* Hostname */
+ char term_type[16]; /* Terminal type */
+ int signal; /* Signal to send */
+ int pid; /* Process to send to */
+ char exec_name[128]; /* Program to execute */
+ char reserved[128]; /* For future expansion. */
+};
+
+/*
+ * Because of legacy interfaces, "runlevel" and "sleeptime"
+ * aren't in a separate struct in the union.
+ *
+ * The weird sizes are because init expects the whole
+ * struct to be 384 bytes.
+ */
+struct init_request {
+ int magic; /* Magic number */
+ int cmd; /* What kind of request */
+ int runlevel; /* Runlevel to change to */
+ int sleeptime; /* Time between TERM and KILL */
+ union {
+ struct init_request_bsd bsd;
+ char data[368];
+ } i;
+};
diff --git a/src/shared/install-printf.c b/src/shared/install-printf.c
new file mode 100644
index 0000000..6bc3f15
--- /dev/null
+++ b/src/shared/install-printf.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "format-util.h"
+#include "install-printf.h"
+#include "install.h"
+#include "macro.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "unit-name.h"
+#include "user-util.h"
+
+static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ _cleanup_free_ char *prefix = NULL;
+ int r;
+
+ assert(i);
+
+ r = unit_name_to_prefix_and_instance(i->name, &prefix);
+ if (r < 0)
+ return r;
+
+ if (endswith(prefix, "@") && i->default_instance) {
+ char *ans;
+
+ ans = strjoin(prefix, i->default_instance);
+ if (!ans)
+ return -ENOMEM;
+ *ret = ans;
+ } else
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+static int specifier_name(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ char *ans;
+
+ assert(i);
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE) && i->default_instance)
+ return unit_name_replace_instance(i->name, i->default_instance, ret);
+
+ ans = strdup(i->name);
+ if (!ans)
+ return -ENOMEM;
+ *ret = ans;
+ return 0;
+}
+
+static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+
+ assert(i);
+
+ return unit_name_to_prefix(i->name, ret);
+}
+
+static int specifier_instance(char specifier, const void *data, const void *userdata, char **ret) {
+ const UnitFileInstallInfo *i = userdata;
+ char *instance;
+ int r;
+
+ assert(i);
+
+ r = unit_name_to_instance(i->name, &instance);
+ if (r < 0)
+ return r;
+
+ if (isempty(instance)) {
+ r = free_and_strdup(&instance, strempty(i->default_instance));
+ if (r < 0)
+ return r;
+ }
+
+ *ret = instance;
+ return 0;
+}
+
+static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) {
+ _cleanup_free_ char *prefix = NULL;
+ char *dash;
+ int r;
+
+ r = specifier_prefix(specifier, data, userdata, &prefix);
+ if (r < 0)
+ return r;
+
+ dash = strrchr(prefix, '-');
+ if (dash) {
+ dash = strdup(dash + 1);
+ if (!dash)
+ return -ENOMEM;
+ *ret = dash;
+ } else
+ *ret = TAKE_PTR(prefix);
+
+ return 0;
+}
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret) {
+ /* This is similar to unit_name_printf() */
+
+ const Specifier table[] = {
+ { 'i', specifier_instance, NULL },
+ { 'j', specifier_last_component, NULL },
+ { 'n', specifier_name, NULL },
+ { 'N', specifier_prefix_and_instance, NULL },
+ { 'p', specifier_prefix, NULL },
+
+ COMMON_SYSTEM_SPECIFIERS,
+
+ COMMON_CREDS_SPECIFIERS,
+ {}
+ };
+
+ assert(i);
+ assert(format);
+ assert(ret);
+
+ return specifier_printf(format, table, i, ret);
+}
diff --git a/src/shared/install-printf.h b/src/shared/install-printf.h
new file mode 100644
index 0000000..34e1294
--- /dev/null
+++ b/src/shared/install-printf.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "install.h"
+
+int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret);
diff --git a/src/shared/install.c b/src/shared/install.c
new file mode 100644
index 0000000..302497a
--- /dev/null
+++ b/src/shared/install.c
@@ -0,0 +1,3479 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "dirent-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "install-printf.h"
+#include "install.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "path-lookup.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-file.h"
+
+#define UNIT_FILE_FOLLOW_SYMLINK_MAX 64
+
+typedef enum SearchFlags {
+ SEARCH_LOAD = 1 << 0,
+ SEARCH_FOLLOW_CONFIG_SYMLINKS = 1 << 1,
+ SEARCH_DROPIN = 1 << 2,
+} SearchFlags;
+
+typedef struct {
+ OrderedHashmap *will_process;
+ OrderedHashmap *have_processed;
+} InstallContext;
+
+typedef enum {
+ PRESET_UNKNOWN,
+ PRESET_ENABLE,
+ PRESET_DISABLE,
+} PresetAction;
+
+struct UnitFilePresetRule {
+ char *pattern;
+ PresetAction action;
+ char **instances;
+};
+
+static bool unit_file_install_info_has_rules(const UnitFileInstallInfo *i) {
+ assert(i);
+
+ return !strv_isempty(i->aliases) ||
+ !strv_isempty(i->wanted_by) ||
+ !strv_isempty(i->required_by);
+}
+
+static bool unit_file_install_info_has_also(const UnitFileInstallInfo *i) {
+ assert(i);
+
+ return !strv_isempty(i->also);
+}
+
+void unit_file_presets_freep(UnitFilePresets *p) {
+ if (!p)
+ return;
+
+ for (size_t i = 0; i < p->n_rules; i++) {
+ free(p->rules[i].pattern);
+ strv_free(p->rules[i].instances);
+ }
+
+ free(p->rules);
+ p->n_rules = 0;
+}
+
+static const char *const unit_file_type_table[_UNIT_FILE_TYPE_MAX] = {
+ [UNIT_FILE_TYPE_REGULAR] = "regular",
+ [UNIT_FILE_TYPE_SYMLINK] = "symlink",
+ [UNIT_FILE_TYPE_MASKED] = "masked",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(unit_file_type, UnitFileType);
+
+static int in_search_path(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_strv_contains(p->search_path, parent);
+}
+
+static const char* skip_root(const LookupPaths *p, const char *path) {
+ char *e;
+
+ assert(p);
+ assert(path);
+
+ if (!p->root_dir)
+ return path;
+
+ e = path_startswith(path, p->root_dir);
+ if (!e)
+ return NULL;
+
+ /* Make sure the returned path starts with a slash */
+ if (e[0] != '/') {
+ if (e == path || e[-1] != '/')
+ return NULL;
+
+ e--;
+ }
+
+ return e;
+}
+
+static int path_is_generator(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->generator) ||
+ path_equal_ptr(parent, p->generator_early) ||
+ path_equal_ptr(parent, p->generator_late);
+}
+
+static int path_is_transient(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->transient);
+}
+
+static int path_is_control(const LookupPaths *p, const char *path) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ return path_equal_ptr(parent, p->persistent_control) ||
+ path_equal_ptr(parent, p->runtime_control);
+}
+
+static int path_is_config(const LookupPaths *p, const char *path, bool check_parent) {
+ _cleanup_free_ char *parent = NULL;
+
+ assert(p);
+ assert(path);
+
+ /* Note that we do *not* have generic checks for /etc or /run in place, since with
+ * them we couldn't discern configuration from transient or generated units */
+
+ if (check_parent) {
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ path = parent;
+ }
+
+ return path_equal_ptr(path, p->persistent_config) ||
+ path_equal_ptr(path, p->runtime_config);
+}
+
+static int path_is_runtime(const LookupPaths *p, const char *path, bool check_parent) {
+ _cleanup_free_ char *parent = NULL;
+ const char *rpath;
+
+ assert(p);
+ assert(path);
+
+ /* Everything in /run is considered runtime. On top of that we also add
+ * explicit checks for the various runtime directories, as safety net. */
+
+ rpath = skip_root(p, path);
+ if (rpath && path_startswith(rpath, "/run"))
+ return true;
+
+ if (check_parent) {
+ parent = dirname_malloc(path);
+ if (!parent)
+ return -ENOMEM;
+
+ path = parent;
+ }
+
+ return path_equal_ptr(path, p->runtime_config) ||
+ path_equal_ptr(path, p->generator) ||
+ path_equal_ptr(path, p->generator_early) ||
+ path_equal_ptr(path, p->generator_late) ||
+ path_equal_ptr(path, p->transient) ||
+ path_equal_ptr(path, p->runtime_control);
+}
+
+static int path_is_vendor_or_generator(const LookupPaths *p, const char *path) {
+ const char *rpath;
+
+ assert(p);
+ assert(path);
+
+ rpath = skip_root(p, path);
+ if (!rpath)
+ return 0;
+
+ if (path_startswith(rpath, "/usr"))
+ return true;
+
+#if HAVE_SPLIT_USR
+ if (path_startswith(rpath, "/lib"))
+ return true;
+#endif
+
+ if (path_is_generator(p, rpath))
+ return true;
+
+ return path_equal(rpath, SYSTEM_DATA_UNIT_PATH);
+}
+
+static const char* config_path_from_flags(const LookupPaths *paths, UnitFileFlags flags) {
+ assert(paths);
+
+ if (FLAGS_SET(flags, UNIT_FILE_PORTABLE))
+ return FLAGS_SET(flags, UNIT_FILE_RUNTIME) ? paths->runtime_attached : paths->persistent_attached;
+ else
+ return FLAGS_SET(flags, UNIT_FILE_RUNTIME) ? paths->runtime_config : paths->persistent_config;
+}
+
+int unit_file_changes_add(
+ UnitFileChange **changes,
+ size_t *n_changes,
+ int type,
+ const char *path,
+ const char *source) {
+
+ _cleanup_free_ char *p = NULL, *s = NULL;
+ UnitFileChange *c;
+
+ assert(path);
+ assert(!changes == !n_changes);
+
+ if (!changes)
+ return 0;
+
+ c = reallocarray(*changes, *n_changes + 1, sizeof(UnitFileChange));
+ if (!c)
+ return -ENOMEM;
+ *changes = c;
+
+ p = strdup(path);
+ if (source)
+ s = strdup(source);
+
+ if (!p || (source && !s))
+ return -ENOMEM;
+
+ path_simplify(p, false);
+ if (s)
+ path_simplify(s, false);
+
+ c[*n_changes] = (UnitFileChange) { type, p, s };
+ p = s = NULL;
+ (*n_changes) ++;
+ return 0;
+}
+
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes) {
+ assert(changes || n_changes == 0);
+
+ for (size_t i = 0; i < n_changes; i++) {
+ free(changes[i].path);
+ free(changes[i].source);
+ }
+
+ free(changes);
+}
+
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet) {
+ bool logged = false;
+
+ assert(changes || n_changes == 0);
+ /* If verb is not specified, errors are not allowed! */
+ assert(verb || r >= 0);
+
+ for (size_t i = 0; i < n_changes; i++) {
+ assert(verb || changes[i].type >= 0);
+
+ switch(changes[i].type) {
+ case UNIT_FILE_SYMLINK:
+ if (!quiet)
+ log_info("Created symlink %s %s %s.",
+ changes[i].path,
+ special_glyph(SPECIAL_GLYPH_ARROW),
+ changes[i].source);
+ break;
+ case UNIT_FILE_UNLINK:
+ if (!quiet)
+ log_info("Removed %s.", changes[i].path);
+ break;
+ case UNIT_FILE_IS_MASKED:
+ if (!quiet)
+ log_info("Unit %s is masked, ignoring.", changes[i].path);
+ break;
+ case UNIT_FILE_IS_DANGLING:
+ if (!quiet)
+ log_info("Unit %s is an alias to a unit that is not present, ignoring.",
+ changes[i].path);
+ break;
+ case -EEXIST:
+ if (changes[i].source)
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, file %s already exists and is a symlink to %s.",
+ verb, changes[i].path, changes[i].source);
+ else
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, file %s already exists.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -ERFKILL:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s is masked.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -EADDRNOTAVAIL:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s is transient or generated.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -EUCLEAN:
+ log_error_errno(changes[i].type,
+ "Failed to %s unit, \"%s\" is not a valid unit name.",
+ verb, changes[i].path);
+ logged = true;
+ break;
+ case -ELOOP:
+ log_error_errno(changes[i].type, "Failed to %s unit, refusing to operate on linked unit file %s",
+ verb, changes[i].path);
+ logged = true;
+ break;
+
+ case -ENOENT:
+ log_error_errno(changes[i].type, "Failed to %s unit, unit %s does not exist.", verb, changes[i].path);
+ logged = true;
+ break;
+
+ default:
+ assert(changes[i].type < 0);
+ log_error_errno(changes[i].type, "Failed to %s unit, file %s: %m.",
+ verb, changes[i].path);
+ logged = true;
+ }
+ }
+
+ if (r < 0 && !logged)
+ log_error_errno(r, "Failed to %s: %m.", verb);
+}
+
+/**
+ * Checks if two paths or symlinks from wd are the same, when root is the root of the filesystem.
+ * wc should be the full path in the host file system.
+ */
+static bool chroot_symlinks_same(const char *root, const char *wd, const char *a, const char *b) {
+ assert(path_is_absolute(wd));
+
+ /* This will give incorrect results if the paths are relative and go outside
+ * of the chroot. False negatives are possible. */
+
+ if (!root)
+ root = "/";
+
+ a = strjoina(path_is_absolute(a) ? root : wd, "/", a);
+ b = strjoina(path_is_absolute(b) ? root : wd, "/", b);
+ return path_equal_or_files_same(a, b, 0);
+}
+
+static int create_symlink(
+ const LookupPaths *paths,
+ const char *old_path,
+ const char *new_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *dest = NULL, *dirname = NULL;
+ const char *rp;
+ int r;
+
+ assert(old_path);
+ assert(new_path);
+
+ rp = skip_root(paths, old_path);
+ if (rp)
+ old_path = rp;
+
+ /* Actually create a symlink, and remember that we did. Is
+ * smart enough to check if there's already a valid symlink in
+ * place.
+ *
+ * Returns 1 if a symlink was created or already exists and points to
+ * the right place, or negative on error.
+ */
+
+ mkdir_parents_label(new_path, 0755);
+
+ if (symlink(old_path, new_path) >= 0) {
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+ return 1;
+ }
+
+ if (errno != EEXIST) {
+ unit_file_changes_add(changes, n_changes, -errno, new_path, NULL);
+ return -errno;
+ }
+
+ r = readlink_malloc(new_path, &dest);
+ if (r < 0) {
+ /* translate EINVAL (non-symlink exists) to EEXIST */
+ if (r == -EINVAL)
+ r = -EEXIST;
+
+ unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+ return r;
+ }
+
+ dirname = dirname_malloc(new_path);
+ if (!dirname)
+ return -ENOMEM;
+
+ if (chroot_symlinks_same(paths->root_dir, dirname, dest, old_path)) {
+ log_debug("Symlink %s → %s already exists", new_path, dest);
+ return 1;
+ }
+
+ if (!force) {
+ unit_file_changes_add(changes, n_changes, -EEXIST, new_path, dest);
+ return -EEXIST;
+ }
+
+ r = symlink_atomic(old_path, new_path);
+ if (r < 0) {
+ unit_file_changes_add(changes, n_changes, r, new_path, NULL);
+ return r;
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, new_path, NULL);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path);
+
+ return 1;
+}
+
+static int mark_symlink_for_removal(
+ Set **remove_symlinks_to,
+ const char *p) {
+
+ char *n;
+ int r;
+
+ assert(p);
+
+ r = set_ensure_allocated(remove_symlinks_to, &path_hash_ops);
+ if (r < 0)
+ return r;
+
+ n = strdup(p);
+ if (!n)
+ return -ENOMEM;
+
+ path_simplify(n, false);
+
+ r = set_consume(*remove_symlinks_to, n);
+ if (r == -EEXIST)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int remove_marked_symlinks_fd(
+ Set *remove_symlinks_to,
+ int fd,
+ const char *path,
+ const char *config_path,
+ const LookupPaths *lp,
+ bool dry_run,
+ bool *restart,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(remove_symlinks_to);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(lp);
+ assert(restart);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ rewinddir(d);
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, dry_run, restart, changes, n_changes);
+ if (q < 0 && r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ const char *rp;
+ bool found;
+ int q;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+ path_simplify(p, false);
+
+ q = chase_symlinks(p, NULL, CHASE_NONEXISTENT, &dest, NULL);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* We remove all links pointing to a file or path that is marked, as well as all files sharing
+ * the same name as a file that is marked. */
+
+ found = set_contains(remove_symlinks_to, dest) ||
+ set_contains(remove_symlinks_to, basename(dest)) ||
+ set_contains(remove_symlinks_to, de->d_name);
+
+ if (!found)
+ continue;
+
+ if (!dry_run) {
+ if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) {
+ if (r == 0)
+ r = -errno;
+ unit_file_changes_add(changes, n_changes, -errno, p, NULL);
+ continue;
+ }
+
+ (void) rmdir_parents(p, config_path);
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, p, NULL);
+
+ /* Now, remember the full path (but with the root prefix removed) of
+ * the symlink we just removed, and remove any symlinks to it, too. */
+
+ rp = skip_root(lp, p);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: p);
+ if (q < 0)
+ return q;
+ if (q > 0 && !dry_run)
+ *restart = true;
+ }
+ }
+
+ return r;
+}
+
+static int remove_marked_symlinks(
+ Set *remove_symlinks_to,
+ const char *config_path,
+ const LookupPaths *lp,
+ bool dry_run,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_close_ int fd = -1;
+ bool restart;
+ int r = 0;
+
+ assert(config_path);
+ assert(lp);
+
+ if (set_size(remove_symlinks_to) <= 0)
+ return 0;
+
+ fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+ if (fd < 0)
+ return errno == ENOENT ? 0 : -errno;
+
+ do {
+ int q, cfd;
+ restart = false;
+
+ cfd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (cfd < 0)
+ return -errno;
+
+ /* This takes possession of cfd and closes it */
+ q = remove_marked_symlinks_fd(remove_symlinks_to, cfd, config_path, config_path, lp, dry_run, &restart, changes, n_changes);
+ if (r == 0)
+ r = q;
+ } while (restart);
+
+ return r;
+}
+
+static int is_symlink_with_known_name(const UnitFileInstallInfo *i, const char *name) {
+ int r;
+
+ if (streq(name, i->name))
+ return true;
+
+ if (strv_contains(i->aliases, name))
+ return true;
+
+ /* Look for template symlink matching DefaultInstance */
+ if (i->default_instance && unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *s = NULL;
+
+ r = unit_name_replace_instance(i->name, i->default_instance, &s);
+ if (r < 0) {
+ if (r != -EINVAL)
+ return r;
+
+ } else if (streq(name, s))
+ return true;
+ }
+
+ return false;
+}
+
+static int find_symlinks_fd(
+ const char *root_dir,
+ const UnitFileInstallInfo *i,
+ bool match_aliases,
+ bool ignore_same_name,
+ int fd,
+ const char *path,
+ const char *config_path,
+ bool *same_name_link) {
+
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r = 0;
+
+ assert(i);
+ assert(fd >= 0);
+ assert(path);
+ assert(config_path);
+ assert(same_name_link);
+
+ d = fdopendir(fd);
+ if (!d) {
+ safe_close(fd);
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ dirent_ensure_type(d, de);
+
+ if (de->d_type == DT_DIR) {
+ _cleanup_free_ char *p = NULL;
+ int nfd, q;
+
+ nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
+ if (nfd < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ if (r == 0)
+ r = -errno;
+ continue;
+ }
+
+ p = path_make_absolute(de->d_name, path);
+ if (!p) {
+ safe_close(nfd);
+ return -ENOMEM;
+ }
+
+ /* This will close nfd, regardless whether it succeeds or not */
+ q = find_symlinks_fd(root_dir, i, match_aliases, ignore_same_name, nfd,
+ p, config_path, same_name_link);
+ if (q > 0)
+ return 1;
+ if (r == 0)
+ r = q;
+
+ } else if (de->d_type == DT_LNK) {
+ _cleanup_free_ char *p = NULL, *dest = NULL;
+ bool found_path = false, found_dest, b = false;
+ int q;
+
+ /* Acquire symlink name */
+ p = path_make_absolute(de->d_name, path);
+ if (!p)
+ return -ENOMEM;
+
+ /* Acquire symlink destination */
+ q = readlink_malloc(p, &dest);
+ if (q == -ENOENT)
+ continue;
+ if (q < 0) {
+ if (r == 0)
+ r = q;
+ continue;
+ }
+
+ /* Make absolute */
+ if (!path_is_absolute(dest)) {
+ char *x;
+
+ x = path_join(root_dir, dest);
+ if (!x)
+ return -ENOMEM;
+
+ free_and_replace(dest, x);
+ }
+
+ assert(unit_name_is_valid(i->name, UNIT_NAME_ANY));
+ if (!ignore_same_name)
+ /* Check if the symlink itself matches what we are looking for.
+ *
+ * If ignore_same_name is specified, we are in one of the directories which
+ * have lower priority than the unit file, and even if a file or symlink with
+ * this name was found, we should ignore it. */
+ found_path = streq(de->d_name, i->name);
+
+ /* Check if what the symlink points to matches what we are looking for */
+ found_dest = streq(basename(dest), i->name);
+
+ if (found_path && found_dest) {
+ _cleanup_free_ char *t = NULL;
+
+ /* Filter out same name links in the main
+ * config path */
+ t = path_make_absolute(i->name, config_path);
+ if (!t)
+ return -ENOMEM;
+
+ b = path_equal(t, p);
+ }
+
+ if (b)
+ *same_name_link = true;
+ else if (found_path || found_dest) {
+ if (!match_aliases)
+ return 1;
+
+ /* Check if symlink name is in the set of names used by [Install] */
+ q = is_symlink_with_known_name(i, de->d_name);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ return 1;
+ }
+ }
+ }
+
+ return r;
+}
+
+static int find_symlinks(
+ const char *root_dir,
+ const UnitFileInstallInfo *i,
+ bool match_name,
+ bool ignore_same_name,
+ const char *config_path,
+ bool *same_name_link) {
+
+ int fd;
+
+ assert(i);
+ assert(config_path);
+ assert(same_name_link);
+
+ fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC);
+ if (fd < 0) {
+ if (IN_SET(errno, ENOENT, ENOTDIR, EACCES))
+ return 0;
+ return -errno;
+ }
+
+ /* This takes possession of fd and closes it */
+ return find_symlinks_fd(root_dir, i, match_name, ignore_same_name, fd,
+ config_path, config_path, same_name_link);
+}
+
+static int find_symlinks_in_scope(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const UnitFileInstallInfo *i,
+ bool match_name,
+ UnitFileState *state) {
+
+ bool same_name_link_runtime = false, same_name_link_config = false;
+ bool enabled_in_runtime = false, enabled_at_all = false;
+ bool ignore_same_name = false;
+ char **p;
+ int r;
+
+ assert(paths);
+ assert(i);
+
+ /* As we iterate over the list of search paths in paths->search_path, we may encounter "same name"
+ * symlinks. The ones which are "below" (i.e. have lower priority) than the unit file itself are
+ * effectively masked, so we should ignore them. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ bool same_name_link = false;
+
+ r = find_symlinks(paths->root_dir, i, match_name, ignore_same_name, *p, &same_name_link);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* We found symlinks in this dir? Yay! Let's see where precisely it is enabled. */
+
+ if (path_equal_ptr(*p, paths->persistent_config)) {
+ /* This is the best outcome, let's return it immediately. */
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ /* look for global enablement of user units */
+ if (scope == UNIT_FILE_USER && path_is_user_config_dir(*p)) {
+ *state = UNIT_FILE_ENABLED;
+ return 1;
+ }
+
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ enabled_in_runtime = true;
+ else
+ enabled_at_all = true;
+
+ } else if (same_name_link) {
+ if (path_equal_ptr(*p, paths->persistent_config))
+ same_name_link_config = true;
+ else {
+ r = path_is_runtime(paths, *p, false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ same_name_link_runtime = true;
+ }
+ }
+
+ /* Check if next iteration will be "below" the unit file (either a regular file
+ * or a symlink), and hence should be ignored */
+ if (!ignore_same_name && path_startswith(i->path, *p))
+ ignore_same_name = true;
+ }
+
+ if (enabled_in_runtime) {
+ *state = UNIT_FILE_ENABLED_RUNTIME;
+ return 1;
+ }
+
+ /* Here's a special rule: if the unit we are looking for is an instance, and it symlinked in the search path
+ * outside of runtime and configuration directory, then we consider it statically enabled. Note we do that only
+ * for instance, not for regular names, as those are merely aliases, while instances explicitly instantiate
+ * something, and hence are a much stronger concept. */
+ if (enabled_at_all && unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) {
+ *state = UNIT_FILE_STATIC;
+ return 1;
+ }
+
+ /* Hmm, we didn't find it, but maybe we found the same name
+ * link? */
+ if (same_name_link_config) {
+ *state = UNIT_FILE_LINKED;
+ return 1;
+ }
+ if (same_name_link_runtime) {
+ *state = UNIT_FILE_LINKED_RUNTIME;
+ return 1;
+ }
+
+ return 0;
+}
+
+static void install_info_free(UnitFileInstallInfo *i) {
+
+ if (!i)
+ return;
+
+ free(i->name);
+ free(i->path);
+ strv_free(i->aliases);
+ strv_free(i->wanted_by);
+ strv_free(i->required_by);
+ strv_free(i->also);
+ free(i->default_instance);
+ free(i->symlink_target);
+ free(i);
+}
+
+static void install_context_done(InstallContext *c) {
+ assert(c);
+
+ c->will_process = ordered_hashmap_free_with_destructor(c->will_process, install_info_free);
+ c->have_processed = ordered_hashmap_free_with_destructor(c->have_processed, install_info_free);
+}
+
+static UnitFileInstallInfo *install_info_find(InstallContext *c, const char *name) {
+ UnitFileInstallInfo *i;
+
+ i = ordered_hashmap_get(c->have_processed, name);
+ if (i)
+ return i;
+
+ return ordered_hashmap_get(c->will_process, name);
+}
+
+static int install_info_may_process(
+ const UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+ assert(i);
+ assert(paths);
+
+ /* Checks whether the loaded unit file is one we should process, or is masked,
+ * transient or generated and thus not subject to enable/disable operations. */
+
+ if (i->type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, -ERFKILL, i->path, NULL);
+ return -ERFKILL;
+ }
+ if (path_is_generator(paths, i->path) ||
+ path_is_transient(paths, i->path)) {
+ unit_file_changes_add(changes, n_changes, -EADDRNOTAVAIL, i->path, NULL);
+ return -EADDRNOTAVAIL;
+ }
+
+ return 0;
+}
+
+/**
+ * Adds a new UnitFileInstallInfo entry under name in the InstallContext.will_process
+ * hashmap, or retrieves the existing one if already present.
+ *
+ * Returns negative on error, 0 if the unit was already known, 1 otherwise.
+ */
+static int install_info_add(
+ InstallContext *c,
+ const char *name,
+ const char *path,
+ bool auxiliary,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i = NULL;
+ int r;
+
+ assert(c);
+
+ if (!name) {
+ /* 'name' and 'path' must not both be null. Check here 'path' using assert_se() to
+ * workaround a bug in gcc that generates a -Wnonnull warning when calling basename(),
+ * but this cannot be possible in any code path (See #6119). */
+ assert_se(path);
+ name = basename(path);
+ }
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ i = install_info_find(c, name);
+ if (i) {
+ i->auxiliary = i->auxiliary && auxiliary;
+
+ if (ret)
+ *ret = i;
+ return 0;
+ }
+
+ r = ordered_hashmap_ensure_allocated(&c->will_process, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ i = new(UnitFileInstallInfo, 1);
+ if (!i)
+ return -ENOMEM;
+
+ *i = (UnitFileInstallInfo) {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ .auxiliary = auxiliary,
+ };
+
+ i->name = strdup(name);
+ if (!i->name) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (path) {
+ i->path = strdup(path);
+ if (!i->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ r = ordered_hashmap_put(c->will_process, i->name, i);
+ if (r < 0)
+ goto fail;
+
+ if (ret)
+ *ret = i;
+
+ return 1;
+
+fail:
+ install_info_free(i);
+ return r;
+}
+
+static int config_parse_alias(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitType type;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ type = unit_name_to_type(unit);
+ if (!unit_type_may_alias(type))
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "Alias= is not allowed for %s units, ignoring.",
+ unit_type_to_string(type));
+
+ return config_parse_strv(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+}
+
+static int config_parse_also(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitFileInstallInfo *info = userdata;
+ InstallContext *c = data;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *printed = NULL;
+
+ r = extract_first_word(&rvalue, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ r = install_full_printf(info, word, &printed);
+ if (r < 0)
+ return r;
+
+ r = install_info_add(c, printed, NULL, true, NULL);
+ if (r < 0)
+ return r;
+
+ r = strv_push(&info->also, printed);
+ if (r < 0)
+ return r;
+
+ printed = NULL;
+ }
+
+ return 0;
+}
+
+static int config_parse_default_instance(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ UnitFileInstallInfo *i = data;
+ _cleanup_free_ char *printed = NULL;
+ int r;
+
+ assert(unit);
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ if (unit_name_is_valid(unit, UNIT_NAME_INSTANCE))
+ /* When enabling an instance, we might be using a template unit file,
+ * but we should ignore DefaultInstance silently. */
+ return 0;
+ if (!unit_name_is_valid(unit, UNIT_NAME_TEMPLATE))
+ return log_syntax(unit, LOG_WARNING, filename, line, 0,
+ "DefaultInstance= only makes sense for template units, ignoring.");
+
+ r = install_full_printf(i, rvalue, &printed);
+ if (r < 0)
+ return r;
+
+ if (isempty(printed)) {
+ i->default_instance = mfree(i->default_instance);
+ return 0;
+ }
+
+ if (!unit_instance_is_valid(printed))
+ return log_syntax(unit, LOG_WARNING, filename, line, SYNTHETIC_ERRNO(EINVAL),
+ "Invalid DefaultInstance= value \"%s\".", printed);
+
+ return free_and_replace(i->default_instance, printed);
+}
+
+static int unit_file_load(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+
+ const ConfigTableItem items[] = {
+ { "Install", "Alias", config_parse_alias, 0, &info->aliases },
+ { "Install", "WantedBy", config_parse_strv, 0, &info->wanted_by },
+ { "Install", "RequiredBy", config_parse_strv, 0, &info->required_by },
+ { "Install", "DefaultInstance", config_parse_default_instance, 0, info },
+ { "Install", "Also", config_parse_also, 0, c },
+ {}
+ };
+
+ UnitType type;
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int r;
+
+ assert(info);
+ assert(path);
+
+ if (!(flags & SEARCH_DROPIN)) {
+ /* Loading or checking for the main unit file… */
+
+ type = unit_name_to_type(info->name);
+ if (type < 0)
+ return -EINVAL;
+ if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE) && !unit_type_may_template(type))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: unit type %s cannot be templated, ignoring.", path, unit_type_to_string(type));
+
+ if (!(flags & SEARCH_LOAD)) {
+ if (lstat(path, &st) < 0)
+ return -errno;
+
+ if (null_or_empty(&st))
+ info->type = UNIT_FILE_TYPE_MASKED;
+ else if (S_ISREG(st.st_mode))
+ info->type = UNIT_FILE_TYPE_REGULAR;
+ else if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ else if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ else
+ return -ENOTTY;
+
+ return 0;
+ }
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+ } else {
+ /* Operating on a drop-in file. If we aren't supposed to load the unit file drop-ins don't matter, let's hence shortcut this. */
+
+ if (!(flags & SEARCH_LOAD))
+ return 0;
+
+ fd = chase_symlinks_and_open(path, root_dir, 0, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL);
+ if (fd < 0)
+ return fd;
+ }
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (null_or_empty(&st)) {
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ return 0;
+ }
+
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ f = take_fdopen(&fd, "r");
+ if (!f)
+ return -errno;
+
+ /* c is only needed if we actually load the file (it's referenced from items[] btw, in case you wonder.) */
+ assert(c);
+
+ r = config_parse(info->name, path, f,
+ "Install\0"
+ "-Unit\0"
+ "-Automount\0"
+ "-Device\0"
+ "-Mount\0"
+ "-Path\0"
+ "-Scope\0"
+ "-Service\0"
+ "-Slice\0"
+ "-Socket\0"
+ "-Swap\0"
+ "-Target\0"
+ "-Timer\0",
+ config_item_table_lookup, items,
+ 0, info,
+ NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse %s: %m", info->name);
+
+ if ((flags & SEARCH_DROPIN) == 0)
+ info->type = UNIT_FILE_TYPE_REGULAR;
+
+ return
+ (int) strv_length(info->aliases) +
+ (int) strv_length(info->wanted_by) +
+ (int) strv_length(info->required_by);
+}
+
+static int unit_file_load_or_readlink(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const char *path,
+ const char *root_dir,
+ SearchFlags flags) {
+ _cleanup_free_ char *resolved = NULL;
+ struct stat st;
+ int r;
+
+ r = unit_file_load(c, info, path, root_dir, flags);
+ if (r != -ELOOP || (flags & SEARCH_DROPIN))
+ return r;
+
+ r = chase_symlinks(path, root_dir, CHASE_WARN | CHASE_NONEXISTENT, &resolved, NULL);
+ if (r >= 0 &&
+ root_dir &&
+ path_equal_ptr(path_startswith(resolved, root_dir), "dev/null"))
+ /* When looking under root_dir, we can't expect /dev/ to be mounted,
+ * so let's see if the path is a (possibly dangling) symlink to /dev/null. */
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ else if (r > 0 &&
+ stat(resolved, &st) >= 0 &&
+ null_or_empty(&st))
+
+ info->type = UNIT_FILE_TYPE_MASKED;
+
+ else {
+ _cleanup_free_ char *target = NULL;
+ const char *bn;
+ UnitType a, b;
+
+ /* This is a symlink, let's read it. We read the link again, because last time
+ * we followed the link until resolution, and here we need to do one step. */
+
+ r = readlink_malloc(path, &target);
+ if (r < 0)
+ return r;
+
+ bn = basename(target);
+
+ if (unit_name_is_valid(info->name, UNIT_NAME_PLAIN)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_PLAIN))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ } else if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE)) {
+
+ if (!unit_name_is_valid(bn, UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ /* Enforce that the symlink destination does not
+ * change the unit file type. */
+
+ a = unit_name_to_type(info->name);
+ b = unit_name_to_type(bn);
+ if (a < 0 || b < 0 || a != b)
+ return -EINVAL;
+
+ if (path_is_absolute(target))
+ /* This is an absolute path, prefix the root so that we always deal with fully qualified paths */
+ info->symlink_target = path_join(root_dir, target);
+ else
+ /* This is a relative path, take it relative to the dir the symlink is located in. */
+ info->symlink_target = file_in_same_dir(path, target);
+ if (!info->symlink_target)
+ return -ENOMEM;
+
+ info->type = UNIT_FILE_TYPE_SYMLINK;
+ }
+
+ return 0;
+}
+
+static int unit_file_search(
+ InstallContext *c,
+ UnitFileInstallInfo *info,
+ const LookupPaths *paths,
+ SearchFlags flags) {
+
+ const char *dropin_dir_name = NULL, *dropin_template_dir_name = NULL;
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL;
+ _cleanup_free_ char *template = NULL;
+ bool found_unit = false;
+ int r, result;
+ char **p;
+
+ assert(info);
+ assert(paths);
+
+ /* Was this unit already loaded? */
+ if (info->type != _UNIT_FILE_TYPE_INVALID)
+ return 0;
+
+ if (info->path)
+ return unit_file_load_or_readlink(c, info, info->path, paths->root_dir, flags);
+
+ assert(info->name);
+
+ if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) {
+ r = unit_name_template(info->name, &template);
+ if (r < 0)
+ return r;
+ }
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = path_join(*p, info->name);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+
+ if (!found_unit && template) {
+
+ /* Unit file doesn't exist, however instance
+ * enablement was requested. We will check if it is
+ * possible to load template unit file. */
+
+ STRV_FOREACH(p, paths->search_path) {
+ _cleanup_free_ char *path = NULL;
+
+ path = path_join(*p, template);
+ if (!path)
+ return -ENOMEM;
+
+ r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags);
+ if (r >= 0) {
+ info->path = TAKE_PTR(path);
+ result = r;
+ found_unit = true;
+ break;
+ } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES))
+ return r;
+ }
+ }
+
+ if (!found_unit)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Cannot find unit %s%s%s.",
+ info->name, template ? " or " : "", strempty(template));
+
+ if (info->type == UNIT_FILE_TYPE_MASKED)
+ return result;
+
+ /* Search for drop-in directories */
+
+ dropin_dir_name = strjoina(info->name, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+
+ if (template) {
+ dropin_template_dir_name = strjoina(template, ".d");
+ STRV_FOREACH(p, paths->search_path) {
+ char *path;
+
+ path = path_join(*p, dropin_template_dir_name);
+ if (!path)
+ return -ENOMEM;
+
+ r = strv_consume(&dirs, path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* Load drop-in conf files */
+
+ r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) dirs);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get list of conf files: %m");
+
+ STRV_FOREACH(p, files) {
+ r = unit_file_load_or_readlink(c, info, *p, paths->root_dir, flags | SEARCH_DROPIN);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to load conf file %s: %m", *p);
+ }
+
+ return result;
+}
+
+static int install_info_follow(
+ InstallContext *c,
+ UnitFileInstallInfo *i,
+ const char *root_dir,
+ SearchFlags flags,
+ bool ignore_different_name) {
+
+ assert(c);
+ assert(i);
+
+ if (i->type != UNIT_FILE_TYPE_SYMLINK)
+ return -EINVAL;
+ if (!i->symlink_target)
+ return -EINVAL;
+
+ /* If the basename doesn't match, the caller should add a
+ * complete new entry for this. */
+
+ if (!ignore_different_name && !streq(basename(i->symlink_target), i->name))
+ return -EXDEV;
+
+ free_and_replace(i->path, i->symlink_target);
+ i->type = _UNIT_FILE_TYPE_INVALID;
+
+ return unit_file_load_or_readlink(c, i, i->path, root_dir, flags);
+}
+
+/**
+ * Search for the unit file. If the unit name is a symlink, follow the symlink to the
+ * target, maybe more than once. Propagate the instance name if present.
+ */
+static int install_info_traverse(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ UnitFileInstallInfo *start,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret) {
+
+ UnitFileInstallInfo *i;
+ unsigned k = 0;
+ int r;
+
+ assert(paths);
+ assert(start);
+ assert(c);
+
+ r = unit_file_search(c, start, paths, flags);
+ if (r < 0)
+ return r;
+
+ i = start;
+ while (i->type == UNIT_FILE_TYPE_SYMLINK) {
+ /* Follow the symlink */
+
+ if (++k > UNIT_FILE_FOLLOW_SYMLINK_MAX)
+ return -ELOOP;
+
+ if (!(flags & SEARCH_FOLLOW_CONFIG_SYMLINKS)) {
+ r = path_is_config(paths, i->path, true);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -ELOOP;
+ }
+
+ r = install_info_follow(c, i, paths->root_dir, flags, false);
+ if (r == -EXDEV) {
+ _cleanup_free_ char *buffer = NULL;
+ const char *bn;
+
+ /* Target has a different name, create a new
+ * install info object for that, and continue
+ * with that. */
+
+ bn = basename(i->symlink_target);
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_INSTANCE) &&
+ unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) {
+
+ _cleanup_free_ char *instance = NULL;
+
+ r = unit_name_to_instance(i->name, &instance);
+ if (r < 0)
+ return r;
+
+ r = unit_name_replace_instance(bn, instance, &buffer);
+ if (r < 0)
+ return r;
+
+ if (streq(buffer, i->name)) {
+
+ /* We filled in the instance, and the target stayed the same? If so, then let's
+ * honour the link as it is. */
+
+ r = install_info_follow(c, i, paths->root_dir, flags, true);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ bn = buffer;
+ }
+
+ r = install_info_add(c, bn, NULL, false, &i);
+ if (r < 0)
+ return r;
+
+ /* Try again, with the new target we found. */
+ r = unit_file_search(c, i, paths, flags);
+ if (r == -ENOENT)
+ /* Translate error code to highlight this specific case */
+ return -ENOLINK;
+ }
+
+ if (r < 0)
+ return r;
+ }
+
+ if (ret)
+ *ret = i;
+
+ return 0;
+}
+
+/**
+ * Call install_info_add() with name_or_path as the path (if name_or_path starts with "/")
+ * or the name (otherwise). root_dir is prepended to the path.
+ */
+static int install_info_add_auto(
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name_or_path,
+ UnitFileInstallInfo **ret) {
+
+ assert(c);
+ assert(name_or_path);
+
+ if (path_is_absolute(name_or_path)) {
+ const char *pp;
+
+ pp = prefix_roota(paths->root_dir, name_or_path);
+
+ return install_info_add(c, NULL, pp, false, ret);
+ } else
+ return install_info_add(c, name_or_path, NULL, false, ret);
+}
+
+static int install_info_discover(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(name);
+
+ r = install_info_add_auto(c, paths, name, &i);
+ if (r >= 0)
+ r = install_info_traverse(scope, c, paths, i, flags, ret);
+
+ if (r < 0)
+ unit_file_changes_add(changes, n_changes, r, name, NULL);
+ return r;
+}
+
+static int install_info_discover_and_check(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *name,
+ SearchFlags flags,
+ UnitFileInstallInfo **ret,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r;
+
+ r = install_info_discover(scope, c, paths, name, flags, ret, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ return install_info_may_process(ret ? *ret : NULL, paths, changes, n_changes);
+}
+
+int unit_file_verify_alias(const UnitFileInstallInfo *i, const char *dst, char **ret_dst) {
+ _cleanup_free_ char *dst_updated = NULL;
+ int r;
+
+ /* Verify that dst is a valid either a valid alias or a valid .wants/.requires symlink for the target
+ * unit *i. Return negative on error or if not compatible, zero on success.
+ *
+ * ret_dst is set in cases where "instance propagation" happens, i.e. when the instance part is
+ * inserted into dst. It is not normally set, even on success, so that the caller can easily
+ * distinguish the case where instance propagation occurred.
+ */
+
+ const char *path_alias = strrchr(dst, '/');
+ if (path_alias) {
+ /* This branch covers legacy Alias= function of creating .wants and .requires symlinks. */
+ _cleanup_free_ char *dir = NULL;
+ char *p;
+
+ path_alias ++; /* skip over slash */
+
+ dir = dirname_malloc(dst);
+ if (!dir)
+ return log_oom();
+
+ p = endswith(dir, ".wants");
+ if (!p)
+ p = endswith(dir, ".requires");
+ if (!p)
+ return log_warning_errno(SYNTHETIC_ERRNO(EXDEV),
+ "Invalid path \"%s\" in alias.", dir);
+ *p = '\0'; /* dir should now be a unit name */
+
+ r = unit_name_classify(dir);
+ if (r < 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EXDEV),
+ "Invalid unit name component \"%s\" in alias.", dir);
+
+ const bool instance_propagation = r == UNIT_NAME_TEMPLATE;
+
+ /* That's the name we want to use for verification. */
+ r = unit_symlink_name_compatible(path_alias, i->name, instance_propagation);
+ if (r < 0)
+ return log_error_errno(r, "Failed to verify alias validity: %m");
+ if (r == 0)
+ return log_warning_errno(SYNTHETIC_ERRNO(EXDEV),
+ "Invalid unit %s symlink %s.",
+ i->name, dst);
+
+ } else {
+ /* If the symlink target has an instance set and the symlink source doesn't, we "propagate
+ * the instance", i.e. instantiate the symlink source with the target instance. */
+ if (unit_name_is_valid(dst, UNIT_NAME_TEMPLATE)) {
+ _cleanup_free_ char *inst = NULL;
+
+ r = unit_name_to_instance(i->name, &inst);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract instance name from %s: %m", i->name);
+
+ if (r == UNIT_NAME_INSTANCE) {
+ r = unit_name_replace_instance(dst, inst, &dst_updated);
+ if (r < 0)
+ return log_error_errno(r, "Failed to build unit name from %s+%s: %m",
+ dst, inst);
+ }
+ }
+
+ r = unit_validate_alias_symlink_and_warn(dst_updated ?: dst, i->name);
+ if (r < 0)
+ return r;
+
+ }
+
+ *ret_dst = TAKE_PTR(dst_updated);
+ return 0;
+}
+
+static int install_info_symlink_alias(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ char **s;
+ int r = 0, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ STRV_FOREACH(s, i->aliases) {
+ _cleanup_free_ char *alias_path = NULL, *dst = NULL, *dst_updated = NULL;
+
+ q = install_full_printf(i, *s, &dst);
+ if (q < 0)
+ return q;
+
+ q = unit_file_verify_alias(i, dst, &dst_updated);
+ if (q < 0)
+ continue;
+
+ alias_path = path_make_absolute(dst_updated ?: dst, config_path);
+ if (!alias_path)
+ return -ENOMEM;
+
+ q = create_symlink(paths, i->path, alias_path, force, changes, n_changes);
+ if (r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int install_info_symlink_wants(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ char **list,
+ const char *suffix,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *buf = NULL;
+ const char *n;
+ char **s;
+ int r = 0, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ if (strv_isempty(list))
+ return 0;
+
+ if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) {
+ UnitFileInstallInfo instance = {
+ .type = _UNIT_FILE_TYPE_INVALID,
+ };
+ _cleanup_free_ char *path = NULL;
+
+ /* If this is a template, and we have no instance, don't do anything */
+ if (!i->default_instance)
+ return 1;
+
+ r = unit_name_replace_instance(i->name, i->default_instance, &buf);
+ if (r < 0)
+ return r;
+
+ instance.name = buf;
+ r = unit_file_search(NULL, &instance, paths, SEARCH_FOLLOW_CONFIG_SYMLINKS);
+ if (r < 0)
+ return r;
+
+ path = TAKE_PTR(instance.path);
+
+ if (instance.type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, -ERFKILL, path, NULL);
+ return -ERFKILL;
+ }
+
+ n = buf;
+ } else
+ n = i->name;
+
+ STRV_FOREACH(s, list) {
+ _cleanup_free_ char *path = NULL, *dst = NULL;
+
+ q = install_full_printf(i, *s, &dst);
+ if (q < 0)
+ return q;
+
+ if (!unit_name_is_valid(dst, UNIT_NAME_ANY)) {
+ unit_file_changes_add(changes, n_changes, -EUCLEAN, dst, NULL);
+ r = -EUCLEAN;
+ continue;
+ }
+
+ path = strjoin(config_path, "/", dst, suffix, n);
+ if (!path)
+ return -ENOMEM;
+
+ q = create_symlink(paths, i->path, path, true, changes, n_changes);
+ if (r == 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int install_info_symlink_link(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_free_ char *path = NULL;
+ int r;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+ assert(i->path);
+
+ r = in_search_path(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return 0;
+
+ path = path_join(config_path, i->name);
+ if (!path)
+ return -ENOMEM;
+
+ return create_symlink(paths, i->path, path, force, changes, n_changes);
+}
+
+static int install_info_apply(
+ UnitFileInstallInfo *i,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r, q;
+
+ assert(i);
+ assert(paths);
+ assert(config_path);
+
+ if (i->type != UNIT_FILE_TYPE_REGULAR)
+ return 0;
+
+ r = install_info_symlink_alias(i, paths, config_path, force, changes, n_changes);
+
+ q = install_info_symlink_wants(i, paths, config_path, i->wanted_by, ".wants/", changes, n_changes);
+ if (r == 0)
+ r = q;
+
+ q = install_info_symlink_wants(i, paths, config_path, i->required_by, ".requires/", changes, n_changes);
+ if (r == 0)
+ r = q;
+
+ q = install_info_symlink_link(i, paths, config_path, force, changes, n_changes);
+ /* Do not count links to the unit file towards the "carries_install_info" count */
+ if (r == 0 && q < 0)
+ r = q;
+
+ return r;
+}
+
+static int install_context_apply(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ const char *config_path,
+ bool force,
+ SearchFlags flags,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(config_path);
+
+ if (ordered_hashmap_isempty(c->will_process))
+ return 0;
+
+ r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ r = 0;
+ while ((i = ordered_hashmap_first(c->will_process))) {
+ int q;
+
+ q = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+ if (q < 0)
+ return q;
+
+ q = install_info_traverse(scope, c, paths, i, flags, NULL);
+ if (q < 0) {
+ unit_file_changes_add(changes, n_changes, q, i->name, NULL);
+ return q;
+ }
+
+ /* We can attempt to process a masked unit when a different unit
+ * that we were processing specifies it in Also=. */
+ if (i->type == UNIT_FILE_TYPE_MASKED) {
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path, NULL);
+ if (r >= 0)
+ /* Assume that something *could* have been enabled here,
+ * avoid "empty [Install] section" warning. */
+ r += 1;
+ continue;
+ }
+
+ if (i->type != UNIT_FILE_TYPE_REGULAR)
+ continue;
+
+ q = install_info_apply(i, paths, config_path, force, changes, n_changes);
+ if (r >= 0) {
+ if (q < 0)
+ r = q;
+ else
+ r += q;
+ }
+ }
+
+ return r;
+}
+
+static int install_context_mark_for_removal(
+ UnitFileScope scope,
+ InstallContext *c,
+ const LookupPaths *paths,
+ Set **remove_symlinks_to,
+ const char *config_path,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ UnitFileInstallInfo *i;
+ int r;
+
+ assert(c);
+ assert(paths);
+ assert(config_path);
+
+ /* Marks all items for removal */
+
+ if (ordered_hashmap_isempty(c->will_process))
+ return 0;
+
+ r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ while ((i = ordered_hashmap_first(c->will_process))) {
+
+ r = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name);
+ if (r < 0)
+ return r;
+
+ r = install_info_traverse(scope, c, paths, i, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, NULL);
+ if (r == -ENOLINK) {
+ log_debug_errno(r, "Name %s leads to a dangling symlink, removing name.", i->name);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_DANGLING, i->path ?: i->name, NULL);
+ } else if (r == -ENOENT) {
+
+ if (i->auxiliary) /* some unit specified in Also= or similar is missing */
+ log_debug_errno(r, "Auxiliary unit of %s not found, removing name.", i->name);
+ else {
+ log_debug_errno(r, "Unit %s not found, removing name.", i->name);
+ unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL);
+ }
+
+ } else if (r < 0) {
+ log_debug_errno(r, "Failed to find unit %s, removing name: %m", i->name);
+ unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL);
+ } else if (i->type == UNIT_FILE_TYPE_MASKED) {
+ log_debug("Unit file %s is masked, ignoring.", i->name);
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path ?: i->name, NULL);
+ continue;
+ } else if (i->type != UNIT_FILE_TYPE_REGULAR) {
+ log_debug("Unit %s has type %s, ignoring.", i->name, unit_file_type_to_string(i->type) ?: "invalid");
+ continue;
+ }
+
+ r = mark_symlink_for_removal(remove_symlinks_to, i->name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int unit_file_mask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *path = NULL;
+ int q;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) {
+ if (r == 0)
+ r = -EINVAL;
+ continue;
+ }
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ q = create_symlink(&paths, "/dev/null", path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+int unit_file_unmask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ const char *config_path;
+ char **i;
+ bool dry_run;
+ int r, q;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ dry_run = !!(flags & UNIT_FILE_DRY_RUN);
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *path = NULL;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ r = null_or_empty_path(path);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo] = strdup(*i);
+ if (!todo[n_todo])
+ return -ENOMEM;
+
+ n_todo++;
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_free_ char *path = NULL;
+ const char *rp;
+
+ path = path_make_absolute(*i, config_path);
+ if (!path)
+ return -ENOMEM;
+
+ if (!dry_run && unlink(path) < 0) {
+ if (errno != ENOENT) {
+ if (r >= 0)
+ r = -errno;
+ unit_file_changes_add(changes, n_changes, -errno, path, NULL);
+ }
+
+ continue;
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, path, NULL);
+
+ rp = skip_root(&paths, path);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: path);
+ if (q < 0)
+ return q;
+ }
+
+ q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ return r;
+}
+
+int unit_file_link(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ const char *config_path;
+ char **i;
+ int r, q;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ _cleanup_free_ char *full = NULL;
+ struct stat st;
+ char *fn;
+
+ if (!path_is_absolute(*i))
+ return -EINVAL;
+
+ fn = basename(*i);
+ if (!unit_name_is_valid(fn, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ full = path_join(paths.root_dir, *i);
+ if (!full)
+ return -ENOMEM;
+
+ if (lstat(full, &st) < 0)
+ return -errno;
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+
+ q = in_search_path(&paths, *i);
+ if (q < 0)
+ return q;
+ if (q > 0)
+ continue;
+
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo] = strdup(*i);
+ if (!todo[n_todo])
+ return -ENOMEM;
+
+ n_todo++;
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_free_ char *new_path = NULL;
+
+ new_path = path_make_absolute(basename(*i), config_path);
+ if (!new_path)
+ return -ENOMEM;
+
+ q = create_symlink(&paths, *i, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+ if (q < 0 && r >= 0)
+ r = q;
+ }
+
+ return r;
+}
+
+static int path_shall_revert(const LookupPaths *paths, const char *path) {
+ int r;
+
+ assert(paths);
+ assert(path);
+
+ /* Checks whether the path is one where the drop-in directories shall be removed. */
+
+ r = path_is_config(paths, path, true);
+ if (r != 0)
+ return r;
+
+ r = path_is_control(paths, path);
+ if (r != 0)
+ return r;
+
+ return path_is_transient(paths, path);
+}
+
+int unit_file_revert(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_strv_free_ char **todo = NULL;
+ size_t n_todo = 0, n_allocated = 0;
+ char **i;
+ int r, q;
+
+ /* Puts a unit file back into vendor state. This means:
+ *
+ * a) we remove all drop-in snippets added by the user ("config"), add to transient units ("transient"), and
+ * added via "systemctl set-property" ("control"), but not if the drop-in is generated ("generated").
+ *
+ * c) if there's a vendor unit file (i.e. one in /usr) we remove any configured overriding unit files (i.e. in
+ * "config", but not in "transient" or "control" or even "generated").
+ *
+ * We remove all that in both the runtime and the persistent directories, if that applies.
+ */
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, files) {
+ bool has_vendor = false;
+ char **p;
+
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ STRV_FOREACH(p, paths.search_path) {
+ _cleanup_free_ char *path = NULL, *dropin = NULL;
+ struct stat st;
+
+ path = path_make_absolute(*i, *p);
+ if (!path)
+ return -ENOMEM;
+
+ r = lstat(path, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISREG(st.st_mode)) {
+ /* Check if there's a vendor version */
+ r = path_is_vendor_or_generator(&paths, path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ has_vendor = true;
+ }
+
+ dropin = strjoin(path, ".d");
+ if (!dropin)
+ return -ENOMEM;
+
+ r = lstat(dropin, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISDIR(st.st_mode)) {
+ /* Remove the drop-ins */
+ r = path_shall_revert(&paths, dropin);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo++] = TAKE_PTR(dropin);
+ }
+ }
+ }
+
+ if (!has_vendor)
+ continue;
+
+ /* OK, there's a vendor version, hence drop all configuration versions */
+ STRV_FOREACH(p, paths.search_path) {
+ _cleanup_free_ char *path = NULL;
+ struct stat st;
+
+ path = path_make_absolute(*i, *p);
+ if (!path)
+ return -ENOMEM;
+
+ r = lstat(path, &st);
+ if (r < 0) {
+ if (errno != ENOENT)
+ return -errno;
+ } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
+ r = path_is_config(&paths, path, true);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2))
+ return -ENOMEM;
+
+ todo[n_todo++] = TAKE_PTR(path);
+ }
+ }
+ }
+ }
+
+ strv_uniq(todo);
+
+ r = 0;
+ STRV_FOREACH(i, todo) {
+ _cleanup_strv_free_ char **fs = NULL;
+ const char *rp;
+ char **j;
+
+ (void) get_files_in_directory(*i, &fs);
+
+ q = rm_rf(*i, REMOVE_ROOT|REMOVE_PHYSICAL);
+ if (q < 0 && q != -ENOENT && r >= 0) {
+ r = q;
+ continue;
+ }
+
+ STRV_FOREACH(j, fs) {
+ _cleanup_free_ char *t = NULL;
+
+ t = path_join(*i, *j);
+ if (!t)
+ return -ENOMEM;
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, t, NULL);
+ }
+
+ unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, *i, NULL);
+
+ rp = skip_root(&paths, *i);
+ q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: *i);
+ if (q < 0)
+ return q;
+ }
+
+ q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, false, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, false, changes, n_changes);
+ if (r >= 0)
+ r = q;
+
+ return r;
+}
+
+int unit_file_add_dependency(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ const char *target,
+ UnitDependency dep,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i, *target_info;
+ const char *config_path;
+ char **f;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(target);
+
+ if (!IN_SET(dep, UNIT_WANTS, UNIT_REQUIRES))
+ return -EINVAL;
+
+ if (!unit_name_is_valid(target, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = install_info_discover_and_check(scope, &c, &paths, target, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &target_info, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(target_info->type == UNIT_FILE_TYPE_REGULAR);
+
+ STRV_FOREACH(f, files) {
+ char ***l;
+
+ r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(i->type == UNIT_FILE_TYPE_REGULAR);
+
+ /* We didn't actually load anything from the unit
+ * file, but instead just add in our new symlink to
+ * create. */
+
+ if (dep == UNIT_WANTS)
+ l = &i->wanted_by;
+ else
+ l = &i->required_by;
+
+ strv_free(*l);
+ *l = strv_new(target_info->name);
+ if (!*l)
+ return -ENOMEM;
+ }
+
+ return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes);
+}
+
+int unit_file_enable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ const char *config_path;
+ UnitFileInstallInfo *i;
+ char **f;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = config_path_from_flags(&paths, flags);
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(f, files) {
+ r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ assert(i->type == UNIT_FILE_TYPE_REGULAR);
+ }
+
+ /* This will return the number of symlink rules that were
+ supposed to be created, not the ones actually created. This
+ is useful to determine whether the passed files had any
+ installation data at all. */
+
+ return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_LOAD, changes, n_changes);
+}
+
+int unit_file_disable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = config_path_from_flags(&paths, flags);
+ if (!config_path)
+ return -ENXIO;
+
+ STRV_FOREACH(i, files) {
+ if (!unit_name_is_valid(*i, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_add(&c, *i, NULL, false, NULL);
+ if (r < 0)
+ return r;
+ }
+
+ r = install_context_mark_for_removal(scope, &c, &paths, &remove_symlinks_to, config_path, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, !!(flags & UNIT_FILE_DRY_RUN), changes, n_changes);
+}
+
+int unit_file_reenable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ char **n;
+ int r;
+ size_t l, i;
+
+ /* First, we invoke the disable command with only the basename... */
+ l = strv_length(files);
+ n = newa(char*, l+1);
+ for (i = 0; i < l; i++)
+ n[i] = basename(files[i]);
+ n[i] = NULL;
+
+ r = unit_file_disable(scope, flags, root_dir, n, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ /* But the enable command with the full name */
+ return unit_file_enable(scope, flags, root_dir, files, changes, n_changes);
+}
+
+int unit_file_set_default(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ const char *name,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ const char *new_path;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ if (unit_name_to_type(name) != UNIT_TARGET) /* this also validates the name */
+ return -EINVAL;
+ if (streq(name, SPECIAL_DEFAULT_TARGET))
+ return -EINVAL;
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ r = install_info_discover_and_check(scope, &c, &paths, name, 0, &i, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ new_path = strjoina(paths.persistent_config, "/" SPECIAL_DEFAULT_TARGET);
+ return create_symlink(&paths, i->path, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+int unit_file_get_default(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **name) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ char *n;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ r = install_info_discover(scope, &c, &paths, SPECIAL_DEFAULT_TARGET, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, NULL, NULL);
+ if (r < 0)
+ return r;
+ r = install_info_may_process(i, &paths, NULL, 0);
+ if (r < 0)
+ return r;
+
+ n = strdup(i->name);
+ if (!n)
+ return -ENOMEM;
+
+ *name = n;
+ return 0;
+}
+
+int unit_file_lookup_state(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const char *name,
+ UnitFileState *ret) {
+
+ _cleanup_(install_context_done) InstallContext c = {};
+ UnitFileInstallInfo *i;
+ UnitFileState state;
+ int r;
+
+ assert(paths);
+ assert(name);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_discover(scope, &c, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, NULL, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to discover unit %s: %m", name);
+
+ assert(IN_SET(i->type, UNIT_FILE_TYPE_REGULAR, UNIT_FILE_TYPE_MASKED));
+ log_debug("Found unit %s at %s (%s)", name, strna(i->path),
+ i->type == UNIT_FILE_TYPE_REGULAR ? "regular file" : "mask");
+
+ /* Shortcut things, if the caller just wants to know if this unit exists. */
+ if (!ret)
+ return 0;
+
+ switch (i->type) {
+
+ case UNIT_FILE_TYPE_MASKED:
+ r = path_is_runtime(paths, i->path, true);
+ if (r < 0)
+ return r;
+
+ state = r > 0 ? UNIT_FILE_MASKED_RUNTIME : UNIT_FILE_MASKED;
+ break;
+
+ case UNIT_FILE_TYPE_REGULAR:
+ /* Check if the name we were querying is actually an alias */
+ if (!streq(name, basename(i->path)) && !unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) {
+ state = UNIT_FILE_ALIAS;
+ break;
+ }
+
+ r = path_is_generator(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ state = UNIT_FILE_GENERATED;
+ break;
+ }
+
+ r = path_is_transient(paths, i->path);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ state = UNIT_FILE_TRANSIENT;
+ break;
+ }
+
+ /* Check if any of the Alias= symlinks have been created.
+ * We ignore other aliases, and only check those that would
+ * be created by systemctl enable for this unit. */
+ r = find_symlinks_in_scope(scope, paths, i, true, &state);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ break;
+
+ /* Check if the file is known under other names. If it is,
+ * it might be in use. Report that as UNIT_FILE_INDIRECT. */
+ r = find_symlinks_in_scope(scope, paths, i, false, &state);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ state = UNIT_FILE_INDIRECT;
+ else {
+ if (unit_file_install_info_has_rules(i))
+ state = UNIT_FILE_DISABLED;
+ else if (unit_file_install_info_has_also(i))
+ state = UNIT_FILE_INDIRECT;
+ else
+ state = UNIT_FILE_STATIC;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected unit file type.");
+ }
+
+ *ret = state;
+ return 0;
+}
+
+int unit_file_get_state(
+ UnitFileScope scope,
+ const char *root_dir,
+ const char *name,
+ UnitFileState *ret) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(name);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ return unit_file_lookup_state(scope, &paths, name, ret);
+}
+
+int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name) {
+ _cleanup_(install_context_done) InstallContext c = {};
+ int r;
+
+ assert(paths);
+ assert(name);
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ r = install_info_discover(scope, &c, paths, name, 0, NULL, NULL, NULL);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int split_pattern_into_name_and_instances(const char *pattern, char **out_unit_name, char ***out_instances) {
+ _cleanup_strv_free_ char **instances = NULL;
+ _cleanup_free_ char *unit_name = NULL;
+ int r;
+
+ assert(pattern);
+ assert(out_instances);
+ assert(out_unit_name);
+
+ r = extract_first_word(&pattern, &unit_name, NULL, EXTRACT_RETAIN_ESCAPE);
+ if (r < 0)
+ return r;
+
+ /* We handle the instances logic when unit name is extracted */
+ if (pattern) {
+ /* We only create instances when a rule of templated unit
+ * is seen. A rule like enable foo@.service a b c will
+ * result in an array of (a, b, c) as instance names */
+ if (!unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE))
+ return -EINVAL;
+
+ instances = strv_split(pattern, WHITESPACE);
+ if (!instances)
+ return -ENOMEM;
+
+ *out_instances = TAKE_PTR(instances);
+ }
+
+ *out_unit_name = TAKE_PTR(unit_name);
+
+ return 0;
+}
+
+static int presets_find_config(UnitFileScope scope, const char *root_dir, char ***files) {
+ static const char* const system_dirs[] = {CONF_PATHS("systemd/system-preset"), NULL};
+ static const char* const user_dirs[] = {CONF_PATHS_USR("systemd/user-preset"), NULL};
+ const char* const* dirs;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+
+ if (scope == UNIT_FILE_SYSTEM)
+ dirs = system_dirs;
+ else if (IN_SET(scope, UNIT_FILE_GLOBAL, UNIT_FILE_USER))
+ dirs = user_dirs;
+ else
+ assert_not_reached("Invalid unit file scope");
+
+ return conf_files_list_strv(files, ".preset", root_dir, 0, dirs);
+}
+
+static int read_presets(UnitFileScope scope, const char *root_dir, UnitFilePresets *presets) {
+ _cleanup_(unit_file_presets_freep) UnitFilePresets ps = {};
+ size_t n_allocated = 0;
+ _cleanup_strv_free_ char **files = NULL;
+ char **p;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(presets);
+
+ r = presets_find_config(scope, root_dir, &files);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(p, files) {
+ _cleanup_fclose_ FILE *f;
+ int n = 0;
+
+ f = fopen(*p, "re");
+ if (!f) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ UnitFilePresetRule rule = {};
+ const char *parameter;
+ char *l;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ l = strstrip(line);
+ n++;
+
+ if (isempty(l))
+ continue;
+ if (strchr(COMMENTS, *l))
+ continue;
+
+ parameter = first_word(l, "enable");
+ if (parameter) {
+ char *unit_name;
+ char **instances = NULL;
+
+ /* Unit_name will remain the same as parameter when no instances are specified */
+ r = split_pattern_into_name_and_instances(parameter, &unit_name, &instances);
+ if (r < 0) {
+ log_syntax(NULL, LOG_WARNING, *p, n, r, "Couldn't parse line '%s'. Ignoring.", line);
+ continue;
+ }
+
+ rule = (UnitFilePresetRule) {
+ .pattern = unit_name,
+ .action = PRESET_ENABLE,
+ .instances = instances,
+ };
+ }
+
+ parameter = first_word(l, "disable");
+ if (parameter) {
+ char *pattern;
+
+ pattern = strdup(parameter);
+ if (!pattern)
+ return -ENOMEM;
+
+ rule = (UnitFilePresetRule) {
+ .pattern = pattern,
+ .action = PRESET_DISABLE,
+ };
+ }
+
+ if (rule.action) {
+ if (!GREEDY_REALLOC(ps.rules, n_allocated, ps.n_rules + 1))
+ return -ENOMEM;
+
+ ps.rules[ps.n_rules++] = rule;
+ continue;
+ }
+
+ log_syntax(NULL, LOG_WARNING, *p, n, 0, "Couldn't parse line '%s'. Ignoring.", line);
+ }
+ }
+
+ ps.initialized = true;
+ *presets = ps;
+ ps = (UnitFilePresets){};
+
+ return 0;
+}
+
+static int pattern_match_multiple_instances(
+ const UnitFilePresetRule rule,
+ const char *unit_name,
+ char ***ret) {
+
+ _cleanup_free_ char *templated_name = NULL;
+ int r;
+
+ /* If no ret is needed or the rule itself does not have instances
+ * initialized, we return not matching */
+ if (!ret || !rule.instances)
+ return 0;
+
+ r = unit_name_template(unit_name, &templated_name);
+ if (r < 0)
+ return r;
+ if (!streq(rule.pattern, templated_name))
+ return 0;
+
+ /* Compose a list of specified instances when unit name is a template */
+ if (unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) {
+ _cleanup_strv_free_ char **out_strv = NULL;
+
+ char **iter;
+ STRV_FOREACH(iter, rule.instances) {
+ _cleanup_free_ char *name = NULL;
+
+ r = unit_name_replace_instance(unit_name, *iter, &name);
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&out_strv, TAKE_PTR(name));
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(out_strv);
+ return 1;
+ } else {
+ /* We now know the input unit name is an instance name */
+ _cleanup_free_ char *instance_name = NULL;
+
+ r = unit_name_to_instance(unit_name, &instance_name);
+ if (r < 0)
+ return r;
+
+ if (strv_find(rule.instances, instance_name))
+ return 1;
+ }
+ return 0;
+}
+
+static int query_presets(const char *name, const UnitFilePresets *presets, char ***instance_name_list) {
+ PresetAction action = PRESET_UNKNOWN;
+
+ if (!unit_name_is_valid(name, UNIT_NAME_ANY))
+ return -EINVAL;
+
+ for (size_t i = 0; i < presets->n_rules; i++)
+ if (pattern_match_multiple_instances(presets->rules[i], name, instance_name_list) > 0 ||
+ fnmatch(presets->rules[i].pattern, name, FNM_NOESCAPE) == 0) {
+ action = presets->rules[i].action;
+ break;
+ }
+
+ switch (action) {
+ case PRESET_UNKNOWN:
+ log_debug("Preset files don't specify rule for %s. Enabling.", name);
+ return 1;
+ case PRESET_ENABLE:
+ if (instance_name_list && *instance_name_list) {
+ char **s;
+ STRV_FOREACH(s, *instance_name_list)
+ log_debug("Preset files say enable %s.", *s);
+ } else
+ log_debug("Preset files say enable %s.", name);
+ return 1;
+ case PRESET_DISABLE:
+ log_debug("Preset files say disable %s.", name);
+ return 0;
+ default:
+ assert_not_reached("invalid preset action");
+ }
+}
+
+int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name, UnitFilePresets *cached) {
+ _cleanup_(unit_file_presets_freep) UnitFilePresets tmp = {};
+ int r;
+
+ if (!cached)
+ cached = &tmp;
+ if (!cached->initialized) {
+ r = read_presets(scope, root_dir, cached);
+ if (r < 0)
+ return r;
+ }
+
+ return query_presets(name, cached, NULL);
+}
+
+static int execute_preset(
+ UnitFileScope scope,
+ InstallContext *plus,
+ InstallContext *minus,
+ const LookupPaths *paths,
+ const char *config_path,
+ char **files,
+ UnitFilePresetMode mode,
+ bool force,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ int r;
+
+ assert(plus);
+ assert(minus);
+ assert(paths);
+ assert(config_path);
+
+ if (mode != UNIT_FILE_PRESET_ENABLE_ONLY) {
+ _cleanup_set_free_free_ Set *remove_symlinks_to = NULL;
+
+ r = install_context_mark_for_removal(scope, minus, paths, &remove_symlinks_to, config_path, changes, n_changes);
+ if (r < 0)
+ return r;
+
+ r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, false, changes, n_changes);
+ } else
+ r = 0;
+
+ if (mode != UNIT_FILE_PRESET_DISABLE_ONLY) {
+ int q;
+
+ /* Returns number of symlinks that where supposed to be installed. */
+ q = install_context_apply(scope, plus, paths, config_path, force, SEARCH_LOAD, changes, n_changes);
+ if (r >= 0) {
+ if (q < 0)
+ r = q;
+ else
+ r += q;
+ }
+ }
+
+ return r;
+}
+
+static int preset_prepare_one(
+ UnitFileScope scope,
+ InstallContext *plus,
+ InstallContext *minus,
+ LookupPaths *paths,
+ const char *name,
+ const UnitFilePresets *presets,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext tmp = {};
+ _cleanup_strv_free_ char **instance_name_list = NULL;
+ UnitFileInstallInfo *i;
+ int r;
+
+ if (install_info_find(plus, name) || install_info_find(minus, name))
+ return 0;
+
+ r = install_info_discover(scope, &tmp, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ if (!streq(name, i->name)) {
+ log_debug("Skipping %s because it is an alias for %s.", name, i->name);
+ return 0;
+ }
+
+ r = query_presets(name, presets, &instance_name_list);
+ if (r < 0)
+ return r;
+
+ if (r > 0) {
+ if (instance_name_list) {
+ char **s;
+ STRV_FOREACH(s, instance_name_list) {
+ r = install_info_discover_and_check(scope, plus, paths, *s, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ r = install_info_discover_and_check(scope, plus, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ } else
+ r = install_info_discover(scope, minus, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS,
+ &i, changes, n_changes);
+
+ return r;
+}
+
+int unit_file_preset(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(unit_file_presets_freep) UnitFilePresets presets = {};
+ const char *config_path;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(mode < _UNIT_FILE_PRESET_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = read_presets(scope, root_dir, &presets);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, files) {
+ r = preset_prepare_one(scope, &plus, &minus, &paths, *i, &presets, changes, n_changes);
+ if (r < 0)
+ return r;
+ }
+
+ return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+int unit_file_preset_all(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes) {
+
+ _cleanup_(install_context_done) InstallContext plus = {}, minus = {};
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ _cleanup_(unit_file_presets_freep) UnitFilePresets presets = {};
+ const char *config_path = NULL;
+ char **i;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(mode < _UNIT_FILE_PRESET_MAX);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config;
+ if (!config_path)
+ return -ENXIO;
+
+ r = read_presets(scope, root_dir, &presets);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(i, paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*i);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ dirent_ensure_type(d, de);
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ /* we don't pass changes[] in, because we want to handle errors on our own */
+ r = preset_prepare_one(scope, &plus, &minus, &paths, de->d_name, &presets, NULL, 0);
+ if (r == -ERFKILL)
+ r = unit_file_changes_add(changes, n_changes,
+ UNIT_FILE_IS_MASKED, de->d_name, NULL);
+ else if (r == -ENOLINK)
+ r = unit_file_changes_add(changes, n_changes,
+ UNIT_FILE_IS_DANGLING, de->d_name, NULL);
+ else if (r == -EADDRNOTAVAIL) /* Ignore generated/transient units when applying preset */
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes);
+}
+
+static void unit_file_list_free_one(UnitFileList *f) {
+ if (!f)
+ return;
+
+ free(f->path);
+ free(f);
+}
+
+Hashmap* unit_file_list_free(Hashmap *h) {
+ return hashmap_free_with_destructor(h, unit_file_list_free_one);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UnitFileList*, unit_file_list_free_one);
+
+int unit_file_get_list(
+ UnitFileScope scope,
+ const char *root_dir,
+ Hashmap *h,
+ char **states,
+ char **patterns) {
+
+ _cleanup_(lookup_paths_free) LookupPaths paths = {};
+ char **dirname;
+ int r;
+
+ assert(scope >= 0);
+ assert(scope < _UNIT_FILE_SCOPE_MAX);
+ assert(h);
+
+ r = lookup_paths_init(&paths, scope, 0, root_dir);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(dirname, paths.search_path) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(*dirname);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+ if (IN_SET(errno, ENOTDIR, EACCES)) {
+ log_debug_errno(errno, "Failed to open \"%s\": %m", *dirname);
+ continue;
+ }
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_(unit_file_list_free_onep) UnitFileList *f = NULL;
+
+ if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY))
+ continue;
+
+ if (!strv_fnmatch_or_empty(patterns, de->d_name, FNM_NOESCAPE))
+ continue;
+
+ if (hashmap_get(h, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+
+ if (!IN_SET(de->d_type, DT_LNK, DT_REG))
+ continue;
+
+ f = new0(UnitFileList, 1);
+ if (!f)
+ return -ENOMEM;
+
+ f->path = path_make_absolute(de->d_name, *dirname);
+ if (!f->path)
+ return -ENOMEM;
+
+ r = unit_file_lookup_state(scope, &paths, de->d_name, &f->state);
+ if (r < 0)
+ f->state = UNIT_FILE_BAD;
+
+ if (!strv_isempty(states) &&
+ !strv_contains(states, unit_file_state_to_string(f->state)))
+ continue;
+
+ r = hashmap_put(h, basename(f->path), f);
+ if (r < 0)
+ return r;
+
+ f = NULL; /* prevent cleanup */
+ }
+ }
+
+ return 0;
+}
+
+static const char* const unit_file_state_table[_UNIT_FILE_STATE_MAX] = {
+ [UNIT_FILE_ENABLED] = "enabled",
+ [UNIT_FILE_ENABLED_RUNTIME] = "enabled-runtime",
+ [UNIT_FILE_LINKED] = "linked",
+ [UNIT_FILE_LINKED_RUNTIME] = "linked-runtime",
+ [UNIT_FILE_ALIAS] = "alias",
+ [UNIT_FILE_MASKED] = "masked",
+ [UNIT_FILE_MASKED_RUNTIME] = "masked-runtime",
+ [UNIT_FILE_STATIC] = "static",
+ [UNIT_FILE_DISABLED] = "disabled",
+ [UNIT_FILE_INDIRECT] = "indirect",
+ [UNIT_FILE_GENERATED] = "generated",
+ [UNIT_FILE_TRANSIENT] = "transient",
+ [UNIT_FILE_BAD] = "bad",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_state, UnitFileState);
+
+static const char* const unit_file_change_type_table[_UNIT_FILE_CHANGE_TYPE_MAX] = {
+ [UNIT_FILE_SYMLINK] = "symlink",
+ [UNIT_FILE_UNLINK] = "unlink",
+ [UNIT_FILE_IS_MASKED] = "masked",
+ [UNIT_FILE_IS_DANGLING] = "dangling",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_change_type, UnitFileChangeType);
+
+static const char* const unit_file_preset_mode_table[_UNIT_FILE_PRESET_MAX] = {
+ [UNIT_FILE_PRESET_FULL] = "full",
+ [UNIT_FILE_PRESET_ENABLE_ONLY] = "enable-only",
+ [UNIT_FILE_PRESET_DISABLE_ONLY] = "disable-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_file_preset_mode, UnitFilePresetMode);
diff --git a/src/shared/install.h b/src/shared/install.h
new file mode 100644
index 0000000..84bf1f5
--- /dev/null
+++ b/src/shared/install.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef enum UnitFilePresetMode UnitFilePresetMode;
+typedef enum UnitFileChangeType UnitFileChangeType;
+typedef enum UnitFileFlags UnitFileFlags;
+typedef enum UnitFileType UnitFileType;
+typedef struct UnitFileChange UnitFileChange;
+typedef struct UnitFileList UnitFileList;
+typedef struct UnitFileInstallInfo UnitFileInstallInfo;
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "strv.h"
+#include "unit-name.h"
+
+enum UnitFilePresetMode {
+ UNIT_FILE_PRESET_FULL,
+ UNIT_FILE_PRESET_ENABLE_ONLY,
+ UNIT_FILE_PRESET_DISABLE_ONLY,
+ _UNIT_FILE_PRESET_MAX,
+ _UNIT_FILE_PRESET_INVALID = -1
+};
+
+enum UnitFileChangeType {
+ UNIT_FILE_SYMLINK,
+ UNIT_FILE_UNLINK,
+ UNIT_FILE_IS_MASKED,
+ UNIT_FILE_IS_DANGLING,
+ _UNIT_FILE_CHANGE_TYPE_MAX,
+ _UNIT_FILE_CHANGE_TYPE_INVALID = INT_MIN
+};
+
+enum UnitFileFlags {
+ UNIT_FILE_RUNTIME = 1 << 0, /* Public API via DBUS, do not change */
+ UNIT_FILE_FORCE = 1 << 1, /* Public API via DBUS, do not change */
+ UNIT_FILE_PORTABLE = 1 << 2, /* Public API via DBUS, do not change */
+ UNIT_FILE_DRY_RUN = 1 << 3,
+ _UNIT_FILE_FLAGS_MASK_PUBLIC = UNIT_FILE_RUNTIME|UNIT_FILE_PORTABLE|UNIT_FILE_FORCE,
+};
+
+/* type can either one of the UnitFileChangeTypes listed above, or a negative error.
+ * If source is specified, it should be the contents of the path symlink.
+ * In case of an error, source should be the existing symlink contents or NULL
+ */
+struct UnitFileChange {
+ int type; /* UnitFileChangeType or bust */
+ char *path;
+ char *source;
+};
+
+static inline bool unit_file_changes_have_modification(const UnitFileChange* changes, size_t n_changes) {
+ size_t i;
+ for (i = 0; i < n_changes; i++)
+ if (IN_SET(changes[i].type, UNIT_FILE_SYMLINK, UNIT_FILE_UNLINK))
+ return true;
+ return false;
+}
+
+struct UnitFileList {
+ char *path;
+ UnitFileState state;
+};
+
+enum UnitFileType {
+ UNIT_FILE_TYPE_REGULAR,
+ UNIT_FILE_TYPE_SYMLINK,
+ UNIT_FILE_TYPE_MASKED,
+ _UNIT_FILE_TYPE_MAX,
+ _UNIT_FILE_TYPE_INVALID = -1,
+};
+
+struct UnitFileInstallInfo {
+ char *name;
+ char *path;
+
+ char **aliases;
+ char **wanted_by;
+ char **required_by;
+ char **also;
+
+ char *default_instance;
+ char *symlink_target;
+
+ UnitFileType type;
+ bool auxiliary;
+};
+
+int unit_file_enable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_disable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_reenable(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_preset_all(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ UnitFilePresetMode mode,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_mask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_unmask(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_link(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_revert(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **files,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_set_default(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ const char *file,
+ UnitFileChange **changes,
+ size_t *n_changes);
+int unit_file_get_default(
+ UnitFileScope scope,
+ const char *root_dir,
+ char **name);
+int unit_file_add_dependency(
+ UnitFileScope scope,
+ UnitFileFlags flags,
+ const char *root_dir,
+ char **files,
+ const char *target,
+ UnitDependency dep,
+ UnitFileChange **changes,
+ size_t *n_changes);
+
+int unit_file_lookup_state(
+ UnitFileScope scope,
+ const LookupPaths *paths,
+ const char *name,
+ UnitFileState *ret);
+
+int unit_file_get_state(UnitFileScope scope, const char *root_dir, const char *filename, UnitFileState *ret);
+int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name);
+
+int unit_file_get_list(UnitFileScope scope, const char *root_dir, Hashmap *h, char **states, char **patterns);
+Hashmap* unit_file_list_free(Hashmap *h);
+
+int unit_file_changes_add(UnitFileChange **changes, size_t *n_changes, int type, const char *path, const char *source);
+void unit_file_changes_free(UnitFileChange *changes, size_t n_changes);
+void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet);
+
+int unit_file_verify_alias(const UnitFileInstallInfo *i, const char *dst, char **ret_dst);
+
+typedef struct UnitFilePresetRule UnitFilePresetRule;
+
+typedef struct {
+ UnitFilePresetRule *rules;
+ size_t n_rules;
+ bool initialized;
+} UnitFilePresets;
+
+void unit_file_presets_freep(UnitFilePresets *p);
+int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name, UnitFilePresets *cached);
+
+const char *unit_file_state_to_string(UnitFileState s) _const_;
+UnitFileState unit_file_state_from_string(const char *s) _pure_;
+/* from_string conversion is unreliable because of the overlap between -EPERM and -1 for error. */
+
+const char *unit_file_change_type_to_string(UnitFileChangeType s) _const_;
+UnitFileChangeType unit_file_change_type_from_string(const char *s) _pure_;
+
+const char *unit_file_preset_mode_to_string(UnitFilePresetMode m) _const_;
+UnitFilePresetMode unit_file_preset_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/ip-protocol-list.c b/src/shared/ip-protocol-list.c
new file mode 100644
index 0000000..0623d5e
--- /dev/null
+++ b/src/shared/ip-protocol-list.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <netinet/in.h>
+
+#include "alloc-util.h"
+#include "ip-protocol-list.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+static const struct ip_protocol_name* lookup_ip_protocol(register const char *str, register GPERF_LEN_TYPE len);
+
+#include "ip-protocol-from-name.h"
+#include "ip-protocol-to-name.h"
+
+const char *ip_protocol_to_name(int id) {
+
+ if (id < 0)
+ return NULL;
+
+ if ((size_t) id >= ELEMENTSOF(ip_protocol_names))
+ return NULL;
+
+ return ip_protocol_names[id];
+}
+
+int ip_protocol_from_name(const char *name) {
+ const struct ip_protocol_name *sc;
+
+ assert(name);
+
+ sc = lookup_ip_protocol(name, strlen(name));
+ if (!sc)
+ return -EINVAL;
+
+ return sc->id;
+}
+
+int parse_ip_protocol(const char *s) {
+ _cleanup_free_ char *str = NULL;
+ int i, r;
+
+ assert(s);
+
+ if (isempty(s))
+ return IPPROTO_IP;
+
+ /* Do not use strdupa() here, as the input string may come from *
+ * command line or config files. */
+ str = strdup(s);
+ if (!str)
+ return -ENOMEM;
+
+ i = ip_protocol_from_name(ascii_strlower(str));
+ if (i >= 0)
+ return i;
+
+ r = safe_atoi(str, &i);
+ if (r < 0)
+ return r;
+
+ if (!ip_protocol_to_name(i))
+ return -EINVAL;
+
+ return i;
+}
diff --git a/src/shared/ip-protocol-list.h b/src/shared/ip-protocol-list.h
new file mode 100644
index 0000000..abe3f5f
--- /dev/null
+++ b/src/shared/ip-protocol-list.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+const char *ip_protocol_to_name(int id);
+int ip_protocol_from_name(const char *name);
+int parse_ip_protocol(const char *s);
diff --git a/src/shared/ip-protocol-to-name.awk b/src/shared/ip-protocol-to-name.awk
new file mode 100644
index 0000000..824f811
--- /dev/null
+++ b/src/shared/ip-protocol-to-name.awk
@@ -0,0 +1,9 @@
+BEGIN{
+ print "static const char* const ip_protocol_names[] = { "
+}
+!/HOPOPTS/ {
+ printf " [IPPROTO_%s] = \"%s\",\n", $1, tolower($1)
+}
+END{
+ print "};"
+}
diff --git a/src/shared/ipvlan-util.c b/src/shared/ipvlan-util.c
new file mode 100644
index 0000000..1f2e2ff
--- /dev/null
+++ b/src/shared/ipvlan-util.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <net/if.h>
+
+#include "ipvlan-util.h"
+#include "string-table.h"
+
+static const char* const ipvlan_mode_table[_NETDEV_IPVLAN_MODE_MAX] = {
+ [NETDEV_IPVLAN_MODE_L2] = "L2",
+ [NETDEV_IPVLAN_MODE_L3] = "L3",
+ [NETDEV_IPVLAN_MODE_L3S] = "L3S",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipvlan_mode, IPVlanMode);
+
+static const char* const ipvlan_flags_table[_NETDEV_IPVLAN_FLAGS_MAX] = {
+ [NETDEV_IPVLAN_FLAGS_BRIGDE] = "bridge",
+ [NETDEV_IPVLAN_FLAGS_PRIVATE] = "private",
+ [NETDEV_IPVLAN_FLAGS_VEPA] = "vepa",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(ipvlan_flags, IPVlanFlags);
diff --git a/src/shared/ipvlan-util.h b/src/shared/ipvlan-util.h
new file mode 100644
index 0000000..90f755b
--- /dev/null
+++ b/src/shared/ipvlan-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <netinet/in.h>
+#include <linux/if_link.h>
+
+#include "macro.h"
+
+typedef enum IPVlanMode {
+ NETDEV_IPVLAN_MODE_L2 = IPVLAN_MODE_L2,
+ NETDEV_IPVLAN_MODE_L3 = IPVLAN_MODE_L3,
+ NETDEV_IPVLAN_MODE_L3S = IPVLAN_MODE_L3S,
+ _NETDEV_IPVLAN_MODE_MAX,
+ _NETDEV_IPVLAN_MODE_INVALID = -1
+} IPVlanMode;
+
+typedef enum IPVlanFlags {
+ NETDEV_IPVLAN_FLAGS_BRIGDE,
+ NETDEV_IPVLAN_FLAGS_PRIVATE = IPVLAN_F_PRIVATE,
+ NETDEV_IPVLAN_FLAGS_VEPA = IPVLAN_F_VEPA,
+ _NETDEV_IPVLAN_FLAGS_MAX,
+ _NETDEV_IPVLAN_FLAGS_INVALID = -1
+} IPVlanFlags;
+
+const char *ipvlan_mode_to_string(IPVlanMode d) _const_;
+IPVlanMode ipvlan_mode_from_string(const char *d) _pure_;
+
+const char *ipvlan_flags_to_string(IPVlanFlags d) _const_;
+IPVlanFlags ipvlan_flags_from_string(const char *d) _pure_;
diff --git a/src/shared/journal-importer.c b/src/shared/journal-importer.c
new file mode 100644
index 0000000..b2785f0
--- /dev/null
+++ b/src/shared/journal-importer.c
@@ -0,0 +1,483 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "journal-file.h"
+#include "journal-importer.h"
+#include "journal-util.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "unaligned.h"
+
+enum {
+ IMPORTER_STATE_LINE = 0, /* waiting to read, or reading line */
+ IMPORTER_STATE_DATA_START, /* reading binary data header */
+ IMPORTER_STATE_DATA, /* reading binary data */
+ IMPORTER_STATE_DATA_FINISH, /* expecting newline */
+ IMPORTER_STATE_EOF, /* done */
+};
+
+void journal_importer_cleanup(JournalImporter *imp) {
+ if (imp->fd >= 0 && !imp->passive_fd) {
+ log_debug("Closing %s (fd=%d)", imp->name ?: "importer", imp->fd);
+ safe_close(imp->fd);
+ }
+
+ free(imp->name);
+ free(imp->buf);
+ iovw_free_contents(&imp->iovw, false);
+}
+
+static char* realloc_buffer(JournalImporter *imp, size_t size) {
+ char *b, *old = imp->buf;
+
+ b = GREEDY_REALLOC(imp->buf, imp->size, size);
+ if (!b)
+ return NULL;
+
+ iovw_rebase(&imp->iovw, old, imp->buf);
+
+ return b;
+}
+
+static int get_line(JournalImporter *imp, char **line, size_t *size) {
+ ssize_t n;
+ char *c = NULL;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_LINE);
+ assert(imp->offset <= imp->filled);
+ assert(imp->filled <= imp->size);
+ assert(!imp->buf || imp->size > 0);
+ assert(imp->fd >= 0);
+
+ for (;;) {
+ if (imp->buf) {
+ size_t start = MAX(imp->scanned, imp->offset);
+
+ c = memchr(imp->buf + start, '\n',
+ imp->filled - start);
+ if (c)
+ break;
+ }
+
+ imp->scanned = imp->filled;
+ if (imp->scanned >= DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOBUFS),
+ "Entry is bigger than %u bytes.",
+ DATA_SIZE_MAX);
+
+ if (imp->passive_fd)
+ /* we have to wait for some data to come to us */
+ return -EAGAIN;
+
+ /* We know that imp->filled is at most DATA_SIZE_MAX, so if
+ we reallocate it, we'll increase the size at least a bit. */
+ assert_cc(DATA_SIZE_MAX < ENTRY_SIZE_MAX);
+ if (imp->size - imp->filled < LINE_CHUNK &&
+ !realloc_buffer(imp, MIN(imp->filled + LINE_CHUNK, ENTRY_SIZE_MAX)))
+ return log_oom();
+
+ assert(imp->buf);
+ assert(imp->size - imp->filled >= LINE_CHUNK ||
+ imp->size == ENTRY_SIZE_MAX);
+
+ n = read(imp->fd,
+ imp->buf + imp->filled,
+ imp->size - imp->filled);
+ if (n < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "read(%d, ..., %zu): %m",
+ imp->fd,
+ imp->size - imp->filled);
+ return -errno;
+ } else if (n == 0)
+ return 0;
+
+ imp->filled += n;
+ }
+
+ *line = imp->buf + imp->offset;
+ *size = c + 1 - imp->buf - imp->offset;
+ imp->offset += *size;
+
+ return 1;
+}
+
+static int fill_fixed_size(JournalImporter *imp, void **data, size_t size) {
+
+ assert(imp);
+ assert(IN_SET(imp->state, IMPORTER_STATE_DATA_START, IMPORTER_STATE_DATA, IMPORTER_STATE_DATA_FINISH));
+ assert(size <= DATA_SIZE_MAX);
+ assert(imp->offset <= imp->filled);
+ assert(imp->filled <= imp->size);
+ assert(imp->buf || imp->size == 0);
+ assert(!imp->buf || imp->size > 0);
+ assert(imp->fd >= 0);
+ assert(data);
+
+ while (imp->filled - imp->offset < size) {
+ int n;
+
+ if (imp->passive_fd)
+ /* we have to wait for some data to come to us */
+ return -EAGAIN;
+
+ if (!realloc_buffer(imp, imp->offset + size))
+ return log_oom();
+
+ n = read(imp->fd, imp->buf + imp->filled,
+ imp->size - imp->filled);
+ if (n < 0) {
+ if (errno != EAGAIN)
+ log_error_errno(errno, "read(%d, ..., %zu): %m", imp->fd,
+ imp->size - imp->filled);
+ return -errno;
+ } else if (n == 0)
+ return 0;
+
+ imp->filled += n;
+ }
+
+ *data = imp->buf + imp->offset;
+ imp->offset += size;
+
+ return 1;
+}
+
+static int get_data_size(JournalImporter *imp) {
+ int r;
+ void *data;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_DATA_START);
+ assert(imp->data_size == 0);
+
+ r = fill_fixed_size(imp, &data, sizeof(uint64_t));
+ if (r <= 0)
+ return r;
+
+ imp->data_size = unaligned_read_le64(data);
+ if (imp->data_size > DATA_SIZE_MAX)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Stream declares field with size %zu > DATA_SIZE_MAX = %u",
+ imp->data_size, DATA_SIZE_MAX);
+ if (imp->data_size == 0)
+ log_warning("Binary field with zero length");
+
+ return 1;
+}
+
+static int get_data_data(JournalImporter *imp, void **data) {
+ int r;
+
+ assert(imp);
+ assert(data);
+ assert(imp->state == IMPORTER_STATE_DATA);
+
+ r = fill_fixed_size(imp, data, imp->data_size);
+ if (r <= 0)
+ return r;
+
+ return 1;
+}
+
+static int get_data_newline(JournalImporter *imp) {
+ int r;
+ char *data;
+
+ assert(imp);
+ assert(imp->state == IMPORTER_STATE_DATA_FINISH);
+
+ r = fill_fixed_size(imp, (void**) &data, 1);
+ if (r <= 0)
+ return r;
+
+ assert(data);
+ if (*data != '\n') {
+ char buf[4];
+ int l;
+
+ l = cescape_char(*data, buf);
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Expected newline, got '%.*s'", l, buf);
+ }
+
+ return 1;
+}
+
+static int process_special_field(JournalImporter *imp, char *line) {
+ const char *value;
+ char buf[CELLESCAPE_DEFAULT_LENGTH];
+ int r;
+
+ assert(line);
+
+ value = startswith(line, "__CURSOR=");
+ if (value)
+ /* ignore __CURSOR */
+ return 1;
+
+ value = startswith(line, "__REALTIME_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __REALTIME_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_REALTIME(x)) {
+ log_warning("__REALTIME_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.realtime = x;
+ return 1;
+ }
+
+ value = startswith(line, "__MONOTONIC_TIMESTAMP=");
+ if (value) {
+ uint64_t x;
+
+ r = safe_atou64(value, &x);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse __MONOTONIC_TIMESTAMP '%s': %m",
+ cellescape(buf, sizeof buf, value));
+ else if (!VALID_MONOTONIC(x)) {
+ log_warning("__MONOTONIC_TIMESTAMP out of range, ignoring: %"PRIu64, x);
+ return -ERANGE;
+ }
+
+ imp->ts.monotonic = x;
+ return 1;
+ }
+
+ /* Just a single underline, but it needs special treatment too. */
+ value = startswith(line, "_BOOT_ID=");
+ if (value) {
+ r = sd_id128_from_string(value, &imp->boot_id);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to parse _BOOT_ID '%s': %m",
+ cellescape(buf, sizeof buf, value));
+
+ /* store the field in the usual fashion too */
+ return 0;
+ }
+
+ value = startswith(line, "__");
+ if (value) {
+ log_notice("Unknown dunder line __%s, ignoring.", cellescape(buf, sizeof buf, value));
+ return 1;
+ }
+
+ /* no dunder */
+ return 0;
+}
+
+int journal_importer_process_data(JournalImporter *imp) {
+ int r;
+
+ switch(imp->state) {
+ case IMPORTER_STATE_LINE: {
+ char *line, *sep;
+ size_t n = 0;
+
+ assert(imp->data_size == 0);
+
+ r = get_line(imp, &line, &n);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+ assert(n > 0);
+ assert(line[n-1] == '\n');
+
+ if (n == 1) {
+ log_trace("Received empty line, event is ready");
+ return 1;
+ }
+
+ /* MESSAGE=xxx\n
+ or
+ COREDUMP\n
+ LLLLLLLL0011223344...\n
+ */
+ sep = memchr(line, '=', n);
+ if (sep) {
+ /* chomp newline */
+ n--;
+
+ if (!journal_field_valid(line, sep - line, true)) {
+ char buf[64], *t;
+
+ t = strndupa(line, sep - line);
+ log_debug("Ignoring invalid field: \"%s\"",
+ cellescape(buf, sizeof buf, t));
+
+ return 0;
+ }
+
+ line[n] = '\0';
+ r = process_special_field(imp, line);
+ if (r != 0)
+ return r < 0 ? r : 0;
+
+ r = iovw_put(&imp->iovw, line, n);
+ if (r < 0)
+ return r;
+ } else {
+ if (!journal_field_valid(line, n - 1, true)) {
+ char buf[64], *t;
+
+ t = strndupa(line, n - 1);
+ log_debug("Ignoring invalid field: \"%s\"",
+ cellescape(buf, sizeof buf, t));
+
+ return 0;
+ }
+
+ /* replace \n with = */
+ line[n-1] = '=';
+
+ imp->field_len = n;
+ imp->state = IMPORTER_STATE_DATA_START;
+
+ /* we cannot put the field in iovec until we have all data */
+ }
+
+ log_trace("Received: %.*s (%s)", (int) n, line, sep ? "text" : "binary");
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_START:
+ assert(imp->data_size == 0);
+
+ r = get_data_size(imp);
+ // log_debug("get_data_size() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ imp->state = imp->data_size > 0 ?
+ IMPORTER_STATE_DATA : IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+
+ case IMPORTER_STATE_DATA: {
+ void *data;
+ char *field;
+
+ assert(imp->data_size > 0);
+
+ r = get_data_data(imp, &data);
+ // log_debug("get_data_data() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ assert(data);
+
+ field = (char*) data - sizeof(uint64_t) - imp->field_len;
+ memmove(field + sizeof(uint64_t), field, imp->field_len);
+
+ r = iovw_put(&imp->iovw, field + sizeof(uint64_t), imp->field_len + imp->data_size);
+ if (r < 0)
+ return r;
+
+ imp->state = IMPORTER_STATE_DATA_FINISH;
+
+ return 0; /* continue */
+ }
+
+ case IMPORTER_STATE_DATA_FINISH:
+ r = get_data_newline(imp);
+ // log_debug("get_data_newline() -> %d", r);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ imp->state = IMPORTER_STATE_EOF;
+ return 0;
+ }
+
+ imp->data_size = 0;
+ imp->state = IMPORTER_STATE_LINE;
+
+ return 0; /* continue */
+ default:
+ assert_not_reached("wtf?");
+ }
+}
+
+int journal_importer_push_data(JournalImporter *imp, const char *data, size_t size) {
+ assert(imp);
+ assert(imp->state != IMPORTER_STATE_EOF);
+
+ if (!realloc_buffer(imp, imp->filled + size))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOMEM),
+ "Failed to store received data of size %zu "
+ "(in addition to existing %zu bytes with %zu filled): %s",
+ size, imp->size, imp->filled,
+ strerror_safe(ENOMEM));
+
+ memcpy(imp->buf + imp->filled, data, size);
+ imp->filled += size;
+
+ return 0;
+}
+
+void journal_importer_drop_iovw(JournalImporter *imp) {
+ size_t remain, target;
+
+ /* This function drops processed data that along with the iovw that points at it */
+
+ iovw_free_contents(&imp->iovw, false);
+
+ /* possibly reset buffer position */
+ remain = imp->filled - imp->offset;
+
+ if (remain == 0) /* no brainer */
+ imp->offset = imp->scanned = imp->filled = 0;
+ else if (imp->offset > imp->size - imp->filled &&
+ imp->offset > remain) {
+ memcpy(imp->buf, imp->buf + imp->offset, remain);
+ imp->offset = imp->scanned = 0;
+ imp->filled = remain;
+ }
+
+ target = imp->size;
+ while (target > 16 * LINE_CHUNK && imp->filled < target / 2)
+ target /= 2;
+ if (target < imp->size) {
+ char *tmp;
+
+ tmp = realloc(imp->buf, target);
+ if (!tmp)
+ log_warning("Failed to reallocate buffer to (smaller) size %zu",
+ target);
+ else {
+ log_debug("Reallocated buffer from %zu to %zu bytes",
+ imp->size, target);
+ imp->buf = tmp;
+ imp->size = target;
+ }
+ }
+}
+
+bool journal_importer_eof(const JournalImporter *imp) {
+ return imp->state == IMPORTER_STATE_EOF;
+}
diff --git a/src/shared/journal-importer.h b/src/shared/journal-importer.h
new file mode 100644
index 0000000..e0073fc
--- /dev/null
+++ b/src/shared/journal-importer.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/uio.h>
+
+#include "sd-id128.h"
+
+#include "io-util.h"
+#include "time-util.h"
+
+/* Make sure not to make this smaller than the maximum coredump size.
+ * See JOURNAL_SIZE_MAX in coredump.c */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#define ENTRY_SIZE_MAX (1024*1024*770u)
+#define DATA_SIZE_MAX (1024*1024*768u)
+#else
+#define ENTRY_SIZE_MAX (1024*1024*13u)
+#define DATA_SIZE_MAX (1024*1024*11u)
+#endif
+#define LINE_CHUNK 8*1024u
+
+/* The maximum number of fields in an entry */
+#define ENTRY_FIELD_COUNT_MAX 1024
+
+typedef struct JournalImporter {
+ int fd;
+ bool passive_fd;
+ char *name;
+
+ char *buf;
+ size_t size; /* total size of the buffer */
+ size_t offset; /* offset to the beginning of live data in the buffer */
+ size_t scanned; /* number of bytes since the beginning of data without a newline */
+ size_t filled; /* total number of bytes in the buffer */
+
+ size_t field_len; /* used for binary fields: the field name length */
+ size_t data_size; /* and the size of the binary data chunk being processed */
+
+ struct iovec_wrapper iovw;
+
+ int state;
+ dual_timestamp ts;
+ sd_id128_t boot_id;
+} JournalImporter;
+
+#define JOURNAL_IMPORTER_INIT(_fd) { .fd = (_fd), .iovw = {} }
+#define JOURNAL_IMPORTER_MAKE(_fd) (JournalImporter) JOURNAL_IMPORTER_INIT(_fd)
+
+void journal_importer_cleanup(JournalImporter *);
+int journal_importer_process_data(JournalImporter *);
+int journal_importer_push_data(JournalImporter *, const char *data, size_t size);
+void journal_importer_drop_iovw(JournalImporter *);
+bool journal_importer_eof(const JournalImporter *);
+
+static inline size_t journal_importer_bytes_remaining(const JournalImporter *imp) {
+ return imp->filled;
+}
diff --git a/src/shared/journal-util.c b/src/shared/journal-util.c
new file mode 100644
index 0000000..9e1870e
--- /dev/null
+++ b/src/shared/journal-util.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "acl-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "log.h"
+#include "strv.h"
+#include "user-util.h"
+
+static int access_check_var_log_journal(sd_journal *j, bool want_other_users) {
+#if HAVE_ACL
+ _cleanup_strv_free_ char **g = NULL;
+ const char* dir;
+#endif
+ int r;
+
+ assert(j);
+
+ /* If we are root, we should have access, don't warn. */
+ if (getuid() == 0)
+ return 0;
+
+ /* If we are in the 'systemd-journal' group, we should have
+ * access too. */
+ r = in_group("systemd-journal");
+ if (r < 0)
+ return log_error_errno(r, "Failed to check if we are in the 'systemd-journal' group: %m");
+ if (r > 0)
+ return 0;
+
+#if HAVE_ACL
+ if (laccess("/run/log/journal", F_OK) >= 0)
+ dir = "/run/log/journal";
+ else
+ dir = "/var/log/journal";
+
+ /* If we are in any of the groups listed in the journal ACLs,
+ * then all is good, too. Let's enumerate all groups from the
+ * default ACL of the directory, which generally should allow
+ * access to most journal files too. */
+ r = acl_search_groups(dir, &g);
+ if (r < 0)
+ return log_error_errno(r, "Failed to search journal ACL: %m");
+ if (r > 0)
+ return 0;
+
+ /* Print a pretty list, if there were ACLs set. */
+ if (!strv_isempty(g)) {
+ _cleanup_free_ char *s = NULL;
+
+ /* There are groups in the ACL, let's list them */
+ r = strv_extend(&g, "systemd-journal");
+ if (r < 0)
+ return log_oom();
+
+ strv_sort(g);
+ strv_uniq(g);
+
+ s = strv_join(g, "', '");
+ if (!s)
+ return log_oom();
+
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in groups '%s' can see all messages.\n"
+ " Pass -q to turn off this notice.",
+ want_other_users ? "other users and the system" : "the system",
+ s);
+ return 1;
+ }
+#endif
+
+ /* If no ACLs were found, print a short version of the message. */
+ log_notice("Hint: You are currently not seeing messages from %s.\n"
+ " Users in the 'systemd-journal' group can see all messages. Pass -q to\n"
+ " turn off this notice.",
+ want_other_users ? "other users and the system" : "the system");
+
+ return 1;
+}
+
+int journal_access_blocked(sd_journal *j) {
+ return hashmap_contains(j->errors, INT_TO_PTR(-EACCES));
+}
+
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users) {
+ void *code;
+ char *path;
+ int r = 0;
+
+ assert(j);
+
+ if (hashmap_isempty(j->errors)) {
+ if (ordered_hashmap_isempty(j->files) && !quiet)
+ log_notice("No journal files were found.");
+
+ return 0;
+ }
+
+ if (journal_access_blocked(j)) {
+ if (!quiet)
+ (void) access_check_var_log_journal(j, want_other_users);
+
+ if (ordered_hashmap_isempty(j->files))
+ r = log_error_errno(EACCES, "No journal files were opened due to insufficient permissions.");
+ }
+
+ HASHMAP_FOREACH_KEY(path, code, j->errors) {
+ int err;
+
+ err = abs(PTR_TO_INT(code));
+
+ switch (err) {
+ case EACCES:
+ continue;
+
+ case ENODATA:
+ log_warning_errno(err, "Journal file %s is truncated, ignoring file.", path);
+ break;
+
+ case EPROTONOSUPPORT:
+ log_warning_errno(err, "Journal file %1$s uses an unsupported feature, ignoring file.\n"
+ "Use SYSTEMD_LOG_LEVEL=debug journalctl --file=%1$s to see the details.",
+ path);
+ break;
+
+ case EBADMSG:
+ log_warning_errno(err, "Journal file %s corrupted, ignoring file.", path);
+ break;
+
+ default:
+ log_warning_errno(err, "An error was encountered while opening journal file or directory %s, ignoring file: %m", path);
+ break;
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/journal-util.h b/src/shared/journal-util.h
new file mode 100644
index 0000000..86fcba0
--- /dev/null
+++ b/src/shared/journal-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+int journal_access_blocked(sd_journal *j);
+int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users);
diff --git a/src/shared/json-internal.h b/src/shared/json-internal.h
new file mode 100644
index 0000000..63afd22
--- /dev/null
+++ b/src/shared/json-internal.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include "json.h"
+
+/* This header should include all prototypes only the JSON parser itself and
+ * its tests need access to. Normal code consuming the JSON parser should not
+ * interface with this. */
+
+typedef union JsonValue {
+ /* Encodes a simple value. On x86-64 this structure is 16 bytes wide (as long double is 128bit). */
+ bool boolean;
+ long double real;
+ intmax_t integer;
+ uintmax_t unsig;
+} JsonValue;
+
+/* Let's protect us against accidental structure size changes on our most relevant arch */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonValue) == 16U);
+#endif
+
+#define JSON_VALUE_NULL ((JsonValue) {})
+
+/* We use fake JsonVariant objects for some special values, in order to avoid memory allocations for them. Note that
+ * effectively this means that there are multiple ways to encode the same objects: via these magic values or as
+ * properly allocated JsonVariant. We convert between both on-the-fly as necessary. */
+enum
+{
+ _JSON_VARIANT_MAGIC_TRUE = 1,
+#define JSON_VARIANT_MAGIC_TRUE ((JsonVariant*) _JSON_VARIANT_MAGIC_TRUE)
+ _JSON_VARIANT_MAGIC_FALSE,
+#define JSON_VARIANT_MAGIC_FALSE ((JsonVariant*) _JSON_VARIANT_MAGIC_FALSE)
+ _JSON_VARIANT_MAGIC_NULL,
+#define JSON_VARIANT_MAGIC_NULL ((JsonVariant*) _JSON_VARIANT_MAGIC_NULL)
+ _JSON_VARIANT_MAGIC_ZERO_INTEGER,
+#define JSON_VARIANT_MAGIC_ZERO_INTEGER ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_INTEGER)
+ _JSON_VARIANT_MAGIC_ZERO_UNSIGNED,
+#define JSON_VARIANT_MAGIC_ZERO_UNSIGNED ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_UNSIGNED)
+ _JSON_VARIANT_MAGIC_ZERO_REAL,
+#define JSON_VARIANT_MAGIC_ZERO_REAL ((JsonVariant*) _JSON_VARIANT_MAGIC_ZERO_REAL)
+ _JSON_VARIANT_MAGIC_EMPTY_STRING,
+#define JSON_VARIANT_MAGIC_EMPTY_STRING ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_STRING)
+ _JSON_VARIANT_MAGIC_EMPTY_ARRAY,
+#define JSON_VARIANT_MAGIC_EMPTY_ARRAY ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_ARRAY)
+ _JSON_VARIANT_MAGIC_EMPTY_OBJECT,
+#define JSON_VARIANT_MAGIC_EMPTY_OBJECT ((JsonVariant*) _JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ __JSON_VARIANT_MAGIC_MAX
+#define _JSON_VARIANT_MAGIC_MAX ((JsonVariant*) __JSON_VARIANT_MAGIC_MAX)
+};
+
+/* This is only safe as long as we don't define more than 4K magic pointers, i.e. the page size of the simplest
+ * architectures we support. That's because we rely on the fact that malloc() will never allocate from the first memory
+ * page, as it is a faulting page for catching NULL pointer dereferences. */
+assert_cc((unsigned) __JSON_VARIANT_MAGIC_MAX < 4096U);
+
+enum { /* JSON tokens */
+ JSON_TOKEN_END,
+ JSON_TOKEN_COLON,
+ JSON_TOKEN_COMMA,
+ JSON_TOKEN_OBJECT_OPEN,
+ JSON_TOKEN_OBJECT_CLOSE,
+ JSON_TOKEN_ARRAY_OPEN,
+ JSON_TOKEN_ARRAY_CLOSE,
+ JSON_TOKEN_STRING,
+ JSON_TOKEN_REAL,
+ JSON_TOKEN_INTEGER,
+ JSON_TOKEN_UNSIGNED,
+ JSON_TOKEN_BOOLEAN,
+ JSON_TOKEN_NULL,
+ _JSON_TOKEN_MAX,
+ _JSON_TOKEN_INVALID = -1,
+};
+
+int json_tokenize(const char **p, char **ret_string, JsonValue *ret_value, unsigned *ret_line, unsigned *ret_column, void **state, unsigned *line, unsigned *column);
diff --git a/src/shared/json.c b/src/shared/json.c
new file mode 100644
index 0000000..ddf6dcb
--- /dev/null
+++ b/src/shared/json.c
@@ -0,0 +1,4410 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <locale.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "sd-messages.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "float.h"
+#include "hexdecoct.h"
+#include "json-internal.h"
+#include "json.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-util.h"
+#include "utf8.h"
+
+/* Refuse putting together variants with a larger depth than 2K by default (as a protection against overflowing stacks
+ * if code processes JSON objects recursively. Note that we store the depth in an uint16_t, hence make sure this
+ * remains under 2^16.
+ *
+ * The value first was 16k, but it was discovered to be too high on llvm/x86-64. See also:
+ * https://github.com/systemd/systemd/issues/10738
+ *
+ * The value then was 4k, but it was discovered to be too high on s390x/aarch64. See also:
+ * https://github.com/systemd/systemd/issues/14396 */
+
+#define DEPTH_MAX (2U*1024U)
+assert_cc(DEPTH_MAX <= UINT16_MAX);
+
+typedef struct JsonSource {
+ /* When we parse from a file or similar, encodes the filename, to indicate the source of a json variant */
+ size_t n_ref;
+ unsigned max_line;
+ unsigned max_column;
+ char name[];
+} JsonSource;
+
+/* On x86-64 this whole structure should have a size of 6 * 64 bit = 48 bytes */
+struct JsonVariant {
+ union {
+ /* We either maintain a reference counter for this variant itself, or we are embedded into an
+ * array/object, in which case only that surrounding object is ref-counted. (If 'embedded' is false,
+ * see below.) */
+ size_t n_ref;
+
+ /* If this JsonVariant is part of an array/object, then this field points to the surrounding
+ * JSON_VARIANT_ARRAY/JSON_VARIANT_OBJECT object. (If 'embedded' is true, see below.) */
+ JsonVariant *parent;
+ };
+
+ /* If this was parsed from some file or buffer, this stores where from, as well as the source line/column */
+ JsonSource *source;
+ unsigned line, column;
+
+ JsonVariantType type:5;
+
+ /* A marker whether this variant is embedded into in array/object or not. If true, the 'parent' pointer above
+ * is valid. If false, the 'n_ref' field above is valid instead. */
+ bool is_embedded:1;
+
+ /* In some conditions (for example, if this object is part of an array of strings or objects), we don't store
+ * any data inline, but instead simply reference an external object and act as surrogate of it. In that case
+ * this bool is set, and the external object is referenced through the .reference field below. */
+ bool is_reference:1;
+
+ /* While comparing two arrays, we use this for marking what we already have seen */
+ bool is_marked:1;
+
+ /* Erase from memory when freeing */
+ bool sensitive:1;
+
+ /* If this is an object the fields are strictly ordered by name */
+ bool sorted:1;
+
+ /* If in addition to this object all objects referenced by it are also ordered strictly by name */
+ bool normalized:1;
+
+ /* The current 'depth' of the JsonVariant, i.e. how many levels of member variants this has */
+ uint16_t depth;
+
+ union {
+ /* For simple types we store the value in-line. */
+ JsonValue value;
+
+ /* For objects and arrays we store the number of elements immediately following */
+ size_t n_elements;
+
+ /* If is_reference as indicated above is set, this is where the reference object is actually stored. */
+ JsonVariant *reference;
+
+ /* Strings are placed immediately after the structure. Note that when this is a JsonVariant embedded
+ * into an array we might encode strings up to INLINE_STRING_LENGTH characters directly inside the
+ * element, while longer strings are stored as references. When this object is not embedded into an
+ * array, but stand-alone we allocate the right size for the whole structure, i.e. the array might be
+ * much larger than INLINE_STRING_LENGTH.
+ *
+ * Note that because we want to allocate arrays of the JsonVariant structure we specify [0] here,
+ * rather than the prettier []. If we wouldn't, then this char array would have undefined size, and so
+ * would the union and then the struct this is included in. And of structures with undefined size we
+ * can't allocate arrays (at least not easily). */
+ char string[0];
+ };
+};
+
+/* Inside string arrays we have a series of JasonVariant structures one after the other. In this case, strings longer
+ * than INLINE_STRING_MAX are stored as references, and all shorter ones inline. (This means — on x86-64 — strings up
+ * to 15 chars are stored within the array elements, and all others in separate allocations) */
+#define INLINE_STRING_MAX (sizeof(JsonVariant) - offsetof(JsonVariant, string) - 1U)
+
+/* Let's make sure this structure isn't increased in size accidentally. This check is only for our most relevant arch
+ * (x86-64). */
+#ifdef __x86_64__
+assert_cc(sizeof(JsonVariant) == 48U);
+assert_cc(INLINE_STRING_MAX == 15U);
+#endif
+
+static JsonSource* json_source_new(const char *name) {
+ JsonSource *s;
+
+ assert(name);
+
+ s = malloc(offsetof(JsonSource, name) + strlen(name) + 1);
+ if (!s)
+ return NULL;
+
+ *s = (JsonSource) {
+ .n_ref = 1,
+ };
+ strcpy(s->name, name);
+
+ return s;
+}
+
+DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(JsonSource, json_source, mfree);
+
+static bool json_source_equal(JsonSource *a, JsonSource *b) {
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ return streq(a->name, b->name);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonSource*, json_source_unref);
+
+/* There are four kind of JsonVariant* pointers:
+ *
+ * 1. NULL
+ * 2. A 'regular' one, i.e. pointing to malloc() memory
+ * 3. A 'magic' one, i.e. one of the special JSON_VARIANT_MAGIC_XYZ values, that encode a few very basic values directly in the pointer.
+ * 4. A 'const string' one, i.e. a pointer to a const string.
+ *
+ * The four kinds of pointers can be discerned like this:
+ *
+ * Detecting #1 is easy, just compare with NULL. Detecting #3 is similarly easy: all magic pointers are below
+ * _JSON_VARIANT_MAGIC_MAX (which is pretty low, within the first memory page, which is special on Linux and other
+ * OSes, as it is a faulting page). In order to discern #2 and #4 we check the lowest bit. If it's off it's #2,
+ * otherwise #4. This makes use of the fact that malloc() will return "maximum aligned" memory, which definitely
+ * means the pointer is even. This means we can use the uneven pointers to reference static strings, as long as we
+ * make sure that all static strings used like this are aligned to 2 (or higher), and that we mask the bit on
+ * access. The JSON_VARIANT_STRING_CONST() macro encodes strings as JsonVariant* pointers, with the bit set. */
+
+static bool json_variant_is_magic(const JsonVariant *v) {
+ if (!v)
+ return false;
+
+ return v < _JSON_VARIANT_MAGIC_MAX;
+}
+
+static bool json_variant_is_const_string(const JsonVariant *v) {
+
+ if (v < _JSON_VARIANT_MAGIC_MAX)
+ return false;
+
+ /* A proper JsonVariant is aligned to whatever malloc() aligns things too, which is definitely not uneven. We
+ * hence use all uneven pointers as indicators for const strings. */
+
+ return (((uintptr_t) v) & 1) != 0;
+}
+
+static bool json_variant_is_regular(const JsonVariant *v) {
+
+ if (v < _JSON_VARIANT_MAGIC_MAX)
+ return false;
+
+ return (((uintptr_t) v) & 1) == 0;
+}
+
+static JsonVariant *json_variant_dereference(JsonVariant *v) {
+
+ /* Recursively dereference variants that are references to other variants */
+
+ if (!v)
+ return NULL;
+
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (!v->is_reference)
+ return v;
+
+ return json_variant_dereference(v->reference);
+}
+
+static uint16_t json_variant_depth(JsonVariant *v) {
+
+ v = json_variant_dereference(v);
+ if (!v)
+ return 0;
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ return v->depth;
+}
+
+static JsonVariant *json_variant_formalize(JsonVariant *v) {
+
+ /* Converts json variant pointers to their normalized form, i.e. fully dereferenced and wherever
+ * possible converted to the "magic" version if there is one */
+
+ if (!v)
+ return NULL;
+
+ v = json_variant_dereference(v);
+
+ switch (json_variant_type(v)) {
+
+ case JSON_VARIANT_BOOLEAN:
+ return json_variant_boolean(v) ? JSON_VARIANT_MAGIC_TRUE : JSON_VARIANT_MAGIC_FALSE;
+
+ case JSON_VARIANT_NULL:
+ return JSON_VARIANT_MAGIC_NULL;
+
+ case JSON_VARIANT_INTEGER:
+ return json_variant_integer(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_INTEGER : v;
+
+ case JSON_VARIANT_UNSIGNED:
+ return json_variant_unsigned(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_UNSIGNED : v;
+
+ case JSON_VARIANT_REAL:
+ DISABLE_WARNING_FLOAT_EQUAL;
+ return json_variant_real(v) == 0.0 ? JSON_VARIANT_MAGIC_ZERO_REAL : v;
+ REENABLE_WARNING;
+
+ case JSON_VARIANT_STRING:
+ return isempty(json_variant_string(v)) ? JSON_VARIANT_MAGIC_EMPTY_STRING : v;
+
+ case JSON_VARIANT_ARRAY:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_ARRAY : v;
+
+ case JSON_VARIANT_OBJECT:
+ return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_OBJECT : v;
+
+ default:
+ return v;
+ }
+}
+
+static JsonVariant *json_variant_conservative_formalize(JsonVariant *v) {
+
+ /* Much like json_variant_formalize(), but won't simplify if the variant has a source/line location attached to
+ * it, in order not to lose context */
+
+ if (!v)
+ return NULL;
+
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (v->source || v->line > 0 || v->column > 0)
+ return v;
+
+ return json_variant_formalize(v);
+}
+
+static int json_variant_new(JsonVariant **ret, JsonVariantType type, size_t space) {
+ JsonVariant *v;
+
+ assert_return(ret, -EINVAL);
+
+ v = malloc0(MAX(sizeof(JsonVariant),
+ offsetof(JsonVariant, value) + space));
+ if (!v)
+ return -ENOMEM;
+
+ v->n_ref = 1;
+ v->type = type;
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_integer(JsonVariant **ret, intmax_t i) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (i == 0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_INTEGER;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_INTEGER, sizeof(i));
+ if (r < 0)
+ return r;
+
+ v->value.integer = i;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ if (u == 0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_UNSIGNED;
+ return 0;
+ }
+
+ r = json_variant_new(&v, JSON_VARIANT_UNSIGNED, sizeof(u));
+ if (r < 0)
+ return r;
+
+ v->value.unsig = u;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_real(JsonVariant **ret, long double d) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+ if (d == 0.0) {
+ *ret = JSON_VARIANT_MAGIC_ZERO_REAL;
+ return 0;
+ }
+ REENABLE_WARNING;
+
+ r = json_variant_new(&v, JSON_VARIANT_REAL, sizeof(d));
+ if (r < 0)
+ return r;
+
+ v->value.real = d;
+ *ret = v;
+
+ return 0;
+}
+
+int json_variant_new_boolean(JsonVariant **ret, bool b) {
+ assert_return(ret, -EINVAL);
+
+ if (b)
+ *ret = JSON_VARIANT_MAGIC_TRUE;
+ else
+ *ret = JSON_VARIANT_MAGIC_FALSE;
+
+ return 0;
+}
+
+int json_variant_new_null(JsonVariant **ret) {
+ assert_return(ret, -EINVAL);
+
+ *ret = JSON_VARIANT_MAGIC_NULL;
+ return 0;
+}
+
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n) {
+ JsonVariant *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ if (!s) {
+ assert_return(IN_SET(n, 0, (size_t) -1), -EINVAL);
+ return json_variant_new_null(ret);
+ }
+ if (n == (size_t) -1) /* determine length automatically */
+ n = strlen(s);
+ else if (memchr(s, 0, n)) /* don't allow embedded NUL, as we can't express that in JSON */
+ return -EINVAL;
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_STRING;
+ return 0;
+ }
+
+ if (!utf8_is_valid_n(s, n)) /* JSON strings must be valid UTF-8 */
+ return -EUCLEAN;
+
+ r = json_variant_new(&v, JSON_VARIANT_STRING, n + 1);
+ if (r < 0)
+ return r;
+
+ memcpy(v->string, s, n);
+ v->string[n] = 0;
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_base64(JsonVariant **ret, const void *p, size_t n) {
+ _cleanup_free_ char *s = NULL;
+ ssize_t k;
+
+ assert_return(ret, -EINVAL);
+ assert_return(n == 0 || p, -EINVAL);
+
+ k = base64mem(p, n, &s);
+ if (k < 0)
+ return k;
+
+ return json_variant_new_stringn(ret, s, k);
+}
+
+int json_variant_new_id128(JsonVariant **ret, sd_id128_t id) {
+ char s[SD_ID128_STRING_MAX];
+
+ return json_variant_new_string(ret, sd_id128_to_string(id, s));
+}
+
+static void json_variant_set(JsonVariant *a, JsonVariant *b) {
+ assert(a);
+
+ b = json_variant_dereference(b);
+ if (!b) {
+ a->type = JSON_VARIANT_NULL;
+ return;
+ }
+
+ a->type = json_variant_type(b);
+ switch (a->type) {
+
+ case JSON_VARIANT_INTEGER:
+ a->value.integer = json_variant_integer(b);
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ a->value.unsig = json_variant_unsigned(b);
+ break;
+
+ case JSON_VARIANT_REAL:
+ a->value.real = json_variant_real(b);
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+ a->value.boolean = json_variant_boolean(b);
+ break;
+
+ case JSON_VARIANT_STRING: {
+ const char *s;
+
+ assert_se(s = json_variant_string(b));
+
+ /* Short strings we can store inline */
+ if (strnlen(s, INLINE_STRING_MAX+1) <= INLINE_STRING_MAX) {
+ strcpy(a->string, s);
+ break;
+ }
+
+ /* For longer strings, use a reference… */
+ _fallthrough_;
+ }
+
+ case JSON_VARIANT_ARRAY:
+ case JSON_VARIANT_OBJECT:
+ a->is_reference = true;
+ a->reference = json_variant_ref(json_variant_conservative_formalize(b));
+ break;
+
+ case JSON_VARIANT_NULL:
+ break;
+
+ default:
+ assert_not_reached("Unexpected variant type");
+ }
+}
+
+static void json_variant_copy_source(JsonVariant *v, JsonVariant *from) {
+ assert(v);
+ assert(from);
+
+ if (!json_variant_is_regular(from))
+ return;
+
+ v->line = from->line;
+ v->column = from->column;
+ v->source = json_source_ref(from->source);
+}
+
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ bool normalized = true;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+
+ if (!json_variant_is_normalized(c))
+ normalized = false;
+ }
+
+ v->normalized = normalized;
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n) {
+ JsonVariant *v;
+ size_t i;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+ assert_return(p, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ .n_elements = n,
+ .depth = 1,
+ };
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *w = v + 1 + i;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ .type = JSON_VARIANT_UNSIGNED,
+ .value.unsig = ((const uint8_t*) p)[i],
+ };
+ }
+
+ v->normalized = true;
+
+ *ret = v;
+ return 0;
+}
+
+int json_variant_new_array_strv(JsonVariant **ret, char **l) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ size_t n;
+ int r;
+
+ assert(ret);
+
+ n = strv_length(l);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY;
+ return 0;
+ }
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_ARRAY,
+ .depth = 1,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements;
+ size_t k;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ .type = JSON_VARIANT_STRING,
+ };
+
+ k = strlen(l[v->n_elements]);
+
+ if (k > INLINE_STRING_MAX) {
+ /* If string is too long, store it as reference. */
+
+ r = json_variant_new_string(&w->reference, l[v->n_elements]);
+ if (r < 0)
+ return r;
+
+ w->is_reference = true;
+ } else {
+ if (!utf8_is_valid_n(l[v->n_elements], k)) /* JSON strings must be valid UTF-8 */
+ return -EUCLEAN;
+
+ memcpy(w->string, l[v->n_elements], k+1);
+ }
+ }
+
+ v->normalized = true;
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ const char *prev = NULL;
+ bool sorted = true, normalized = true;
+
+ assert_return(ret, -EINVAL);
+ if (n == 0) {
+ *ret = JSON_VARIANT_MAGIC_EMPTY_OBJECT;
+ return 0;
+ }
+ assert_return(array, -EINVAL);
+ assert_return(n % 2 == 0, -EINVAL);
+
+ v = new(JsonVariant, n + 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (JsonVariant) {
+ .n_ref = 1,
+ .type = JSON_VARIANT_OBJECT,
+ };
+
+ for (v->n_elements = 0; v->n_elements < n; v->n_elements++) {
+ JsonVariant *w = v + 1 + v->n_elements,
+ *c = array[v->n_elements];
+ uint16_t d;
+
+ if ((v->n_elements & 1) == 0) {
+ const char *k;
+
+ if (!json_variant_is_string(c))
+ return -EINVAL; /* Every second one needs to be a string, as it is the key name */
+
+ assert_se(k = json_variant_string(c));
+
+ if (prev && strcmp(k, prev) <= 0)
+ sorted = normalized = false;
+
+ prev = k;
+ } else if (!json_variant_is_normalized(c))
+ normalized = false;
+
+ d = json_variant_depth(c);
+ if (d >= DEPTH_MAX) /* Refuse too deep nesting */
+ return -ELNRNG;
+ if (d >= v->depth)
+ v->depth = d + 1;
+
+ *w = (JsonVariant) {
+ .is_embedded = true,
+ .parent = v,
+ };
+
+ json_variant_set(w, c);
+ json_variant_copy_source(w, c);
+ }
+
+ v->normalized = normalized;
+ v->sorted = sorted;
+
+ *ret = TAKE_PTR(v);
+ return 0;
+}
+
+static size_t json_variant_size(JsonVariant* v) {
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ if (v->is_reference)
+ return offsetof(JsonVariant, reference) + sizeof(JsonVariant*);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_STRING:
+ return offsetof(JsonVariant, string) + strlen(v->string) + 1;
+
+ case JSON_VARIANT_REAL:
+ return offsetof(JsonVariant, value) + sizeof(long double);
+
+ case JSON_VARIANT_UNSIGNED:
+ return offsetof(JsonVariant, value) + sizeof(uintmax_t);
+
+ case JSON_VARIANT_INTEGER:
+ return offsetof(JsonVariant, value) + sizeof(intmax_t);
+
+ case JSON_VARIANT_BOOLEAN:
+ return offsetof(JsonVariant, value) + sizeof(bool);
+
+ case JSON_VARIANT_ARRAY:
+ case JSON_VARIANT_OBJECT:
+ return offsetof(JsonVariant, n_elements) + sizeof(size_t);
+
+ case JSON_VARIANT_NULL:
+ return offsetof(JsonVariant, value);
+
+ default:
+ assert_not_reached("unexpected type");
+ }
+}
+
+static void json_variant_free_inner(JsonVariant *v, bool force_sensitive) {
+ bool sensitive;
+
+ assert(v);
+
+ if (!json_variant_is_regular(v))
+ return;
+
+ json_source_unref(v->source);
+
+ sensitive = v->sensitive || force_sensitive;
+
+ if (v->is_reference) {
+ if (sensitive)
+ json_variant_sensitive(v->reference);
+
+ json_variant_unref(v->reference);
+ return;
+ }
+
+ if (IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) {
+ size_t i;
+
+ for (i = 0; i < v->n_elements; i++)
+ json_variant_free_inner(v + 1 + i, sensitive);
+ }
+
+ if (sensitive)
+ explicit_bzero_safe(v, json_variant_size(v));
+}
+
+JsonVariant *json_variant_ref(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ return v;
+
+ if (v->is_embedded)
+ json_variant_ref(v->parent); /* ref the compounding variant instead */
+ else {
+ assert(v->n_ref > 0);
+ v->n_ref++;
+ }
+
+ return v;
+}
+
+JsonVariant *json_variant_unref(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ return NULL;
+
+ if (v->is_embedded)
+ json_variant_unref(v->parent);
+ else {
+ assert(v->n_ref > 0);
+ v->n_ref--;
+
+ if (v->n_ref == 0) {
+ json_variant_free_inner(v, false);
+ free(v);
+ }
+ }
+
+ return NULL;
+}
+
+void json_variant_unref_many(JsonVariant **array, size_t n) {
+ size_t i;
+
+ assert(array || n == 0);
+
+ for (i = 0; i < n; i++)
+ json_variant_unref(array[i]);
+}
+
+const char *json_variant_string(JsonVariant *v) {
+ if (!v)
+ return NULL;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+ return "";
+ if (json_variant_is_magic(v))
+ goto mismatch;
+ if (json_variant_is_const_string(v)) {
+ uintptr_t p = (uintptr_t) v;
+
+ assert((p & 1) != 0);
+ return (const char*) (p ^ 1U);
+ }
+
+ if (v->is_reference)
+ return json_variant_string(v->reference);
+ if (v->type != JSON_VARIANT_STRING)
+ goto mismatch;
+
+ return v->string;
+
+mismatch:
+ log_debug("Non-string JSON variant requested as string, returning NULL.");
+ return NULL;
+}
+
+bool json_variant_boolean(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_TRUE)
+ return true;
+ if (v == JSON_VARIANT_MAGIC_FALSE)
+ return false;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->type != JSON_VARIANT_BOOLEAN)
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_boolean(v->reference);
+
+ return v->value.boolean;
+
+mismatch:
+ log_debug("Non-boolean JSON variant requested as boolean, returning false.");
+ return false;
+}
+
+intmax_t json_variant_integer(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_integer(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_INTEGER:
+ return v->value.integer;
+
+ case JSON_VARIANT_UNSIGNED:
+ if (v->value.unsig <= INTMAX_MAX)
+ return (intmax_t) v->value.unsig;
+
+ log_debug("Unsigned integer %ju requested as signed integer and out of range, returning 0.", v->value.unsig);
+ return 0;
+
+ case JSON_VARIANT_REAL: {
+ intmax_t converted;
+
+ converted = (intmax_t) v->value.real;
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+ if ((long double) converted == v->value.real)
+ return converted;
+ REENABLE_WARNING;
+
+ log_debug("Real %Lg requested as integer, and cannot be converted losslessly, returning 0.", v->value.real);
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as integer, returning 0.");
+ return 0;
+}
+
+uintmax_t json_variant_unsigned(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_integer(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_INTEGER:
+ if (v->value.integer >= 0)
+ return (uintmax_t) v->value.integer;
+
+ log_debug("Signed integer %ju requested as unsigned integer and out of range, returning 0.", v->value.integer);
+ return 0;
+
+ case JSON_VARIANT_UNSIGNED:
+ return v->value.unsig;
+
+ case JSON_VARIANT_REAL: {
+ uintmax_t converted;
+
+ converted = (uintmax_t) v->value.real;
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+ if ((long double) converted == v->value.real)
+ return converted;
+ REENABLE_WARNING;
+
+ log_debug("Real %Lg requested as unsigned integer, and cannot be converted losslessly, returning 0.", v->value.real);
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as unsigned, returning 0.");
+ return 0;
+}
+
+long double json_variant_real(JsonVariant *v) {
+ if (!v)
+ return 0.0;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return 0.0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_real(v->reference);
+
+ switch (v->type) {
+
+ case JSON_VARIANT_REAL:
+ return v->value.real;
+
+ case JSON_VARIANT_INTEGER: {
+ long double converted;
+
+ converted = (long double) v->value.integer;
+
+ if ((intmax_t) converted == v->value.integer)
+ return converted;
+
+ log_debug("Signed integer %ji requested as real, and cannot be converted losslessly, returning 0.", v->value.integer);
+ return 0.0;
+ }
+
+ case JSON_VARIANT_UNSIGNED: {
+ long double converted;
+
+ converted = (long double) v->value.unsig;
+
+ if ((uintmax_t) converted == v->value.unsig)
+ return converted;
+
+ log_debug("Unsigned integer %ju requested as real, and cannot be converted losslessly, returning 0.", v->value.unsig);
+ return 0.0;
+ }
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant requested as integer, returning 0.");
+ return 0.0;
+}
+
+bool json_variant_is_negative(JsonVariant *v) {
+ if (!v)
+ goto mismatch;
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER ||
+ v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED ||
+ v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return false;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_is_negative(v->reference);
+
+ /* This function is useful as checking whether numbers are negative is pretty complex since we have three types
+ * of numbers. And some JSON code (OCI for example) uses negative numbers to mark "not defined" numeric
+ * values. */
+
+ switch (v->type) {
+
+ case JSON_VARIANT_REAL:
+ return v->value.real < 0;
+
+ case JSON_VARIANT_INTEGER:
+ return v->value.integer < 0;
+
+ case JSON_VARIANT_UNSIGNED:
+ return false;
+
+ default:
+ break;
+ }
+
+mismatch:
+ log_debug("Non-integer JSON variant tested for negativity, returning false.");
+ return false;
+}
+
+bool json_variant_is_blank_object(JsonVariant *v) {
+ /* Returns true if the specified object is null or empty */
+ return !v ||
+ json_variant_is_null(v) ||
+ (json_variant_is_object(v) && json_variant_elements(v) == 0);
+}
+
+bool json_variant_is_blank_array(JsonVariant *v) {
+ return !v ||
+ json_variant_is_null(v) ||
+ (json_variant_is_array(v) && json_variant_elements(v) == 0);
+}
+
+JsonVariantType json_variant_type(JsonVariant *v) {
+
+ if (!v)
+ return _JSON_VARIANT_TYPE_INVALID;
+
+ if (json_variant_is_const_string(v))
+ return JSON_VARIANT_STRING;
+
+ if (v == JSON_VARIANT_MAGIC_TRUE || v == JSON_VARIANT_MAGIC_FALSE)
+ return JSON_VARIANT_BOOLEAN;
+
+ if (v == JSON_VARIANT_MAGIC_NULL)
+ return JSON_VARIANT_NULL;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER)
+ return JSON_VARIANT_INTEGER;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED)
+ return JSON_VARIANT_UNSIGNED;
+
+ if (v == JSON_VARIANT_MAGIC_ZERO_REAL)
+ return JSON_VARIANT_REAL;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_STRING)
+ return JSON_VARIANT_STRING;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY)
+ return JSON_VARIANT_ARRAY;
+
+ if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return JSON_VARIANT_OBJECT;
+
+ return v->type;
+}
+
+_function_no_sanitize_float_cast_overflow_ bool json_variant_has_type(JsonVariant *v, JsonVariantType type) {
+ JsonVariantType rt;
+
+ /* Note: we turn off ubsan float cast overflo detection for this function, since it would complain
+ * about our float casts but we do them explicitly to detect conversion errors. */
+
+ v = json_variant_dereference(v);
+ if (!v)
+ return false;
+
+ rt = json_variant_type(v);
+ if (rt == type)
+ return true;
+
+ /* If it's a const string, then it only can be a string, and if it is not, it's not */
+ if (json_variant_is_const_string(v))
+ return false;
+
+ /* All three magic zeroes qualify as integer, unsigned and as real */
+ if ((v == JSON_VARIANT_MAGIC_ZERO_INTEGER || v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || v == JSON_VARIANT_MAGIC_ZERO_REAL) &&
+ IN_SET(type, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL, JSON_VARIANT_NUMBER))
+ return true;
+
+ /* All other magic variant types are only equal to themselves */
+ if (json_variant_is_magic(v))
+ return false;
+
+ /* Handle the "number" pseudo type */
+ if (type == JSON_VARIANT_NUMBER)
+ return IN_SET(rt, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL);
+
+ /* Integer conversions are OK in many cases */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_UNSIGNED)
+ return v->value.integer >= 0;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_INTEGER)
+ return v->value.unsig <= INTMAX_MAX;
+
+ /* Any integer that can be converted lossley to a real and back may also be considered a real */
+ if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_REAL)
+ return (intmax_t) (long double) v->value.integer == v->value.integer;
+ if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_REAL)
+ return (uintmax_t) (long double) v->value.unsig == v->value.unsig;
+
+ DISABLE_WARNING_FLOAT_EQUAL;
+
+ /* Any real that can be converted losslessly to an integer and back may also be considered an integer */
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_INTEGER)
+ return (long double) (intmax_t) v->value.real == v->value.real;
+ if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_UNSIGNED)
+ return (long double) (uintmax_t) v->value.real == v->value.real;
+
+ REENABLE_WARNING;
+
+ return false;
+}
+
+size_t json_variant_elements(JsonVariant *v) {
+ if (!v)
+ return 0;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+ v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return 0;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_elements(v->reference);
+
+ return v->n_elements;
+
+mismatch:
+ log_debug("Number of elements in non-array/non-object JSON variant requested, returning 0.");
+ return 0;
+}
+
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t idx) {
+ if (!v)
+ return NULL;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY ||
+ v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ return NULL;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT))
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_by_index(v->reference, idx);
+ if (idx >= v->n_elements)
+ return NULL;
+
+ return json_variant_conservative_formalize(v + 1 + idx);
+
+mismatch:
+ log_debug("Element in non-array/non-object JSON variant requested by index, returning NULL.");
+ return NULL;
+}
+
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key) {
+ size_t i;
+
+ if (!v)
+ goto not_found;
+ if (!key)
+ goto not_found;
+ if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT)
+ goto not_found;
+ if (!json_variant_is_regular(v))
+ goto mismatch;
+ if (v->type != JSON_VARIANT_OBJECT)
+ goto mismatch;
+ if (v->is_reference)
+ return json_variant_by_key(v->reference, key);
+
+ if (v->sorted) {
+ size_t a = 0, b = v->n_elements/2;
+
+ /* If the variant is sorted we can use bisection to find the entry we need in O(log(n)) time */
+
+ while (b > a) {
+ JsonVariant *p;
+ const char *f;
+ int c;
+
+ i = (a + b) / 2;
+ p = json_variant_dereference(v + 1 + i*2);
+
+ assert_se(f = json_variant_string(p));
+
+ c = strcmp(key, f);
+ if (c == 0) {
+ if (ret_key)
+ *ret_key = json_variant_conservative_formalize(v + 1 + i*2);
+
+ return json_variant_conservative_formalize(v + 1 + i*2 + 1);
+ } else if (c < 0)
+ b = i;
+ else
+ a = i + 1;
+ }
+
+ goto not_found;
+ }
+
+ /* The variant is not sorted, hence search for the field linearly */
+ for (i = 0; i < v->n_elements; i += 2) {
+ JsonVariant *p;
+
+ p = json_variant_dereference(v + 1 + i);
+
+ if (!json_variant_has_type(p, JSON_VARIANT_STRING))
+ continue;
+
+ if (streq(json_variant_string(p), key)) {
+
+ if (ret_key)
+ *ret_key = json_variant_conservative_formalize(v + 1 + i);
+
+ return json_variant_conservative_formalize(v + 1 + i + 1);
+ }
+ }
+
+not_found:
+ if (ret_key)
+ *ret_key = NULL;
+
+ return NULL;
+
+mismatch:
+ log_debug("Element in non-object JSON variant requested by key, returning NULL.");
+ if (ret_key)
+ *ret_key = NULL;
+
+ return NULL;
+}
+
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key) {
+ return json_variant_by_key_full(v, key, NULL);
+}
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b) {
+ JsonVariantType t;
+
+ a = json_variant_formalize(a);
+ b = json_variant_formalize(b);
+
+ if (a == b)
+ return true;
+
+ t = json_variant_type(a);
+ if (!json_variant_has_type(b, t))
+ return false;
+
+ switch (t) {
+
+ case JSON_VARIANT_STRING:
+ return streq(json_variant_string(a), json_variant_string(b));
+
+ case JSON_VARIANT_INTEGER:
+ return json_variant_integer(a) == json_variant_integer(b);
+
+ case JSON_VARIANT_UNSIGNED:
+ return json_variant_unsigned(a) == json_variant_unsigned(b);
+
+ case JSON_VARIANT_REAL:
+ DISABLE_WARNING_FLOAT_EQUAL;
+ return json_variant_real(a) == json_variant_real(b);
+ REENABLE_WARNING;
+
+ case JSON_VARIANT_BOOLEAN:
+ return json_variant_boolean(a) == json_variant_boolean(b);
+
+ case JSON_VARIANT_NULL:
+ return true;
+
+ case JSON_VARIANT_ARRAY: {
+ size_t i, n;
+
+ n = json_variant_elements(a);
+ if (n != json_variant_elements(b))
+ return false;
+
+ for (i = 0; i < n; i++) {
+ if (!json_variant_equal(json_variant_by_index(a, i), json_variant_by_index(b, i)))
+ return false;
+ }
+
+ return true;
+ }
+
+ case JSON_VARIANT_OBJECT: {
+ size_t i, n;
+
+ n = json_variant_elements(a);
+ if (n != json_variant_elements(b))
+ return false;
+
+ /* Iterate through all keys in 'a' */
+ for (i = 0; i < n; i += 2) {
+ bool found = false;
+ size_t j;
+
+ /* Match them against all keys in 'b' */
+ for (j = 0; j < n; j += 2) {
+ JsonVariant *key_b;
+
+ key_b = json_variant_by_index(b, j);
+
+ /* During the first iteration unmark everything */
+ if (i == 0)
+ key_b->is_marked = false;
+ else if (key_b->is_marked) /* In later iterations if we already marked something, don't bother with it again */
+ continue;
+
+ if (found)
+ continue;
+
+ if (json_variant_equal(json_variant_by_index(a, i), key_b) &&
+ json_variant_equal(json_variant_by_index(a, i+1), json_variant_by_index(b, j+1))) {
+ /* Key and values match! */
+ key_b->is_marked = found = true;
+
+ /* In the first iteration we continue the inner loop since we want to mark
+ * everything, otherwise exit the loop quickly after we found what we were
+ * looking for. */
+ if (i != 0)
+ break;
+ }
+ }
+
+ if (!found)
+ return false;
+ }
+
+ return true;
+ }
+
+ default:
+ assert_not_reached("Unknown variant type.");
+ }
+}
+
+void json_variant_sensitive(JsonVariant *v) {
+ assert(v);
+
+ /* Marks a variant as "sensitive", so that it is erased from memory when it is destroyed. This is a
+ * one-way operation: as soon as it is marked this way it remains marked this way until it's
+ * destroyed. A magic variant is never sensitive though, even when asked, since it's too
+ * basic. Similar, const string variant are never sensitive either, after all they are included in
+ * the source code as they are, which is not suitable for inclusion of secrets.
+ *
+ * Note that this flag has a recursive effect: when we destroy an object or array we'll propagate the
+ * flag to all contained variants. And if those are then destroyed this is propagated further down,
+ * and so on. */
+
+ v = json_variant_formalize(v);
+ if (!json_variant_is_regular(v))
+ return;
+
+ v->sensitive = true;
+}
+
+bool json_variant_is_sensitive(JsonVariant *v) {
+ v = json_variant_formalize(v);
+ if (!json_variant_is_regular(v))
+ return false;
+
+ return v->sensitive;
+}
+
+static void json_variant_propagate_sensitive(JsonVariant *from, JsonVariant *to) {
+ if (json_variant_is_sensitive(from))
+ json_variant_sensitive(to);
+}
+
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column) {
+ assert_return(v, -EINVAL);
+
+ if (ret_source)
+ *ret_source = json_variant_is_regular(v) && v->source ? v->source->name : NULL;
+
+ if (ret_line)
+ *ret_line = json_variant_is_regular(v) ? v->line : 0;
+
+ if (ret_column)
+ *ret_column = json_variant_is_regular(v) ? v->column : 0;
+
+ return 0;
+}
+
+static int print_source(FILE *f, JsonVariant *v, JsonFormatFlags flags, bool whitespace) {
+ size_t w, k;
+
+ if (!FLAGS_SET(flags, JSON_FORMAT_SOURCE|JSON_FORMAT_PRETTY))
+ return 0;
+
+ if (!json_variant_is_regular(v))
+ return 0;
+
+ if (!v->source && v->line == 0 && v->column == 0)
+ return 0;
+
+ /* The max width we need to format the line numbers for this source file */
+ w = (v->source && v->source->max_line > 0) ?
+ DECIMAL_STR_WIDTH(v->source->max_line) :
+ DECIMAL_STR_MAX(unsigned)-1;
+ k = (v->source && v->source->max_column > 0) ?
+ DECIMAL_STR_WIDTH(v->source->max_column) :
+ DECIMAL_STR_MAX(unsigned) -1;
+
+ if (whitespace) {
+ size_t i, n;
+
+ n = 1 + (v->source ? strlen(v->source->name) : 0) +
+ ((v->source && (v->line > 0 || v->column > 0)) ? 1 : 0) +
+ (v->line > 0 ? w : 0) +
+ (((v->source || v->line > 0) && v->column > 0) ? 1 : 0) +
+ (v->column > 0 ? k : 0) +
+ 2;
+
+ for (i = 0; i < n; i++)
+ fputc(' ', f);
+ } else {
+ fputc('[', f);
+
+ if (v->source)
+ fputs(v->source->name, f);
+ if (v->source && (v->line > 0 || v->column > 0))
+ fputc(':', f);
+ if (v->line > 0)
+ fprintf(f, "%*u", (int) w, v->line);
+ if ((v->source || v->line > 0) || v->column > 0)
+ fputc(':', f);
+ if (v->column > 0)
+ fprintf(f, "%*u", (int) k, v->column);
+
+ fputc(']', f);
+ fputc(' ', f);
+ }
+
+ return 0;
+}
+
+static void json_format_string(FILE *f, const char *q, JsonFormatFlags flags) {
+ assert(q);
+
+ fputc('"', f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_GREEN, f);
+
+ for (; *q; q++)
+ switch (*q) {
+ case '"':
+ fputs("\\\"", f);
+ break;
+
+ case '\\':
+ fputs("\\\\", f);
+ break;
+
+ case '\b':
+ fputs("\\b", f);
+ break;
+
+ case '\f':
+ fputs("\\f", f);
+ break;
+
+ case '\n':
+ fputs("\\n", f);
+ break;
+
+ case '\r':
+ fputs("\\r", f);
+ break;
+
+ case '\t':
+ fputs("\\t", f);
+ break;
+
+ default:
+ if ((signed char) *q >= 0 && *q < ' ')
+ fprintf(f, "\\u%04x", *q);
+ else
+ fputc(*q, f);
+ break;
+ }
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ fputc('"', f);
+}
+
+static int json_format(FILE *f, JsonVariant *v, JsonFormatFlags flags, const char *prefix) {
+ int r;
+
+ assert(f);
+ assert(v);
+
+ switch (json_variant_type(v)) {
+
+ case JSON_VARIANT_REAL: {
+ locale_t loc;
+
+ loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+ if (loc == (locale_t) 0)
+ return -errno;
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%.*Le", DECIMAL_DIG, json_variant_real(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ freelocale(loc);
+ break;
+ }
+
+ case JSON_VARIANT_INTEGER:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%" PRIdMAX, json_variant_integer(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT_BLUE, f);
+
+ fprintf(f, "%" PRIuMAX, json_variant_unsigned(v));
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT, f);
+
+ if (json_variant_boolean(v))
+ fputs("true", f);
+ else
+ fputs("false", f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+
+ break;
+
+ case JSON_VARIANT_NULL:
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_HIGHLIGHT, f);
+
+ fputs("null", f);
+
+ if (flags & JSON_FORMAT_COLOR)
+ fputs(ANSI_NORMAL, f);
+ break;
+
+ case JSON_VARIANT_STRING:
+ json_format_string(f, json_variant_string(v), flags);
+ break;
+
+ case JSON_VARIANT_ARRAY: {
+ size_t i, n;
+
+ n = json_variant_elements(v);
+
+ if (n == 0)
+ fputs("[]", f);
+ else {
+ _cleanup_free_ char *joined = NULL;
+ const char *prefix2;
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ joined = strjoin(strempty(prefix), "\t");
+ if (!joined)
+ return -ENOMEM;
+
+ prefix2 = joined;
+ fputs("[\n", f);
+ } else {
+ prefix2 = strempty(prefix);
+ fputc('[', f);
+ }
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *e;
+
+ assert_se(e = json_variant_by_index(v, i));
+
+ if (i > 0) {
+ if (flags & JSON_FORMAT_PRETTY)
+ fputs(",\n", f);
+ else
+ fputc(',', f);
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ print_source(f, e, flags, false);
+ fputs(prefix2, f);
+ }
+
+ r = json_format(f, e, flags, prefix2);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ fputc('\n', f);
+ print_source(f, v, flags, true);
+ fputs(strempty(prefix), f);
+ }
+
+ fputc(']', f);
+ }
+ break;
+ }
+
+ case JSON_VARIANT_OBJECT: {
+ size_t i, n;
+
+ n = json_variant_elements(v);
+
+ if (n == 0)
+ fputs("{}", f);
+ else {
+ _cleanup_free_ char *joined = NULL;
+ const char *prefix2;
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ joined = strjoin(strempty(prefix), "\t");
+ if (!joined)
+ return -ENOMEM;
+
+ prefix2 = joined;
+ fputs("{\n", f);
+ } else {
+ prefix2 = strempty(prefix);
+ fputc('{', f);
+ }
+
+ for (i = 0; i < n; i += 2) {
+ JsonVariant *e;
+
+ e = json_variant_by_index(v, i);
+
+ if (i > 0) {
+ if (flags & JSON_FORMAT_PRETTY)
+ fputs(",\n", f);
+ else
+ fputc(',', f);
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ print_source(f, e, flags, false);
+ fputs(prefix2, f);
+ }
+
+ r = json_format(f, e, flags, prefix2);
+ if (r < 0)
+ return r;
+
+ fputs(flags & JSON_FORMAT_PRETTY ? " : " : ":", f);
+
+ r = json_format(f, json_variant_by_index(v, i+1), flags, prefix2);
+ if (r < 0)
+ return r;
+ }
+
+ if (flags & JSON_FORMAT_PRETTY) {
+ fputc('\n', f);
+ print_source(f, v, flags, true);
+ fputs(strempty(prefix), f);
+ }
+
+ fputc('}', f);
+ }
+ break;
+ }
+
+ default:
+ assert_not_reached("Unexpected variant type.");
+ }
+
+ return 0;
+}
+
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t sz = 0;
+ int r;
+
+ /* Returns the length of the generated string (without the terminating NUL),
+ * or negative on error. */
+
+ assert_return(v, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ f = open_memstream_unlocked(&s, &sz);
+ if (!f)
+ return -ENOMEM;
+
+ json_variant_dump(v, flags, f, NULL);
+
+ /* Add terminating 0, so that the output buffer is a valid string. */
+ fputc('\0', f);
+
+ r = fflush_and_check(f);
+ }
+ if (r < 0)
+ return r;
+
+ assert(s);
+ *ret = TAKE_PTR(s);
+ assert(sz > 0);
+ return (int) sz - 1;
+}
+
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix) {
+ if (!v)
+ return;
+
+ if (!f)
+ f = stdout;
+
+ print_source(f, v, flags, false);
+
+ if (((flags & (JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_COLOR)) == JSON_FORMAT_COLOR_AUTO) && colors_enabled())
+ flags |= JSON_FORMAT_COLOR;
+
+ if (((flags & (JSON_FORMAT_PRETTY_AUTO|JSON_FORMAT_PRETTY)) == JSON_FORMAT_PRETTY_AUTO))
+ flags |= on_tty() ? JSON_FORMAT_PRETTY : JSON_FORMAT_NEWLINE;
+
+ if (flags & JSON_FORMAT_SSE)
+ fputs("data: ", f);
+ if (flags & JSON_FORMAT_SEQ)
+ fputc('\x1e', f); /* ASCII Record Separator */
+
+ json_format(f, v, flags, prefix);
+
+ if (flags & (JSON_FORMAT_PRETTY|JSON_FORMAT_SEQ|JSON_FORMAT_SSE|JSON_FORMAT_NEWLINE))
+ fputc('\n', f);
+ if (flags & JSON_FORMAT_SSE)
+ fputc('\n', f); /* In case of SSE add a second newline */
+
+ if (flags & JSON_FORMAT_FLUSH)
+ fflush(f);
+}
+
+int json_variant_filter(JsonVariant **v, char **to_remove) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t i, n = 0, k = 0;
+ int r;
+
+ assert(v);
+
+ if (json_variant_is_blank_object(*v))
+ return 0;
+ if (!json_variant_is_object(*v))
+ return -EINVAL;
+
+ if (strv_isempty(to_remove))
+ return 0;
+
+ for (i = 0; i < json_variant_elements(*v); i += 2) {
+ JsonVariant *p;
+
+ p = json_variant_by_index(*v, i);
+ if (!json_variant_has_type(p, JSON_VARIANT_STRING))
+ return -EINVAL;
+
+ if (strv_contains(to_remove, json_variant_string(p))) {
+ if (!array) {
+ array = new(JsonVariant*, json_variant_elements(*v) - 2);
+ if (!array)
+ return -ENOMEM;
+
+ for (k = 0; k < i; k++)
+ array[k] = json_variant_by_index(*v, k);
+ }
+
+ n++;
+ } else if (array) {
+ array[k++] = p;
+ array[k++] = json_variant_by_index(*v, i + 1);
+ }
+ }
+
+ if (n == 0)
+ return 0;
+
+ r = json_variant_new_object(&w, array, k);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, w);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(w);
+
+ return (int) n;
+}
+
+int json_variant_set_field(JsonVariant **v, const char *field, JsonVariant *value) {
+ _cleanup_(json_variant_unrefp) JsonVariant *field_variant = NULL, *w = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t i, k = 0;
+ int r;
+
+ assert(v);
+ assert(field);
+
+ if (json_variant_is_blank_object(*v)) {
+ array = new(JsonVariant*, 2);
+ if (!array)
+ return -ENOMEM;
+
+ } else {
+ if (!json_variant_is_object(*v))
+ return -EINVAL;
+
+ for (i = 0; i < json_variant_elements(*v); i += 2) {
+ JsonVariant *p;
+
+ p = json_variant_by_index(*v, i);
+ if (!json_variant_is_string(p))
+ return -EINVAL;
+
+ if (streq(json_variant_string(p), field)) {
+
+ if (!array) {
+ array = new(JsonVariant*, json_variant_elements(*v));
+ if (!array)
+ return -ENOMEM;
+
+ for (k = 0; k < i; k++)
+ array[k] = json_variant_by_index(*v, k);
+ }
+
+ } else if (array) {
+ array[k++] = p;
+ array[k++] = json_variant_by_index(*v, i + 1);
+ }
+ }
+
+ if (!array) {
+ array = new(JsonVariant*, json_variant_elements(*v) + 2);
+ if (!array)
+ return -ENOMEM;
+
+ for (k = 0; k < json_variant_elements(*v); k++)
+ array[k] = json_variant_by_index(*v, k);
+ }
+ }
+
+ r = json_variant_new_string(&field_variant, field);
+ if (r < 0)
+ return r;
+
+ array[k++] = field_variant;
+ array[k++] = value;
+
+ r = json_variant_new_object(&w, array, k);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, w);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(w);
+
+ return 1;
+}
+
+int json_variant_set_field_string(JsonVariant **v, const char *field, const char *value) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_string(&m, value);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_integer(JsonVariant **v, const char *field, intmax_t i) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_integer(&m, i);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_unsigned(JsonVariant **v, const char *field, uintmax_t u) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_unsigned(&m, u);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_boolean(JsonVariant **v, const char *field, bool b) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_boolean(&m, b);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_set_field_strv(JsonVariant **v, const char *field, char **l) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ r = json_variant_new_array_strv(&m, l);
+ if (r < 0)
+ return r;
+
+ return json_variant_set_field(v, field, m);
+}
+
+int json_variant_merge(JsonVariant **v, JsonVariant *m) {
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t v_elements, m_elements, i, k;
+ bool v_blank, m_blank;
+ int r;
+
+ m = json_variant_dereference(m);
+
+ v_blank = json_variant_is_blank_object(*v);
+ m_blank = json_variant_is_blank_object(m);
+
+ if (!v_blank && !json_variant_is_object(*v))
+ return -EINVAL;
+ if (!m_blank && !json_variant_is_object(m))
+ return -EINVAL;
+
+ if (m_blank)
+ return 0; /* nothing to do */
+
+ if (v_blank) {
+ json_variant_unref(*v);
+ *v = json_variant_ref(m);
+ return 1;
+ }
+
+ v_elements = json_variant_elements(*v);
+ m_elements = json_variant_elements(m);
+ if (v_elements > SIZE_MAX - m_elements) /* overflow check */
+ return -ENOMEM;
+
+ array = new(JsonVariant*, v_elements + m_elements);
+ if (!array)
+ return -ENOMEM;
+
+ k = 0;
+ for (i = 0; i < v_elements; i += 2) {
+ JsonVariant *u;
+
+ u = json_variant_by_index(*v, i);
+ if (!json_variant_is_string(u))
+ return -EINVAL;
+
+ if (json_variant_by_key(m, json_variant_string(u)))
+ continue; /* skip if exists in second variant */
+
+ array[k++] = u;
+ array[k++] = json_variant_by_index(*v, i + 1);
+ }
+
+ for (i = 0; i < m_elements; i++)
+ array[k++] = json_variant_by_index(m, i);
+
+ r = json_variant_new_object(&w, array, k);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, w);
+ json_variant_propagate_sensitive(m, w);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(w);
+
+ return 1;
+}
+
+int json_variant_append_array(JsonVariant **v, JsonVariant *element) {
+ _cleanup_(json_variant_unrefp) JsonVariant *nv = NULL;
+ bool blank;
+ int r;
+
+ assert(v);
+ assert(element);
+
+
+ if (!*v || json_variant_is_null(*v))
+ blank = true;
+ else if (!json_variant_is_array(*v))
+ return -EINVAL;
+ else
+ blank = json_variant_elements(*v) == 0;
+
+ if (blank)
+ r = json_variant_new_array(&nv, (JsonVariant*[]) { element }, 1);
+ else {
+ _cleanup_free_ JsonVariant **array = NULL;
+ size_t i;
+
+ array = new(JsonVariant*, json_variant_elements(*v) + 1);
+ if (!array)
+ return -ENOMEM;
+
+ for (i = 0; i < json_variant_elements(*v); i++)
+ array[i] = json_variant_by_index(*v, i);
+
+ array[i] = element;
+
+ r = json_variant_new_array(&nv, array, i + 1);
+ }
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, nv);
+
+ json_variant_unref(*v);
+ *v = TAKE_PTR(nv);
+
+ return 0;
+}
+
+int json_variant_strv(JsonVariant *v, char ***ret) {
+ char **l = NULL;
+ size_t n, i;
+ bool sensitive;
+ int r;
+
+ assert(ret);
+
+ if (!v || json_variant_is_null(v)) {
+ l = new0(char*, 1);
+ if (!l)
+ return -ENOMEM;
+
+ *ret = l;
+ return 0;
+ }
+
+ if (!json_variant_is_array(v))
+ return -EINVAL;
+
+ sensitive = v->sensitive;
+
+ n = json_variant_elements(v);
+ l = new(char*, n+1);
+ if (!l)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++) {
+ JsonVariant *e;
+
+ assert_se(e = json_variant_by_index(v, i));
+ sensitive = sensitive || e->sensitive;
+
+ if (!json_variant_is_string(e)) {
+ l[i] = NULL;
+ r = -EINVAL;
+ goto fail;
+ }
+
+ l[i] = strdup(json_variant_string(e));
+ if (!l[i]) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ l[i] = NULL;
+ *ret = TAKE_PTR(l);
+
+ return 0;
+
+fail:
+ if (sensitive)
+ strv_free_erase(l);
+ else
+ strv_free(l);
+
+ return r;
+}
+
+static int json_variant_copy(JsonVariant **nv, JsonVariant *v) {
+ JsonVariantType t;
+ JsonVariant *c;
+ JsonValue value;
+ const void *source;
+ size_t k;
+
+ assert(nv);
+ assert(v);
+
+ /* Let's copy the simple types literally, and the larger types by references */
+ t = json_variant_type(v);
+ switch (t) {
+ case JSON_VARIANT_INTEGER:
+ k = sizeof(intmax_t);
+ value.integer = json_variant_integer(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_UNSIGNED:
+ k = sizeof(uintmax_t);
+ value.unsig = json_variant_unsigned(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_REAL:
+ k = sizeof(long double);
+ value.real = json_variant_real(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_BOOLEAN:
+ k = sizeof(bool);
+ value.boolean = json_variant_boolean(v);
+ source = &value;
+ break;
+
+ case JSON_VARIANT_NULL:
+ k = 0;
+ source = NULL;
+ break;
+
+ case JSON_VARIANT_STRING:
+ source = json_variant_string(v);
+ k = strnlen(source, INLINE_STRING_MAX + 1);
+ if (k <= INLINE_STRING_MAX) {
+ k ++;
+ break;
+ }
+
+ _fallthrough_;
+
+ default:
+ /* Everything else copy by reference */
+
+ c = malloc0(MAX(sizeof(JsonVariant),
+ offsetof(JsonVariant, reference) + sizeof(JsonVariant*)));
+ if (!c)
+ return -ENOMEM;
+
+ c->n_ref = 1;
+ c->type = t;
+ c->is_reference = true;
+ c->reference = json_variant_ref(json_variant_formalize(v));
+
+ *nv = c;
+ return 0;
+ }
+
+ c = malloc0(MAX(sizeof(JsonVariant),
+ offsetof(JsonVariant, value) + k));
+ if (!c)
+ return -ENOMEM;
+
+ c->n_ref = 1;
+ c->type = t;
+
+ memcpy_safe(&c->value, source, k);
+
+ json_variant_propagate_sensitive(v, c);
+
+ *nv = c;
+ return 0;
+}
+
+static bool json_single_ref(JsonVariant *v) {
+
+ /* Checks whether the caller is the single owner of the object, i.e. can get away with changing it */
+
+ if (!json_variant_is_regular(v))
+ return false;
+
+ if (v->is_embedded)
+ return json_single_ref(v->parent);
+
+ assert(v->n_ref > 0);
+ return v->n_ref == 1;
+}
+
+static int json_variant_set_source(JsonVariant **v, JsonSource *source, unsigned line, unsigned column) {
+ JsonVariant *w;
+ int r;
+
+ assert(v);
+
+ /* Patch in source and line/column number. Tries to do this in-place if the caller is the sole referencer of
+ * the object. If not, allocates a new object, possibly a surrogate for the original one */
+
+ if (!*v)
+ return 0;
+
+ if (source && line > source->max_line)
+ source->max_line = line;
+ if (source && column > source->max_column)
+ source->max_column = column;
+
+ if (!json_variant_is_regular(*v)) {
+
+ if (!source && line == 0 && column == 0)
+ return 0;
+
+ } else {
+ if (json_source_equal((*v)->source, source) &&
+ (*v)->line == line &&
+ (*v)->column == column)
+ return 0;
+
+ if (json_single_ref(*v)) { /* Sole reference? */
+ json_source_unref((*v)->source);
+ (*v)->source = json_source_ref(source);
+ (*v)->line = line;
+ (*v)->column = column;
+ return 1;
+ }
+ }
+
+ r = json_variant_copy(&w, *v);
+ if (r < 0)
+ return r;
+
+ assert(json_variant_is_regular(w));
+ assert(!w->is_embedded);
+ assert(w->n_ref == 1);
+ assert(!w->source);
+
+ w->source = json_source_ref(source);
+ w->line = line;
+ w->column = column;
+
+ json_variant_unref(*v);
+ *v = w;
+
+ return 1;
+}
+
+static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, size_t n) {
+ assert(line);
+ assert(column);
+ assert(s || n == 0);
+
+ while (n > 0) {
+ if (*s == '\n') {
+ (*line)++;
+ *column = 1;
+ } else if ((signed char) *s >= 0 && *s < 127) /* Process ASCII chars quickly */
+ (*column)++;
+ else {
+ int w;
+
+ w = utf8_encoded_valid_unichar(s, n);
+ if (w < 0) /* count invalid unichars as normal characters */
+ w = 1;
+ else if ((size_t) w > n) /* never read more than the specified number of characters */
+ w = (int) n;
+
+ (*column)++;
+
+ s += w;
+ n -= w;
+ continue;
+ }
+
+ s++;
+ n--;
+ }
+}
+
+static int unhex_ucs2(const char *c, uint16_t *ret) {
+ int aa, bb, cc, dd;
+ uint16_t x;
+
+ assert(c);
+ assert(ret);
+
+ aa = unhexchar(c[0]);
+ if (aa < 0)
+ return -EINVAL;
+
+ bb = unhexchar(c[1]);
+ if (bb < 0)
+ return -EINVAL;
+
+ cc = unhexchar(c[2]);
+ if (cc < 0)
+ return -EINVAL;
+
+ dd = unhexchar(c[3]);
+ if (dd < 0)
+ return -EINVAL;
+
+ x = ((uint16_t) aa << 12) |
+ ((uint16_t) bb << 8) |
+ ((uint16_t) cc << 4) |
+ ((uint16_t) dd);
+
+ if (x <= 0)
+ return -EINVAL;
+
+ *ret = x;
+
+ return 0;
+}
+
+static int json_parse_string(const char **p, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t n = 0, allocated = 0;
+ const char *c;
+
+ assert(p);
+ assert(*p);
+ assert(ret);
+
+ c = *p;
+
+ if (*c != '"')
+ return -EINVAL;
+
+ c++;
+
+ for (;;) {
+ int len;
+
+ /* Check for EOF */
+ if (*c == 0)
+ return -EINVAL;
+
+ /* Check for control characters 0x00..0x1f */
+ if (*c > 0 && *c < ' ')
+ return -EINVAL;
+
+ /* Check for control character 0x7f */
+ if (*c == 0x7f)
+ return -EINVAL;
+
+ if (*c == '"') {
+ if (!s) {
+ s = strdup("");
+ if (!s)
+ return -ENOMEM;
+ } else
+ s[n] = 0;
+
+ *p = c + 1;
+
+ *ret = TAKE_PTR(s);
+ return JSON_TOKEN_STRING;
+ }
+
+ if (*c == '\\') {
+ char ch = 0;
+ c++;
+
+ if (*c == 0)
+ return -EINVAL;
+
+ if (IN_SET(*c, '"', '\\', '/'))
+ ch = *c;
+ else if (*c == 'b')
+ ch = '\b';
+ else if (*c == 'f')
+ ch = '\f';
+ else if (*c == 'n')
+ ch = '\n';
+ else if (*c == 'r')
+ ch = '\r';
+ else if (*c == 't')
+ ch = '\t';
+ else if (*c == 'u') {
+ char16_t x;
+ int r;
+
+ r = unhex_ucs2(c + 1, &x);
+ if (r < 0)
+ return r;
+
+ c += 5;
+
+ if (!GREEDY_REALLOC(s, allocated, n + 5))
+ return -ENOMEM;
+
+ if (!utf16_is_surrogate(x))
+ n += utf8_encode_unichar(s + n, (char32_t) x);
+ else if (utf16_is_trailing_surrogate(x))
+ return -EINVAL;
+ else {
+ char16_t y;
+
+ if (c[0] != '\\' || c[1] != 'u')
+ return -EINVAL;
+
+ r = unhex_ucs2(c + 2, &y);
+ if (r < 0)
+ return r;
+
+ c += 6;
+
+ if (!utf16_is_trailing_surrogate(y))
+ return -EINVAL;
+
+ n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y));
+ }
+
+ continue;
+ } else
+ return -EINVAL;
+
+ if (!GREEDY_REALLOC(s, allocated, n + 2))
+ return -ENOMEM;
+
+ s[n++] = ch;
+ c ++;
+ continue;
+ }
+
+ len = utf8_encoded_valid_unichar(c, (size_t) -1);
+ if (len < 0)
+ return len;
+
+ if (!GREEDY_REALLOC(s, allocated, n + len + 1))
+ return -ENOMEM;
+
+ memcpy(s + n, c, len);
+ n += len;
+ c += len;
+ }
+}
+
+static int json_parse_number(const char **p, JsonValue *ret) {
+ bool negative = false, exponent_negative = false, is_real = false;
+ long double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
+ intmax_t i = 0;
+ uintmax_t u = 0;
+ const char *c;
+
+ assert(p);
+ assert(*p);
+ assert(ret);
+
+ c = *p;
+
+ if (*c == '-') {
+ negative = true;
+ c++;
+ }
+
+ if (*c == '0')
+ c++;
+ else {
+ if (!strchr("123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ if (!is_real) {
+ if (negative) {
+
+ if (i < INTMAX_MIN / 10) /* overflow */
+ is_real = true;
+ else {
+ intmax_t t = 10 * i;
+
+ if (t < INTMAX_MIN + (*c - '0')) /* overflow */
+ is_real = true;
+ else
+ i = t - (*c - '0');
+ }
+ } else {
+ if (u > UINTMAX_MAX / 10) /* overflow */
+ is_real = true;
+ else {
+ uintmax_t t = 10 * u;
+
+ if (t > UINTMAX_MAX - (*c - '0')) /* overflow */
+ is_real = true;
+ else
+ u = t + (*c - '0');
+ }
+ }
+ }
+
+ x = 10.0 * x + (*c - '0');
+
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ if (*c == '.') {
+ is_real = true;
+ c++;
+
+ if (!strchr("0123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ y = 10.0 * y + (*c - '0');
+ shift = 10.0 * shift;
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ if (IN_SET(*c, 'e', 'E')) {
+ is_real = true;
+ c++;
+
+ if (*c == '-') {
+ exponent_negative = true;
+ c++;
+ } else if (*c == '+')
+ c++;
+
+ if (!strchr("0123456789", *c) || *c == 0)
+ return -EINVAL;
+
+ do {
+ exponent = 10.0 * exponent + (*c - '0');
+ c++;
+ } while (strchr("0123456789", *c) && *c != 0);
+ }
+
+ *p = c;
+
+ if (is_real) {
+ ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10l((exponent_negative ? -1.0 : 1.0) * exponent);
+ return JSON_TOKEN_REAL;
+ } else if (negative) {
+ ret->integer = i;
+ return JSON_TOKEN_INTEGER;
+ } else {
+ ret->unsig = u;
+ return JSON_TOKEN_UNSIGNED;
+ }
+}
+
+int json_tokenize(
+ const char **p,
+ char **ret_string,
+ JsonValue *ret_value,
+ unsigned *ret_line, /* 'ret_line' returns the line at the beginning of this token */
+ unsigned *ret_column,
+ void **state,
+ unsigned *line, /* 'line' is used as a line state, it always reflect the line we are at after the token was read */
+ unsigned *column) {
+
+ unsigned start_line, start_column;
+ const char *start, *c;
+ size_t n;
+ int t, r;
+
+ enum {
+ STATE_NULL,
+ STATE_VALUE,
+ STATE_VALUE_POST,
+ };
+
+ assert(p);
+ assert(*p);
+ assert(ret_string);
+ assert(ret_value);
+ assert(ret_line);
+ assert(ret_column);
+ assert(line);
+ assert(column);
+ assert(state);
+
+ t = PTR_TO_INT(*state);
+ if (t == STATE_NULL) {
+ *line = 1;
+ *column = 1;
+ t = STATE_VALUE;
+ }
+
+ /* Skip over the whitespace */
+ n = strspn(*p, WHITESPACE);
+ inc_lines_columns(line, column, *p, n);
+ c = *p + n;
+
+ /* Remember where we started processing this token */
+ start = c;
+ start_line = *line;
+ start_column = *column;
+
+ if (*c == 0) {
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+ r = JSON_TOKEN_END;
+ goto finish;
+ }
+
+ switch (t) {
+
+ case STATE_VALUE:
+
+ if (*c == '{') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_OBJECT_OPEN;
+ goto null_return;
+
+ } else if (*c == '}') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_OBJECT_CLOSE;
+ goto null_return;
+
+ } else if (*c == '[') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_ARRAY_OPEN;
+ goto null_return;
+
+ } else if (*c == ']') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_ARRAY_CLOSE;
+ goto null_return;
+
+ } else if (*c == '"') {
+
+ r = json_parse_string(&c, ret_string);
+ if (r < 0)
+ return r;
+
+ *ret_value = JSON_VALUE_NULL;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ goto finish;
+
+ } else if (strchr("-0123456789", *c)) {
+
+ r = json_parse_number(&c, ret_value);
+ if (r < 0)
+ return r;
+
+ *ret_string = NULL;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ goto finish;
+
+ } else if (startswith(c, "true")) {
+ *ret_string = NULL;
+ ret_value->boolean = true;
+ c += 4;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_BOOLEAN;
+ goto finish;
+
+ } else if (startswith(c, "false")) {
+ *ret_string = NULL;
+ ret_value->boolean = false;
+ c += 5;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_BOOLEAN;
+ goto finish;
+
+ } else if (startswith(c, "null")) {
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+ c += 4;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_NULL;
+ goto finish;
+
+ }
+
+ return -EINVAL;
+
+ case STATE_VALUE_POST:
+
+ if (*c == ':') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_COLON;
+ goto null_return;
+
+ } else if (*c == ',') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE);
+ r = JSON_TOKEN_COMMA;
+ goto null_return;
+
+ } else if (*c == '}') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_OBJECT_CLOSE;
+ goto null_return;
+
+ } else if (*c == ']') {
+ c++;
+ *state = INT_TO_PTR(STATE_VALUE_POST);
+ r = JSON_TOKEN_ARRAY_CLOSE;
+ goto null_return;
+ }
+
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unexpected tokenizer state");
+ }
+
+null_return:
+ *ret_string = NULL;
+ *ret_value = JSON_VALUE_NULL;
+
+finish:
+ inc_lines_columns(line, column, start, c - start);
+ *p = c;
+
+ *ret_line = start_line;
+ *ret_column = start_column;
+
+ return r;
+}
+
+typedef enum JsonExpect {
+ /* The following values are used by json_parse() */
+ EXPECT_TOPLEVEL,
+ EXPECT_END,
+ EXPECT_OBJECT_FIRST_KEY,
+ EXPECT_OBJECT_NEXT_KEY,
+ EXPECT_OBJECT_COLON,
+ EXPECT_OBJECT_VALUE,
+ EXPECT_OBJECT_COMMA,
+ EXPECT_ARRAY_FIRST_ELEMENT,
+ EXPECT_ARRAY_NEXT_ELEMENT,
+ EXPECT_ARRAY_COMMA,
+
+ /* And these are used by json_build() */
+ EXPECT_ARRAY_ELEMENT,
+ EXPECT_OBJECT_KEY,
+} JsonExpect;
+
+typedef struct JsonStack {
+ JsonExpect expect;
+ JsonVariant **elements;
+ size_t n_elements, n_elements_allocated;
+ unsigned line_before;
+ unsigned column_before;
+ size_t n_suppress; /* When building: if > 0, suppress this many subsequent elements. If == (size_t) -1, suppress all subsequent elements */
+} JsonStack;
+
+static void json_stack_release(JsonStack *s) {
+ assert(s);
+
+ json_variant_unref_many(s->elements, s->n_elements);
+ s->elements = mfree(s->elements);
+}
+
+static int json_parse_internal(
+ const char **input,
+ JsonSource *source,
+ JsonParseFlags flags,
+ JsonVariant **ret,
+ unsigned *line,
+ unsigned *column,
+ bool continue_end) {
+
+ size_t n_stack = 1, n_stack_allocated = 0, i;
+ unsigned line_buffer = 0, column_buffer = 0;
+ void *tokenizer_state = NULL;
+ JsonStack *stack = NULL;
+ const char *p;
+ int r;
+
+ assert_return(input, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ p = *input;
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+ return -ENOMEM;
+
+ stack[0] = (JsonStack) {
+ .expect = EXPECT_TOPLEVEL,
+ };
+
+ if (!line)
+ line = &line_buffer;
+ if (!column)
+ column = &column_buffer;
+
+ for (;;) {
+ _cleanup_(json_variant_unrefp) JsonVariant *add = NULL;
+ _cleanup_free_ char *string = NULL;
+ unsigned line_token, column_token;
+ JsonStack *current;
+ JsonValue value;
+ int token;
+
+ assert(n_stack > 0);
+ current = stack + n_stack - 1;
+
+ if (continue_end && current->expect == EXPECT_END)
+ goto done;
+
+ token = json_tokenize(&p, &string, &value, &line_token, &column_token, &tokenizer_state, line, column);
+ if (token < 0) {
+ r = token;
+ goto finish;
+ }
+
+ switch (token) {
+
+ case JSON_TOKEN_END:
+ if (current->expect != EXPECT_END) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(current->n_elements == 1);
+ assert(n_stack == 1);
+ goto done;
+
+ case JSON_TOKEN_COLON:
+
+ if (current->expect != EXPECT_OBJECT_COLON) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+
+ case JSON_TOKEN_COMMA:
+
+ if (current->expect == EXPECT_OBJECT_COMMA)
+ current->expect = EXPECT_OBJECT_NEXT_KEY;
+ else if (current->expect == EXPECT_ARRAY_COMMA)
+ current->expect = EXPECT_ARRAY_NEXT_ELEMENT;
+ else {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ break;
+
+ case JSON_TOKEN_OBJECT_OPEN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ /* Prepare the expect for when we return from the child */
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_OBJECT_FIRST_KEY,
+ .line_before = line_token,
+ .column_before = column_token,
+ };
+
+ current = stack + n_stack - 1;
+ break;
+
+ case JSON_TOKEN_OBJECT_CLOSE:
+ if (!IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_COMMA)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ r = json_variant_new_object(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+
+ line_token = current->line_before;
+ column_token = current->column_before;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case JSON_TOKEN_ARRAY_OPEN:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ /* Prepare the expect for when we return from the child */
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_ARRAY_FIRST_ELEMENT,
+ .line_before = line_token,
+ .column_before = column_token,
+ };
+
+ break;
+
+ case JSON_TOKEN_ARRAY_CLOSE:
+ if (!IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_COMMA)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ r = json_variant_new_array(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+
+ line_token = current->line_before;
+ column_token = current->column_before;
+
+ json_stack_release(current);
+ n_stack--, current--;
+ break;
+
+ case JSON_TOKEN_STRING:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_string(&add, string);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY))
+ current->expect = EXPECT_OBJECT_COLON;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_REAL:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_real(&add, value.real);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_INTEGER:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_integer(&add, value.integer);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_UNSIGNED:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_unsigned(&add, value.unsig);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_BOOLEAN:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_boolean(&add, value.boolean);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ case JSON_TOKEN_NULL:
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ r = json_variant_new_null(&add);
+ if (r < 0)
+ goto finish;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_COMMA;
+ else {
+ assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT));
+ current->expect = EXPECT_ARRAY_COMMA;
+ }
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected token");
+ }
+
+ if (add) {
+ /* If we are asked to make this parsed object sensitive, then let's apply this
+ * immediately after allocating each variant, so that when we abort half-way
+ * everything we already allocated that is then freed is correctly marked. */
+ if (FLAGS_SET(flags, JSON_PARSE_SENSITIVE))
+ json_variant_sensitive(add);
+
+ (void) json_variant_set_source(&add, source, line_token, column_token);
+
+ if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ current->elements[current->n_elements++] = TAKE_PTR(add);
+ }
+ }
+
+done:
+ assert(n_stack == 1);
+ assert(stack[0].n_elements == 1);
+
+ *ret = json_variant_ref(stack[0].elements[0]);
+ *input = p;
+ r = 0;
+
+finish:
+ for (i = 0; i < n_stack; i++)
+ json_stack_release(stack + i);
+
+ free(stack);
+
+ return r;
+}
+
+int json_parse(const char *input, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_internal(&input, NULL, flags, ret, ret_line, ret_column, false);
+}
+
+int json_parse_continue(const char **p, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_internal(p, NULL, flags, ret, ret_line, ret_column, true);
+}
+
+int json_parse_file_at(FILE *f, int dir_fd, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ _cleanup_(json_source_unrefp) JsonSource *source = NULL;
+ _cleanup_free_ char *text = NULL;
+ const char *p;
+ int r;
+
+ if (f)
+ r = read_full_stream(f, &text, NULL);
+ else if (path)
+ r = read_full_file_full(dir_fd, path, 0, NULL, &text, NULL);
+ else
+ return -EINVAL;
+ if (r < 0)
+ return r;
+
+ if (path) {
+ source = json_source_new(path);
+ if (!source)
+ return -ENOMEM;
+ }
+
+ p = text;
+ return json_parse_internal(&p, source, flags, ret, ret_line, ret_column, false);
+}
+
+int json_buildv(JsonVariant **ret, va_list ap) {
+ JsonStack *stack = NULL;
+ size_t n_stack = 1, n_stack_allocated = 0, i;
+ int r;
+
+ assert_return(ret, -EINVAL);
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack))
+ return -ENOMEM;
+
+ stack[0] = (JsonStack) {
+ .expect = EXPECT_TOPLEVEL,
+ };
+
+ for (;;) {
+ _cleanup_(json_variant_unrefp) JsonVariant *add = NULL;
+ size_t n_subtract = 0; /* how much to subtract from current->n_suppress, i.e. how many elements would
+ * have been added to the current variant */
+ JsonStack *current;
+ int command;
+
+ assert(n_stack > 0);
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_END)
+ goto done;
+
+ command = va_arg(ap, int);
+
+ switch (command) {
+
+ case _JSON_BUILD_STRING: {
+ const char *p;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = va_arg(ap, const char *);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_string(&add, p);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_INTEGER: {
+ intmax_t j;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ j = va_arg(ap, intmax_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_integer(&add, j);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_UNSIGNED: {
+ uintmax_t j;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ j = va_arg(ap, uintmax_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_unsigned(&add, j);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_REAL: {
+ long double d;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ d = va_arg(ap, long double);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_real(&add, d);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_BOOLEAN: {
+ bool b;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ b = va_arg(ap, int);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_boolean(&add, b);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_NULL:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_null(&add);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+
+ case _JSON_BUILD_VARIANT:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ /* Note that we don't care for current->n_suppress here, after all the variant is already
+ * allocated anyway... */
+ add = va_arg(ap, JsonVariant*);
+ if (!add)
+ add = JSON_VARIANT_MAGIC_NULL;
+ else
+ json_variant_ref(add);
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+
+ case _JSON_BUILD_VARIANT_ARRAY: {
+ JsonVariant **array;
+ size_t n;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ array = va_arg(ap, JsonVariant**);
+ n = va_arg(ap, size_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array(&add, array, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_LITERAL: {
+ const char *l;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ l = va_arg(ap, const char *);
+
+ if (l) {
+ /* Note that we don't care for current->n_suppress here, we should generate parsing
+ * errors even in suppressed object properties */
+
+ r = json_parse(l, 0, &add, NULL, NULL);
+ if (r < 0)
+ goto finish;
+ } else
+ add = JSON_VARIANT_MAGIC_NULL;
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_ARRAY_BEGIN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_ARRAY_ELEMENT,
+ .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+ * new array, then we should
+ * also suppress all array
+ * members */
+ };
+
+ break;
+
+ case _JSON_BUILD_ARRAY_END:
+ if (current->expect != EXPECT_ARRAY_ELEMENT) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case _JSON_BUILD_STRV: {
+ char **l;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ l = va_arg(ap, char **);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array_strv(&add, l);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_BASE64: {
+ const void *p;
+ size_t n;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ p = va_arg(ap, const void *);
+ n = va_arg(ap, size_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_base64(&add, p, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_ID128: {
+ sd_id128_t id;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ id = va_arg(ap, sd_id128_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_id128(&add, id);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_BYTE_ARRAY: {
+ const void *array;
+ size_t n;
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ array = va_arg(ap, const void*);
+ n = va_arg(ap, size_t);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_array_bytes(&add, array, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ break;
+ }
+
+ case _JSON_BUILD_OBJECT_BEGIN:
+
+ if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+ current = stack + n_stack - 1;
+
+ if (current->expect == EXPECT_TOPLEVEL)
+ current->expect = EXPECT_END;
+ else if (current->expect == EXPECT_OBJECT_VALUE)
+ current->expect = EXPECT_OBJECT_KEY;
+ else
+ assert(current->expect == EXPECT_ARRAY_ELEMENT);
+
+ stack[n_stack++] = (JsonStack) {
+ .expect = EXPECT_OBJECT_KEY,
+ .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the
+ * new object, then we should
+ * also suppress all object
+ * members */
+ };
+
+ break;
+
+ case _JSON_BUILD_OBJECT_END:
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ assert(n_stack > 1);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_object(&add, current->elements, current->n_elements);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ json_stack_release(current);
+ n_stack--, current--;
+
+ break;
+
+ case _JSON_BUILD_PAIR: {
+ const char *n;
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ n = va_arg(ap, const char *);
+
+ if (current->n_suppress == 0) {
+ r = json_variant_new_string(&add, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1;
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+ }
+
+ case _JSON_BUILD_PAIR_CONDITION: {
+ const char *n;
+ bool b;
+
+ if (current->expect != EXPECT_OBJECT_KEY) {
+ r = -EINVAL;
+ goto finish;
+ }
+
+ b = va_arg(ap, int);
+ n = va_arg(ap, const char *);
+
+ if (b && current->n_suppress == 0) {
+ r = json_variant_new_string(&add, n);
+ if (r < 0)
+ goto finish;
+ }
+
+ n_subtract = 1; /* we generated one item */
+
+ if (!b && current->n_suppress != (size_t) -1)
+ current->n_suppress += 2; /* Suppress this one and the next item */
+
+ current->expect = EXPECT_OBJECT_VALUE;
+ break;
+ }}
+
+ /* If a variant was generated, add it to our current variant, but only if we are not supposed to suppress additions */
+ if (add && current->n_suppress == 0) {
+ if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ current->elements[current->n_elements++] = TAKE_PTR(add);
+ }
+
+ /* If we are supposed to suppress items, let's subtract how many items where generated from that
+ * counter. Except if the counter is (size_t) -1, i.e. we shall suppress an infinite number of elements
+ * on this stack level */
+ if (current->n_suppress != (size_t) -1) {
+ if (current->n_suppress <= n_subtract) /* Saturated */
+ current->n_suppress = 0;
+ else
+ current->n_suppress -= n_subtract;
+ }
+ }
+
+done:
+ assert(n_stack == 1);
+ assert(stack[0].n_elements == 1);
+
+ *ret = json_variant_ref(stack[0].elements[0]);
+ r = 0;
+
+finish:
+ for (i = 0; i < n_stack; i++)
+ json_stack_release(stack + i);
+
+ free(stack);
+
+ return r;
+}
+
+int json_build(JsonVariant **ret, ...) {
+ va_list ap;
+ int r;
+
+ va_start(ap, ret);
+ r = json_buildv(ret, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int json_log_internal(
+ JsonVariant *variant,
+ int level,
+ int error,
+ const char *file,
+ int line,
+ const char *func,
+ const char *format, ...) {
+
+ PROTECT_ERRNO;
+
+ unsigned source_line, source_column;
+ char buffer[LINE_MAX];
+ const char *source;
+ va_list ap;
+ int r;
+
+ errno = ERRNO_VALUE(error);
+
+ va_start(ap, format);
+ (void) vsnprintf(buffer, sizeof buffer, format, ap);
+ va_end(ap);
+
+ if (variant) {
+ r = json_variant_get_source(variant, &source, &source_line, &source_column);
+ if (r < 0)
+ return r;
+ } else {
+ source = NULL;
+ source_line = 0;
+ source_column = 0;
+ }
+
+ if (source && source_line > 0 && source_column > 0)
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ "CONFIG_FILE=%s", source,
+ "CONFIG_LINE=%u", source_line,
+ "CONFIG_COLUMN=%u", source_column,
+ LOG_MESSAGE("%s:%u:%u: %s", source, source_line, source_column, buffer),
+ NULL);
+ else
+ return log_struct_internal(
+ LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level),
+ error,
+ file, line, func,
+ "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR,
+ LOG_MESSAGE("%s", buffer),
+ NULL);
+}
+
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata) {
+ const JsonDispatch *p;
+ size_t i, n, m;
+ int r, done = 0;
+ bool *found;
+
+ if (!json_variant_is_object(v)) {
+ json_log(v, flags, 0, "JSON variant is not an object.");
+
+ if (flags & JSON_PERMISSIVE)
+ return 0;
+
+ return -EINVAL;
+ }
+
+ for (p = table, m = 0; p->name; p++)
+ m++;
+
+ found = newa0(bool, m);
+
+ n = json_variant_elements(v);
+ for (i = 0; i < n; i += 2) {
+ JsonVariant *key, *value;
+
+ assert_se(key = json_variant_by_index(v, i));
+ assert_se(value = json_variant_by_index(v, i+1));
+
+ for (p = table; p->name; p++)
+ if (p->name == POINTER_MAX ||
+ streq_ptr(json_variant_string(key), p->name))
+ break;
+
+ if (p->name) { /* Found a matching entry! :-) */
+ JsonDispatchFlags merged_flags;
+
+ merged_flags = flags | p->flags;
+
+ if (p->type != _JSON_VARIANT_TYPE_INVALID &&
+ !json_variant_has_type(value, p->type)) {
+
+ json_log(value, merged_flags, 0,
+ "Object field '%s' has wrong type %s, expected %s.", json_variant_string(key),
+ json_variant_type_to_string(json_variant_type(value)), json_variant_type_to_string(p->type));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EINVAL;
+ }
+
+ if (found[p-table]) {
+ json_log(value, merged_flags, 0, "Duplicate object field '%s'.", json_variant_string(key));
+
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return -ENOTUNIQ;
+ }
+
+ found[p-table] = true;
+
+ if (p->callback) {
+ r = p->callback(json_variant_string(key), value, merged_flags, (uint8_t*) userdata + p->offset);
+ if (r < 0) {
+ if (merged_flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ }
+ }
+
+ done ++;
+
+ } else { /* Didn't find a matching entry! :-( */
+
+ if (bad) {
+ r = bad(json_variant_string(key), value, flags, userdata);
+ if (r < 0) {
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return r;
+ } else
+ done ++;
+
+ } else {
+ json_log(value, flags, 0, "Unexpected object field '%s'.", json_variant_string(key));
+
+ if (flags & JSON_PERMISSIVE)
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ }
+ }
+
+ for (p = table; p->name; p++) {
+ JsonDispatchFlags merged_flags = p->flags | flags;
+
+ if ((merged_flags & JSON_MANDATORY) && !found[p-table]) {
+ json_log(v, merged_flags, 0, "Missing object field '%s'.", p->name);
+
+ if ((merged_flags & JSON_PERMISSIVE))
+ continue;
+
+ return -ENXIO;
+ }
+ }
+
+ return done;
+}
+
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ bool *b = userdata;
+
+ assert(variant);
+ assert(b);
+
+ if (!json_variant_is_boolean(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a boolean.", strna(name));
+
+ *b = json_variant_boolean(variant);
+ return 0;
+}
+
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int *b = userdata;
+
+ assert(variant);
+ assert(b);
+
+ if (json_variant_is_null(variant)) {
+ *b = -1;
+ return 0;
+ }
+
+ if (!json_variant_is_boolean(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a boolean.", strna(name));
+
+ *b = json_variant_boolean(variant);
+ return 0;
+}
+
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ intmax_t *i = userdata;
+
+ assert(variant);
+ assert(i);
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ *i = json_variant_integer(variant);
+ return 0;
+}
+
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uintmax_t *u = userdata;
+
+ assert(variant);
+ assert(u);
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an unsigned integer.", strna(name));
+
+ *u = json_variant_unsigned(variant);
+ return 0;
+}
+
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint32_t *u = userdata;
+
+ assert(variant);
+ assert(u);
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an unsigned integer.", strna(name));
+
+ if (json_variant_unsigned(variant) > UINT32_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' out of bounds.", strna(name));
+
+ *u = (uint32_t) json_variant_unsigned(variant);
+ return 0;
+}
+
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int32_t *i = userdata;
+
+ assert(variant);
+ assert(i);
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ if (json_variant_integer(variant) < INT32_MIN || json_variant_integer(variant) > INT32_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' out of bounds.", strna(name));
+
+ *i = (int32_t) json_variant_integer(variant);
+ return 0;
+}
+
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ int r;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant)))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ r = free_and_strdup(s, json_variant_string(variant));
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+int json_dispatch_const_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ const char **s = userdata;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = NULL;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant)))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ *s = json_variant_string(variant);
+ return 0;
+}
+
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***s = userdata;
+ JsonVariant *e;
+ int r;
+
+ assert(variant);
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = strv_free(*s);
+ return 0;
+ }
+
+ /* Let's be flexible here: accept a single string in place of a single-item array */
+ if (json_variant_is_string(variant)) {
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(variant)))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ l = strv_new(json_variant_string(variant));
+ if (!l)
+ return log_oom();
+
+ strv_free_and_replace(*s, l);
+ return 0;
+ }
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, SYNTHETIC_ERRNO(EINVAL), flags, "JSON field '%s' is not an array.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ if ((flags & JSON_SAFE) && !string_is_safe(json_variant_string(e)))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' contains unsafe characters, refusing.", strna(name));
+
+ r = strv_extend(&l, json_variant_string(e));
+ if (r < 0)
+ return json_log(e, flags, r, "Failed to append array element: %m");
+ }
+
+ strv_free_and_replace(*s, l);
+ return 0;
+}
+
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ JsonVariant **p = userdata;
+
+ assert(variant);
+ assert(p);
+
+ json_variant_unref(*p);
+ *p = json_variant_ref(variant);
+
+ return 0;
+}
+
+int json_dispatch_uid_gid(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uid_t *uid = userdata;
+ uintmax_t k;
+
+ assert_cc(sizeof(uid_t) == sizeof(uint32_t));
+ assert_cc(sizeof(gid_t) == sizeof(uint32_t));
+
+ DISABLE_WARNING_TYPE_LIMITS;
+ assert_cc(((uid_t) -1 < (uid_t) 0) == ((gid_t) -1 < (gid_t) 0));
+ REENABLE_WARNING;
+
+ if (json_variant_is_null(variant)) {
+ *uid = UID_INVALID;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k > UINT32_MAX || !uid_is_valid(k))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid UID/GID.", strna(name));
+
+ *uid = k;
+ return 0;
+}
+
+int json_dispatch_user_group_name(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!valid_user_group_name(n, FLAGS_SET(flags, JSON_RELAX) ? VALID_USER_RELAX : 0))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid user/group name.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+int json_dispatch_id128(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ sd_id128_t *uuid = userdata;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *uuid = SD_ID128_NULL;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = sd_id128_from_string(json_variant_string(variant), uuid);
+ if (r < 0)
+ return json_log(variant, flags, r, "JSON field '%s' is not a valid UID.", strna(name));
+
+ return 0;
+}
+
+int json_dispatch_unsupported(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not allowed in this object.", strna(name));
+}
+
+static int json_cmp_strings(const void *x, const void *y) {
+ JsonVariant *const *a = x, *const *b = y;
+
+ if (!json_variant_is_string(*a) || !json_variant_is_string(*b))
+ return CMP(*a, *b);
+
+ return strcmp(json_variant_string(*a), json_variant_string(*b));
+}
+
+int json_variant_sort(JsonVariant **v) {
+ _cleanup_free_ JsonVariant **a = NULL;
+ JsonVariant *n = NULL;
+ size_t i, m;
+ int r;
+
+ assert(v);
+
+ if (json_variant_is_sorted(*v))
+ return 0;
+
+ if (!json_variant_is_object(*v))
+ return -EMEDIUMTYPE;
+
+ /* Sorts they key/value pairs in an object variant */
+
+ m = json_variant_elements(*v);
+ a = new(JsonVariant*, m);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < m; i++)
+ a[i] = json_variant_by_index(*v, i);
+
+ qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings);
+
+ r = json_variant_new_object(&n, a, m);
+ if (r < 0)
+ return r;
+
+ json_variant_propagate_sensitive(*v, n);
+
+ if (!n->sorted) /* Check if this worked. This will fail if there are multiple identical keys used. */
+ return -ENOTUNIQ;
+
+ json_variant_unref(*v);
+ *v = n;
+
+ return 1;
+}
+
+int json_variant_normalize(JsonVariant **v) {
+ _cleanup_free_ JsonVariant **a = NULL;
+ JsonVariant *n = NULL;
+ size_t i, j, m;
+ int r;
+
+ assert(v);
+
+ if (json_variant_is_normalized(*v))
+ return 0;
+
+ if (!json_variant_is_object(*v) && !json_variant_is_array(*v))
+ return -EMEDIUMTYPE;
+
+ /* Sorts the key/value pairs in an object variant anywhere down the tree in the specified variant */
+
+ m = json_variant_elements(*v);
+ a = new(JsonVariant*, m);
+ if (!a)
+ return -ENOMEM;
+
+ for (i = 0; i < m; i++) {
+ a[i] = json_variant_ref(json_variant_by_index(*v, i));
+
+ r = json_variant_normalize(a + i);
+ if (r < 0)
+ goto finish;
+ }
+
+ qsort(a, m/2, sizeof(JsonVariant*)*2, json_cmp_strings);
+
+ if (json_variant_is_object(*v))
+ r = json_variant_new_object(&n, a, m);
+ else {
+ assert(json_variant_is_array(*v));
+ r = json_variant_new_array(&n, a, m);
+ }
+ if (r < 0)
+ goto finish;
+
+ json_variant_propagate_sensitive(*v, n);
+
+ if (!n->normalized) { /* Let's see if normalization worked. It will fail if there are multiple
+ * identical keys used in the same object anywhere, or if there are floating
+ * point numbers used (see below) */
+ r = -ENOTUNIQ;
+ goto finish;
+ }
+
+ json_variant_unref(*v);
+ *v = n;
+
+ r = 1;
+
+finish:
+ for (j = 0; j < i; j++)
+ json_variant_unref(a[j]);
+
+ return r;
+}
+
+bool json_variant_is_normalized(JsonVariant *v) {
+
+ /* For now, let's consider anything containing numbers not expressible as integers as
+ * non-normalized. That's because we cannot sensibly compare them due to accuracy issues, nor even
+ * store them if they are too large. */
+ if (json_variant_is_real(v) && !json_variant_is_integer(v) && !json_variant_is_unsigned(v))
+ return false;
+
+ /* The concept only applies to variants that include other variants, i.e. objects and arrays. All
+ * others are normalized anyway. */
+ if (!json_variant_is_object(v) && !json_variant_is_array(v))
+ return true;
+
+ /* Empty objects/arrays don't include any other variant, hence are always normalized too */
+ if (json_variant_elements(v) == 0)
+ return true;
+
+ return v->normalized; /* For everything else there's an explicit boolean we maintain */
+}
+
+bool json_variant_is_sorted(JsonVariant *v) {
+
+ /* Returns true if all key/value pairs of an object are properly sorted. Note that this only applies
+ * to objects, not arrays. */
+
+ if (!json_variant_is_object(v))
+ return true;
+ if (json_variant_elements(v) <= 1)
+ return true;
+
+ return v->sorted;
+}
+
+int json_variant_unbase64(JsonVariant *v, void **ret, size_t *ret_size) {
+
+ if (!json_variant_is_string(v))
+ return -EINVAL;
+
+ return unbase64mem(json_variant_string(v), (size_t) -1, ret, ret_size);
+}
+
+static const char* const json_variant_type_table[_JSON_VARIANT_TYPE_MAX] = {
+ [JSON_VARIANT_STRING] = "string",
+ [JSON_VARIANT_INTEGER] = "integer",
+ [JSON_VARIANT_UNSIGNED] = "unsigned",
+ [JSON_VARIANT_REAL] = "real",
+ [JSON_VARIANT_NUMBER] = "number",
+ [JSON_VARIANT_BOOLEAN] = "boolean",
+ [JSON_VARIANT_ARRAY] = "array",
+ [JSON_VARIANT_OBJECT] = "object",
+ [JSON_VARIANT_NULL] = "null",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(json_variant_type, JsonVariantType);
diff --git a/src/shared/json.h b/src/shared/json.h
new file mode 100644
index 0000000..0809f31
--- /dev/null
+++ b/src/shared/json.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "sd-id128.h"
+
+#include "macro.h"
+#include "string-util.h"
+#include "log.h"
+
+/*
+ In case you wonder why we have our own JSON implementation, here are a couple of reasons why this implementation has
+ benefits over various other implementations:
+
+ - We need support for 64bit signed and unsigned integers, i.e. the full 64,5bit range of -9223372036854775808…18446744073709551615
+ - All our variants are immutable after creation
+ - Special values such as true, false, zero, null, empty strings, empty array, empty objects require zero dynamic memory
+ - Progressive parsing
+ - Our integer/real type implicitly converts, but only if that's safe and loss-lessly possible
+ - There's a "builder" for putting together objects easily in varargs function calls
+ - There's a "dispatcher" for mapping objects to C data structures
+ - Every variant optionally carries parsing location information, which simplifies debugging and parse log error generation
+ - Formatter has color, line, column support
+
+ Limitations:
+ - Doesn't allow embedded NUL in strings
+ - Can't store integers outside of the -9223372036854775808…18446744073709551615 range (it will use 'long double' for
+ values outside this range, which is lossy)
+ - Can't store negative zero (will be treated identical to positive zero, and not retained across serialization)
+ - Can't store non-integer numbers that can't be stored in "long double" losslessly
+ - Allows creation and parsing of objects with duplicate keys. The "dispatcher" will refuse them however. This means
+ we can parse and pass around such objects, but will carefully refuse them when we convert them into our own data.
+
+ (These limitations should be pretty much in line with those of other JSON implementations, in fact might be less
+ limiting in most cases even.)
+*/
+
+typedef struct JsonVariant JsonVariant;
+
+typedef enum JsonVariantType {
+ JSON_VARIANT_STRING,
+ JSON_VARIANT_INTEGER,
+ JSON_VARIANT_UNSIGNED,
+ JSON_VARIANT_REAL,
+ JSON_VARIANT_NUMBER, /* This a pseudo-type: we can never create variants of this type, but we use it as wildcard check for the above three types */
+ JSON_VARIANT_BOOLEAN,
+ JSON_VARIANT_ARRAY,
+ JSON_VARIANT_OBJECT,
+ JSON_VARIANT_NULL,
+ _JSON_VARIANT_TYPE_MAX,
+ _JSON_VARIANT_TYPE_INVALID = -1
+} JsonVariantType;
+
+int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n);
+int json_variant_new_base64(JsonVariant **ret, const void *p, size_t n);
+int json_variant_new_integer(JsonVariant **ret, intmax_t i);
+int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u);
+int json_variant_new_real(JsonVariant **ret, long double d);
+int json_variant_new_boolean(JsonVariant **ret, bool b);
+int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n);
+int json_variant_new_array_strv(JsonVariant **ret, char **l);
+int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n);
+int json_variant_new_null(JsonVariant **ret);
+int json_variant_new_id128(JsonVariant **ret, sd_id128_t id);
+
+static inline int json_variant_new_string(JsonVariant **ret, const char *s) {
+ return json_variant_new_stringn(ret, s, (size_t) -1);
+}
+
+JsonVariant *json_variant_ref(JsonVariant *v);
+JsonVariant *json_variant_unref(JsonVariant *v);
+void json_variant_unref_many(JsonVariant **array, size_t n);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(JsonVariant *, json_variant_unref);
+
+const char *json_variant_string(JsonVariant *v);
+intmax_t json_variant_integer(JsonVariant *v);
+uintmax_t json_variant_unsigned(JsonVariant *v);
+long double json_variant_real(JsonVariant *v);
+bool json_variant_boolean(JsonVariant *v);
+
+JsonVariantType json_variant_type(JsonVariant *v);
+bool json_variant_has_type(JsonVariant *v, JsonVariantType type);
+
+static inline bool json_variant_is_string(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_STRING);
+}
+
+static inline bool json_variant_is_integer(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_INTEGER);
+}
+
+static inline bool json_variant_is_unsigned(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_UNSIGNED);
+}
+
+static inline bool json_variant_is_real(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_REAL);
+}
+
+static inline bool json_variant_is_number(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NUMBER);
+}
+
+static inline bool json_variant_is_boolean(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_BOOLEAN);
+}
+
+static inline bool json_variant_is_array(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_ARRAY);
+}
+
+static inline bool json_variant_is_object(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_OBJECT);
+}
+
+static inline bool json_variant_is_null(JsonVariant *v) {
+ return json_variant_has_type(v, JSON_VARIANT_NULL);
+}
+
+bool json_variant_is_negative(JsonVariant *v);
+bool json_variant_is_blank_object(JsonVariant *v);
+bool json_variant_is_blank_array(JsonVariant *v);
+bool json_variant_is_normalized(JsonVariant *v);
+bool json_variant_is_sorted(JsonVariant *v);
+
+size_t json_variant_elements(JsonVariant *v);
+JsonVariant *json_variant_by_index(JsonVariant *v, size_t index);
+JsonVariant *json_variant_by_key(JsonVariant *v, const char *key);
+JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key);
+
+bool json_variant_equal(JsonVariant *a, JsonVariant *b);
+
+void json_variant_sensitive(JsonVariant *v);
+bool json_variant_is_sensitive(JsonVariant *v);
+
+struct json_variant_foreach_state {
+ JsonVariant *variant;
+ size_t idx;
+};
+
+#define JSON_VARIANT_ARRAY_FOREACH(i, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ json_variant_is_array(_state.variant) && \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ i = json_variant_by_index(_state.variant, _state.idx); \
+ true; }); \
+ _state.idx++)
+
+#define JSON_VARIANT_OBJECT_FOREACH(k, e, v) \
+ for (struct json_variant_foreach_state _state = { (v), 0 }; \
+ json_variant_is_object(_state.variant) && \
+ _state.idx < json_variant_elements(_state.variant) && \
+ ({ k = json_variant_string(json_variant_by_index(_state.variant, _state.idx)); \
+ e = json_variant_by_index(_state.variant, _state.idx + 1); \
+ true; }); \
+ _state.idx += 2)
+
+int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column);
+
+typedef enum JsonFormatFlags {
+ JSON_FORMAT_NEWLINE = 1 << 0, /* suffix with newline */
+ JSON_FORMAT_PRETTY = 1 << 1, /* add internal whitespace to appeal to human readers */
+ JSON_FORMAT_PRETTY_AUTO = 1 << 2, /* same, but only if connected to a tty (and JSON_FORMAT_NEWLINE otherwise) */
+ JSON_FORMAT_COLOR = 1 << 3, /* insert ANSI color sequences */
+ JSON_FORMAT_COLOR_AUTO = 1 << 4, /* insert ANSI color sequences if colors_enabled() says so */
+ JSON_FORMAT_SOURCE = 1 << 5, /* prefix with source filename/line/column */
+ JSON_FORMAT_SSE = 1 << 6, /* prefix/suffix with W3C server-sent events */
+ JSON_FORMAT_SEQ = 1 << 7, /* prefix/suffix with RFC 7464 application/json-seq */
+ JSON_FORMAT_FLUSH = 1 << 8, /* call fflush() after dumping JSON */
+} JsonFormatFlags;
+
+int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret);
+void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix);
+
+int json_variant_filter(JsonVariant **v, char **to_remove);
+
+int json_variant_set_field(JsonVariant **v, const char *field, JsonVariant *value);
+int json_variant_set_field_string(JsonVariant **v, const char *field, const char *value);
+int json_variant_set_field_integer(JsonVariant **v, const char *field, intmax_t value);
+int json_variant_set_field_unsigned(JsonVariant **v, const char *field, uintmax_t value);
+int json_variant_set_field_boolean(JsonVariant **v, const char *field, bool b);
+int json_variant_set_field_strv(JsonVariant **v, const char *field, char **l);
+
+int json_variant_append_array(JsonVariant **v, JsonVariant *element);
+
+int json_variant_merge(JsonVariant **v, JsonVariant *m);
+
+int json_variant_strv(JsonVariant *v, char ***ret);
+
+int json_variant_sort(JsonVariant **v);
+int json_variant_normalize(JsonVariant **v);
+
+typedef enum JsonParseFlags {
+ JSON_PARSE_SENSITIVE = 1 << 0, /* mark variant as "sensitive", i.e. something containing secret key material or such */
+} JsonParseFlags;
+
+int json_parse(const char *string, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_continue(const char **p, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+int json_parse_file_at(FILE *f, int dir_fd, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column);
+
+static inline int json_parse_file(FILE *f, const char *path, JsonParseFlags flags, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) {
+ return json_parse_file_at(f, AT_FDCWD, path, flags, ret, ret_line, ret_column);
+}
+
+enum {
+ _JSON_BUILD_STRING,
+ _JSON_BUILD_INTEGER,
+ _JSON_BUILD_UNSIGNED,
+ _JSON_BUILD_REAL,
+ _JSON_BUILD_BOOLEAN,
+ _JSON_BUILD_ARRAY_BEGIN,
+ _JSON_BUILD_ARRAY_END,
+ _JSON_BUILD_OBJECT_BEGIN,
+ _JSON_BUILD_OBJECT_END,
+ _JSON_BUILD_PAIR,
+ _JSON_BUILD_PAIR_CONDITION,
+ _JSON_BUILD_NULL,
+ _JSON_BUILD_VARIANT,
+ _JSON_BUILD_VARIANT_ARRAY,
+ _JSON_BUILD_LITERAL,
+ _JSON_BUILD_STRV,
+ _JSON_BUILD_BASE64,
+ _JSON_BUILD_ID128,
+ _JSON_BUILD_BYTE_ARRAY,
+ _JSON_BUILD_MAX,
+};
+
+#define JSON_BUILD_STRING(s) _JSON_BUILD_STRING, ({ const char *_x = s; _x; })
+#define JSON_BUILD_INTEGER(i) _JSON_BUILD_INTEGER, ({ intmax_t _x = i; _x; })
+#define JSON_BUILD_UNSIGNED(u) _JSON_BUILD_UNSIGNED, ({ uintmax_t _x = u; _x; })
+#define JSON_BUILD_REAL(d) _JSON_BUILD_REAL, ({ long double _x = d; _x; })
+#define JSON_BUILD_BOOLEAN(b) _JSON_BUILD_BOOLEAN, ({ bool _x = b; _x; })
+#define JSON_BUILD_ARRAY(...) _JSON_BUILD_ARRAY_BEGIN, __VA_ARGS__, _JSON_BUILD_ARRAY_END
+#define JSON_BUILD_EMPTY_ARRAY _JSON_BUILD_ARRAY_BEGIN, _JSON_BUILD_ARRAY_END
+#define JSON_BUILD_OBJECT(...) _JSON_BUILD_OBJECT_BEGIN, __VA_ARGS__, _JSON_BUILD_OBJECT_END
+#define JSON_BUILD_EMPTY_OBJECT _JSON_BUILD_OBJECT_BEGIN, _JSON_BUILD_OBJECT_END
+#define JSON_BUILD_PAIR(n, ...) _JSON_BUILD_PAIR, ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_PAIR_CONDITION(c, n, ...) _JSON_BUILD_PAIR_CONDITION, ({ bool _x = c; _x; }), ({ const char *_x = n; _x; }), __VA_ARGS__
+#define JSON_BUILD_NULL _JSON_BUILD_NULL
+#define JSON_BUILD_VARIANT(v) _JSON_BUILD_VARIANT, ({ JsonVariant *_x = v; _x; })
+#define JSON_BUILD_VARIANT_ARRAY(v, n) _JSON_BUILD_VARIANT_ARRAY, ({ JsonVariant **_x = v; _x; }), ({ size_t _y = n; _y; })
+#define JSON_BUILD_LITERAL(l) _JSON_BUILD_LITERAL, ({ const char *_x = l; _x; })
+#define JSON_BUILD_STRV(l) _JSON_BUILD_STRV, ({ char **_x = l; _x; })
+#define JSON_BUILD_BASE64(p, n) _JSON_BUILD_BASE64, ({ const void *_x = p; _x; }), ({ size_t _y = n; _y; })
+#define JSON_BUILD_ID128(id) _JSON_BUILD_ID128, ({ sd_id128_t _x = id; _x; })
+#define JSON_BUILD_BYTE_ARRAY(v, n) _JSON_BUILD_BYTE_ARRAY, ({ const void *_x = v; _x; }), ({ size_t _y = n; _y; })
+
+int json_build(JsonVariant **ret, ...);
+int json_buildv(JsonVariant **ret, va_list ap);
+
+/* A bitmask of flags used by the dispatch logic. Note that this is a combined bit mask, that is generated from the bit
+ * mask originally passed into json_dispatch(), the individual bitmask associated with the static JsonDispatch callout
+ * entry, as well the bitmask specified for json_log() calls */
+typedef enum JsonDispatchFlags {
+ /* The following three may be set in JsonDispatch's .flags field or the json_dispatch() flags parameter */
+ JSON_PERMISSIVE = 1 << 0, /* Shall parsing errors be considered fatal for this property? */
+ JSON_MANDATORY = 1 << 1, /* Should existence of this property be mandatory? */
+ JSON_LOG = 1 << 2, /* Should the parser log about errors? */
+ JSON_SAFE = 1 << 3, /* Don't accept "unsafe" strings in json_dispatch_string() + json_dispatch_string() */
+ JSON_RELAX = 1 << 4, /* Use relaxed user name checking in json_dispatch_user_group_name */
+
+ /* The following two may be passed into log_json() in addition to the three above */
+ JSON_DEBUG = 1 << 4, /* Indicates that this log message is a debug message */
+ JSON_WARNING = 1 << 5, /* Indicates that this log message is a warning message */
+} JsonDispatchFlags;
+
+typedef int (*JsonDispatchCallback)(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+typedef struct JsonDispatch {
+ const char *name;
+ JsonVariantType type;
+ JsonDispatchCallback callback;
+ size_t offset;
+ JsonDispatchFlags flags;
+} JsonDispatch;
+
+int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata);
+
+int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_const_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_uid_gid(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_user_group_name(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_id128(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_unsupported(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+assert_cc(sizeof(uintmax_t) == sizeof(uint64_t));
+#define json_dispatch_uint64 json_dispatch_unsigned
+
+assert_cc(sizeof(intmax_t) == sizeof(int64_t));
+#define json_dispatch_int64 json_dispatch_integer
+
+assert_cc(sizeof(uint32_t) == sizeof(unsigned));
+#define json_dispatch_uint json_dispatch_uint32
+
+assert_cc(sizeof(int32_t) == sizeof(int));
+#define json_dispatch_int json_dispatch_int32
+
+static inline int json_dispatch_level(JsonDispatchFlags flags) {
+
+ /* Did the user request no logging? If so, then never log higher than LOG_DEBUG. Also, if this is marked as
+ * debug message, then also log at debug level. */
+
+ if (!(flags & JSON_LOG) ||
+ (flags & JSON_DEBUG))
+ return LOG_DEBUG;
+
+ /* Are we invoked in permissive mode, or is this explicitly marked as warning message? Then this should be
+ * printed at LOG_WARNING */
+ if (flags & (JSON_PERMISSIVE|JSON_WARNING))
+ return LOG_WARNING;
+
+ /* Otherwise it's an error. */
+ return LOG_ERR;
+}
+
+int json_log_internal(JsonVariant *variant, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8);
+
+#define json_log(variant, flags, error, ...) \
+ ({ \
+ int _level = json_dispatch_level(flags), _e = (error); \
+ (log_get_max_level() >= LOG_PRI(_level)) \
+ ? json_log_internal(variant, _level, _e, PROJECT_FILE, __LINE__, __func__, __VA_ARGS__) \
+ : -ERRNO_VALUE(_e); \
+ })
+
+#define json_log_oom(variant, flags) \
+ json_log(variant, flags, SYNTHETIC_ERRNO(ENOMEM), "Out of memory.")
+
+#define JSON_VARIANT_STRING_CONST(x) _JSON_VARIANT_STRING_CONST(UNIQ, (x))
+
+#define _JSON_VARIANT_STRING_CONST(xq, x) \
+ ({ \
+ _align_(2) static const char UNIQ_T(json_string_const, xq)[] = (x); \
+ assert((((uintptr_t) UNIQ_T(json_string_const, xq)) & 1) == 0); \
+ (JsonVariant*) ((uintptr_t) UNIQ_T(json_string_const, xq) + 1); \
+ })
+
+int json_variant_unbase64(JsonVariant *v, void **ret, size_t *ret_size);
+
+const char *json_variant_type_to_string(JsonVariantType t);
+JsonVariantType json_variant_type_from_string(const char *s);
diff --git a/src/shared/libcrypt-util.c b/src/shared/libcrypt-util.c
new file mode 100644
index 0000000..5b31541
--- /dev/null
+++ b/src/shared/libcrypt-util.c
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_CRYPT_H
+/* libxcrypt is a replacement for glibc's libcrypt, and libcrypt might be
+ * removed from glibc at some point. As part of the removal, defines for
+ * crypt(3) are dropped from unistd.h, and we must include crypt.h instead.
+ *
+ * Newer versions of glibc (v2.0+) already ship crypt.h with a definition
+ * of crypt(3) as well, so we simply include it if it is present. MariaDB,
+ * MySQL, PostgreSQL, Perl and some other wide-spread packages do it the
+ * same way since ages without any problems.
+ */
+# include <crypt.h>
+#else
+# include <unistd.h>
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "libcrypt-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "missing_stdlib.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int make_salt(char **ret) {
+
+#if HAVE_CRYPT_GENSALT_RA
+ const char *e;
+ char *salt;
+
+ /* If we have crypt_gensalt_ra() we default to the "preferred method" (i.e. usually yescrypt).
+ * crypt_gensalt_ra() is usually provided by libxcrypt. */
+
+ e = secure_getenv("SYSTEMD_CRYPT_PREFIX");
+ if (!e)
+#if HAVE_CRYPT_PREFERRED_METHOD
+ e = crypt_preferred_method();
+#else
+ e = "$6$";
+#endif
+
+ log_debug("Generating salt for hash prefix: %s", e);
+
+ salt = crypt_gensalt_ra(e, 0, NULL, 0);
+ if (!salt)
+ return -errno;
+
+ *ret = salt;
+ return 0;
+#else
+ /* If crypt_gensalt_ra() is not available, we use SHA512 and generate the salt on our own. */
+
+ static const char table[] =
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "0123456789"
+ "./";
+
+ uint8_t raw[16];
+ char *salt, *j;
+ size_t i;
+ int r;
+
+ /* This is a bit like crypt_gensalt_ra(), but doesn't require libcrypt, and doesn't do anything but
+ * SHA512, i.e. is legacy-free and minimizes our deps. */
+
+ assert_cc(sizeof(table) == 64U + 1U);
+
+ log_debug("Generating fallback salt for hash prefix: $6$");
+
+ /* Insist on the best randomness by setting RANDOM_BLOCK, this is about keeping passwords secret after all. */
+ r = genuine_random_bytes(raw, sizeof(raw), RANDOM_BLOCK);
+ if (r < 0)
+ return r;
+
+ salt = new(char, 3+sizeof(raw)+1+1);
+ if (!salt)
+ return -ENOMEM;
+
+ /* We only bother with SHA512 hashed passwords, the rest is legacy, and we don't do legacy. */
+ j = stpcpy(salt, "$6$");
+ for (i = 0; i < sizeof(raw); i++)
+ j[i] = table[raw[i] & 63];
+ j[i++] = '$';
+ j[i] = 0;
+
+ *ret = salt;
+ return 0;
+#endif
+}
+
+#if HAVE_CRYPT_RA
+# define CRYPT_RA_NAME "crypt_ra"
+#else
+# define CRYPT_RA_NAME "crypt_r"
+
+/* Provide a poor man's fallback that uses a fixed size buffer. */
+
+static char* systemd_crypt_ra(const char *phrase, const char *setting, void **data, int *size) {
+ assert(data);
+ assert(size);
+
+ /* We allocate the buffer because crypt(3) says: struct crypt_data may be quite large (32kB in this
+ * implementation of libcrypt; over 128kB in some other implementations). This is large enough that
+ * it may be unwise to allocate it on the stack. */
+
+ if (!*data) {
+ *data = new0(struct crypt_data, 1);
+ if (!*data) {
+ errno = -ENOMEM;
+ return NULL;
+ }
+
+ *size = (int) (sizeof(struct crypt_data));
+ }
+
+ char *t = crypt_r(phrase, setting, *data);
+ if (!t)
+ return NULL;
+
+ /* crypt_r may return a pointer to an invalid hashed password on error. Our callers expect NULL on
+ * error, so let's just return that. */
+ if (t[0] == '*')
+ return NULL;
+
+ return t;
+}
+
+#define crypt_ra systemd_crypt_ra
+
+#endif
+
+int hash_password_full(const char *password, void **cd_data, int *cd_size, char **ret) {
+ _cleanup_free_ char *salt = NULL;
+ _cleanup_(erase_and_freep) void *_cd_data = NULL;
+ char *p;
+ int r, _cd_size = 0;
+
+ assert(!!cd_data == !!cd_size);
+
+ r = make_salt(&salt);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to generate salt: %m");
+
+ errno = 0;
+ p = crypt_ra(password, salt, cd_data ?: &_cd_data, cd_size ?: &_cd_size);
+ if (!p)
+ return log_debug_errno(errno_or_else(SYNTHETIC_ERRNO(EINVAL)),
+ CRYPT_RA_NAME "() failed: %m");
+
+ p = strdup(p);
+ if (!p)
+ return -ENOMEM;
+
+ *ret = p;
+ return 0;
+}
+
+bool looks_like_hashed_password(const char *s) {
+ /* Returns false if the specified string is certainly not a hashed UNIX password. crypt(5) lists
+ * various hashing methods. We only reject (return false) strings which are documented to have
+ * different meanings.
+ *
+ * In particular, we allow locked passwords, i.e. strings starting with "!", including just "!",
+ * i.e. the locked empty password. See also fc58c0c7bf7e4f525b916e3e5be0de2307fef04e.
+ */
+ if (!s)
+ return false;
+
+ s += strspn(s, "!"); /* Skip (possibly duplicated) locking prefix */
+
+ return !STR_IN_SET(s, "x", "*");
+}
+
+int test_password_one(const char *hashed_password, const char *password) {
+ _cleanup_(erase_and_freep) void *cd_data = NULL;
+ int cd_size = 0;
+ const char *k;
+
+ errno = 0;
+ k = crypt_ra(password, hashed_password, &cd_data, &cd_size);
+ if (!k) {
+ if (errno == ENOMEM)
+ return -ENOMEM;
+ /* Unknown or unavailable hashing method or string too short */
+ return 0;
+ }
+
+ return streq(k, hashed_password);
+}
+
+int test_password_many(char **hashed_password, const char *password) {
+ char **hpw;
+ int r;
+
+ STRV_FOREACH(hpw, hashed_password) {
+ r = test_password_one(*hpw, password);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/shared/libcrypt-util.h b/src/shared/libcrypt-util.h
new file mode 100644
index 0000000..5b9b945
--- /dev/null
+++ b/src/shared/libcrypt-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int make_salt(char **ret);
+int hash_password_full(const char *password, void **cd_data, int *cd_size, char **ret);
+static inline int hash_password(const char *password, char **ret) {
+ return hash_password_full(password, NULL, NULL, ret);
+}
+bool looks_like_hashed_password(const char *s);
+int test_password_one(const char *hashed_password, const char *password);
+int test_password_many(char **hashed_password, const char *password);
diff --git a/src/shared/libmount-util.h b/src/shared/libmount-util.h
new file mode 100644
index 0000000..db9728c
--- /dev/null
+++ b/src/shared/libmount-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+/* This needs to be after sys/mount.h */
+#include <libmount.h>
+
+#include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_table*, mnt_free_table);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct libmnt_iter*, mnt_free_iter);
+
+static inline int libmount_parse(
+ const char *path,
+ FILE *source,
+ struct libmnt_table **ret_table,
+ struct libmnt_iter **ret_iter) {
+
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ int r;
+
+ /* Older libmount seems to require this. */
+ assert(!source || path);
+
+ table = mnt_new_table();
+ iter = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!table || !iter)
+ return -ENOMEM;
+
+ /* If source or path are specified, we use on the functions which ignore utab.
+ * Only if both are empty, we use mnt_table_parse_mtab(). */
+
+ if (source)
+ r = mnt_table_parse_stream(table, source, path);
+ else if (path)
+ r = mnt_table_parse_file(table, path);
+ else
+ r = mnt_table_parse_mtab(table, NULL);
+ if (r < 0)
+ return r;
+
+ *ret_table = TAKE_PTR(table);
+ *ret_iter = TAKE_PTR(iter);
+ return 0;
+}
diff --git a/src/shared/libshared.sym b/src/shared/libshared.sym
new file mode 100644
index 0000000..6a7495a
--- /dev/null
+++ b/src/shared/libshared.sym
@@ -0,0 +1,3 @@
+SD_SHARED {
+ global: *;
+};
diff --git a/src/shared/linux/README b/src/shared/linux/README
new file mode 100644
index 0000000..46d5547
--- /dev/null
+++ b/src/shared/linux/README
@@ -0,0 +1,8 @@
+The files in this directory are copied from kernel-5.2, and the following modifications are applied:
+- auto_dev-ioctl.h: set AUTOFS_DEV_IOCTL_VERSION_MINOR to 0
+- auto_dev-ioctl.h: define AUTOFS_IOCTL if not defined
+- bpf_insn.h: This is imported from samples/bpf/bpf_insn.h
+- bpf_insn.h: BPF_JMP_A() macro is also imported from include/linux/filter.h
+- dm-ioctl.h: set DM_VERSION_MINOR to 27
+- ethtool.h: define __KERNEL_DIV_ROUND_UP if not defined
+- ethtool.h: add casts in ethtool_cmd_speed()
diff --git a/src/shared/linux/auto_dev-ioctl.h b/src/shared/linux/auto_dev-ioctl.h
new file mode 100644
index 0000000..261546c
--- /dev/null
+++ b/src/shared/linux/auto_dev-ioctl.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _UAPI_LINUX_AUTO_DEV_IOCTL_H
+#define _UAPI_LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/auto_fs.h>
+#include <linux/string.h>
+
+#define AUTOFS_DEVICE_NAME "autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0
+
+#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+struct args_protover {
+ __u32 version;
+};
+
+struct args_protosubver {
+ __u32 sub_version;
+};
+
+struct args_openmount {
+ __u32 devid;
+};
+
+struct args_ready {
+ __u32 token;
+};
+
+struct args_fail {
+ __u32 token;
+ __s32 status;
+};
+
+struct args_setpipefd {
+ __s32 pipefd;
+};
+
+struct args_timeout {
+ __u64 timeout;
+};
+
+struct args_requester {
+ __u32 uid;
+ __u32 gid;
+};
+
+struct args_expire {
+ __u32 how;
+};
+
+struct args_askumount {
+ __u32 may_umount;
+};
+
+struct args_ismountpoint {
+ union {
+ struct args_in {
+ __u32 type;
+ } in;
+ struct args_out {
+ __u32 devid;
+ __u32 magic;
+ } out;
+ };
+};
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path size must account for the total length
+ * of the chunk of memory otherwise is is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ /* Command parameters */
+
+ union {
+ struct args_protover protover;
+ struct args_protosubver protosubver;
+ struct args_openmount openmount;
+ struct args_ready ready;
+ struct args_fail fail;
+ struct args_setpipefd setpipefd;
+ struct args_timeout timeout;
+ struct args_requester requester;
+ struct args_expire expire;
+ struct args_askumount askumount;
+ struct args_ismountpoint ismountpoint;
+ };
+
+ char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
+{
+ memset(in, 0, AUTOFS_DEV_IOCTL_SIZE);
+ in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+ in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+ in->size = AUTOFS_DEV_IOCTL_SIZE;
+ in->ioctlfd = -1;
+}
+
+enum {
+ /* Get various version info */
+ AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+ /* Open mount ioctl fd */
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+ /* Close mount ioctl fd */
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+ /* Mount/expire status returns */
+ AUTOFS_DEV_IOCTL_READY_CMD,
+ AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+ /* Activate/deactivate autofs mount */
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+ /* Expiry timeout */
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+ /* Get mount last requesting uid and gid */
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+ /* Check for eligible expire candidates */
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+ /* Request busy status */
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+ /* Check if path is a mountpoint */
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#ifndef AUTOFS_IOCTL
+#define AUTOFS_IOCTL 0x93
+#endif
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+ _IOWR(AUTOFS_IOCTL, \
+ AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif /* _UAPI_LINUX_AUTO_DEV_IOCTL_H */
diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h
new file mode 100644
index 0000000..359fc37
--- /dev/null
+++ b/src/shared/linux/bpf.h
@@ -0,0 +1,3057 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+/* jmp encodings */
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
+
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+ BPF_PROG_GET_NEXT_ID,
+ BPF_MAP_GET_NEXT_ID,
+ BPF_PROG_GET_FD_BY_ID,
+ BPF_MAP_GET_FD_BY_ID,
+ BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
+ BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
+ BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
+ BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
+};
+
+/* Note that tracing related programs such as
+ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT}
+ * are not subject to a stable API since kernel internal data
+ * structures can change from release to release and may
+ * therefore break existing tracing BPF programs. Tracing BPF
+ * programs correspond to /a/ specific kernel which is to be
+ * analyzed, and not /a/ specific kernel /and/ all future ones.
+ */
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_RAW_TRACEPOINT,
+ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
+ BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags has to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When children program makes decision (like picking TCP CA or sock bind)
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+ * verifier will allow any alignment whatsoever. On platforms
+ * with strict alignment requirements for loads ands stores (such
+ * as sparc and mips) the verifier validates that all loads and
+ * stores provably follow this requirement. This flag turns that
+ * checking and enforcement off.
+ *
+ * It is mostly used for testing when we want to validate the
+ * context and memory access aspects of the verifier, but because
+ * of an unaligned access the alignment check would trigger before
+ * the one we are interested in.
+ */
+#define BPF_F_ANY_ALIGNMENT (1U << 1)
+
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+#define BPF_PSEUDO_MAP_FD 1
+
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+/* flags for BPF_MAP_CREATE command */
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED (1U << 6)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+enum bpf_stack_build_id_status {
+ /* user space need an empty entry to identify end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fallback to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
+ char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* not used */
+ __u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 prog_ifindex; /* ifindex of netdev to prep for */
+ /* For some prog types expected attach type must be known at
+ * load time to verify attach type specific parts of prog
+ * (context accesses, allowed helpers, etc).
+ */
+ __u32 expected_attach_type;
+ __u32 prog_btf_fd; /* fd pointing to BTF type data */
+ __u32 func_info_rec_size; /* userspace bpf_func_info size */
+ __aligned_u64 func_info; /* func info */
+ __u32 func_info_cnt; /* number of bpf_func_info records */
+ __u32 line_info_rec_size; /* userspace bpf_line_info size */
+ __aligned_u64 line_info; /* line info */
+ __u32 line_info_cnt; /* number of bpf_line_info records */
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ __u32 file_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in; /* input: len of data_in */
+ __u32 data_size_out; /* input/output: len of data_out
+ * returns ENOSPC if data_out
+ * is too small.
+ */
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ } test;
+
+ struct { /* anonymous struct used by BPF_*_GET_*_ID */
+ union {
+ __u32 start_id;
+ __u32 prog_id;
+ __u32 map_id;
+ __u32 btf_id;
+ };
+ __u32 next_id;
+ __u32 open_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+ __u32 bpf_fd;
+ __u32 info_len;
+ __aligned_u64 info;
+ } info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
+
+ struct {
+ __u64 name;
+ __u32 prog_fd;
+ } raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prod_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
+} __attribute__((aligned(8)));
+
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * address *src* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as an eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * bloc (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for packet associated to *skb.* The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm or the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * identifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
+ *
+ * u64 bpf_get_current_task(void)
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run is the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* task belongs to the cgroup2.
+ * * 1, if the *skb* task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read()** helper here instead
+ * to read the string would require to estimate the length at
+ * compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a unique socket
+ * identifier per namespace.
+ * Return
+ * A 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There is a single supported mode at this time:
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * For en eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *opval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * The supported callback values that *argval* can combine are:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program to be called again until *bytes* have
+ * been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *ctx*, which is a pointer
+ * to the context on which the tracing program is executed.
+ * To store the stacktrace, the bpf program provides *buf* with
+ * a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to sufficient large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * Description
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ * In general, "direct packet access" is the preferred method to
+ * access packet data, however, this helper is in particular useful
+ * in socket filters where *skb*\ **->data** does not always point
+ * to the start of the mac header and where "direct packet access"
+ * is not available.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ * Return
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be as well
+ * positive (growing) as negative (shrinking).
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper is susceptible to change the underlaying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the BPF program type, a local storage area
+ * can be shared between multiple instances of the BPF program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the **BPF_STX_XADD** instruction to alter
+ * the shared data.
+ * Return
+ * A pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Select a **SO_REUSEPORT** socket from a
+ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * It checks the selected socket is matching the incoming
+ * request in the socket buffer.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_sk_release(struct bpf_sock *sock)
+ * Description
+ * Release the reference held by *sock*. *sock* must be a
+ * non-**NULL** pointer that was returned from
+ * **bpf_sk_lookup_xxx**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ * Description
+ * For socket policies, insert *len* bytes into *msg* at offset
+ * *start*.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it may want to insert metadata or options into the *msg*.
+ * This can later be read and used by any of the lower layer BPF
+ * hooks.
+ *
+ * This helper may fail if under memory pressure (a malloc
+ * fails) in these cases BPF programs will get an appropriate
+ * error and BPF programs will need to handle them.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+ * Description
+ * Will remove *pop* bytes from a *msg* starting at byte *start*.
+ * This may result in **ENOMEM** errors under certain situations if
+ * an allocation and copy are required due to a full ring buffer.
+ * However, the helper will try to avoid doing the allocation
+ * if possible. Other errors can occur if input parameters are
+ * invalid either due to *start* byte not being valid part of *msg*
+ * payload and/or *pop* value being to large.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded pointer movement.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ */
+#define __BPF_FUNC_MAPPER(FN) \
+ FN(unspec), \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid), \
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt), \
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data), \
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state), \
+ FN(get_stack), \
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup), \
+ FN(sock_hash_update), \
+ FN(msg_redirect_hash), \
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action), \
+ FN(rc_repeat), \
+ FN(rc_keydown), \
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id), \
+ FN(sk_lookup_tcp), \
+ FN(sk_lookup_udp), \
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem), \
+ FN(msg_push_data), \
+ FN(msg_pop_data), \
+ FN(rc_pointer_rel),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID (1ULL << 11)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
+
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS (-1L)
+
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+};
+
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+ BPF_HDR_START_MAC,
+ BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE
+};
+
+#define __bpf_md_ptr(type, name) \
+union { \
+ type name; \
+ __u64 :64; \
+} __attribute__((aligned(8)))
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
+ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
+ __u64 tstamp;
+ __u32 wire_len;
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext; /* Padding, future use. */
+ __u32 tunnel_label;
+};
+
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ __u16 ext; /* Padding, future use. */
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled seprately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+ __u32 mark;
+ __u32 priority;
+ __u32 src_ip4; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_port; /* Allows 4-byte read.
+ * Stored in host byte order
+ */
+};
+
+struct bpf_sock_tuple {
+ union {
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ } ipv4;
+ struct {
+ __be32 saddr[4];
+ __be32 daddr[4];
+ __be16 sport;
+ __be16 dport;
+ } ipv6;
+ };
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+ XDP_REDIRECT,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+ __u32 data_meta;
+ /* Below access go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
+};
+
+enum sk_action {
+ SK_DROP = 0,
+ SK_PASS,
+};
+
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ __bpf_md_ptr(void *, data);
+ __bpf_md_ptr(void *, data_end);
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 size; /* Total size of sk_msg */
+};
+
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ __bpf_md_ptr(void *, data);
+ /* End of directly accessible data */
+ __bpf_md_ptr(void *, data_end);
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
+};
+
+#define BPF_TAG_SIZE 8
+
+struct bpf_prog_info {
+ __u32 type;
+ __u32 id;
+ __u8 tag[BPF_TAG_SIZE];
+ __u32 jited_prog_len;
+ __u32 xlated_prog_len;
+ __aligned_u64 jited_prog_insns;
+ __aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 gpl_compatible:1;
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
+ __u32 btf_id;
+ __u32 func_info_rec_size;
+ __aligned_u64 func_info;
+ __u32 nr_func_info;
+ __u32 nr_line_info;
+ __aligned_u64 line_info;
+ __aligned_u64 jited_line_info;
+ __u32 nr_jited_line_info;
+ __u32 line_info_rec_size;
+ __u32 jited_line_info_rec_size;
+ __u32 nr_prog_tags;
+ __aligned_u64 prog_tags;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+ __u32 type;
+ __u32 id;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 :32;
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 btf_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+ __aligned_u64 btf;
+ __u32 btf_size;
+ __u32 id;
+} __attribute__((aligned(8)));
+
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+ __u32 user_family; /* Allows 4-byte read, but no write. */
+ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_port; /* Allows 4-byte read and write.
+ * Stored in network byte order
+ */
+ __u32 family; /* Allows 4-byte read, but no write */
+ __u32 type; /* Allows 4-byte read, but no write */
+ __u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+};
+
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of this fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+};
+
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
+ * socket transition to LISTEN state.
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
+ __u32 major;
+ __u32 minor;
+};
+
+struct bpf_raw_tracepoint_args {
+ __u64 args[0];
+};
+
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+
+enum {
+ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
+ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
+ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
+ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
+ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
+ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+};
+
+struct bpf_fib_lookup {
+ /* input: network family for lookup (AF_INET, AF_INET6)
+ * output: network family of egress nexthop
+ */
+ __u8 family;
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+
+ /* input: L3 device index for lookup
+ * output: device index from FIB lookup
+ */
+ __u32 ifindex;
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowinfo; /* AF_INET6, flow_label + priority */
+
+ /* output: metric of fib result (IPv4/IPv6 only) */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+ * network header. output: bpf_fib_lookup sets to gateway address
+ * if FIB lookup returns gateway route
+ */
+ union {
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
+struct bpf_flow_keys {
+ __u16 nhoff;
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __u8 ip_proto;
+ __be16 n_proto;
+ __be16 sport;
+ __be16 dport;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+};
+
+struct bpf_func_info {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
+#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
+
+struct bpf_line_info {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h
new file mode 100644
index 0000000..ee97668
--- /dev/null
+++ b/src/shared/linux/bpf_common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_BPF_COMMON_H__
+#define _UAPI__LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00 /* 32-bit */
+#define BPF_H 0x08 /* 16-bit */
+#define BPF_B 0x10 /* 8-bit */
+/* eBPF BPF_DW 0x18 64-bit */
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* _UAPI__LINUX_BPF_COMMON_H__ */
diff --git a/src/shared/linux/bpf_insn.h b/src/shared/linux/bpf_insn.h
new file mode 100644
index 0000000..d8d9fb2
--- /dev/null
+++ b/src/shared/linux/bpf_insn.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* eBPF instruction mini library */
+#ifndef __BPF_INSN_H
+#define __BPF_INSN_H
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#define BPF_JMP_A(OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_JA, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#endif
diff --git a/src/shared/linux/dm-ioctl.h b/src/shared/linux/dm-ioctl.h
new file mode 100644
index 0000000..b3aeec7
--- /dev/null
+++ b/src/shared/linux/dm-ioctl.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004 - 2009 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
+
+#include <linux/types.h>
+
+#define DM_DIR "mapper" /* Slashes not supported */
+#define DM_CONTROL_NODE "control"
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables. Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' or 'inactive' table
+ * slots will be filled. The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device or set its uuid if none was previously supplied.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed. Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device. If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_. Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device. This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device. The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ *
+ * DM_TARGET_MSG:
+ * Pass a message string to the target at a specific offset of a device.
+ *
+ * DM_DEV_SET_GEOMETRY:
+ * Set the geometry of a device by passing in a string in this format:
+ *
+ * "cylinders heads sectors_per_track start_sector"
+ *
+ * Beware that CHS geometry is nearly obsolete and only provided
+ * for compatibility with dm devices that can be booted by a PC
+ * BIOS. See struct hd_geometry for range limits. Also note that
+ * the geometry is erased if the device size changes.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start. If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+ /*
+ * The version number is made up of three parts:
+ * major - no backward or forward compatibility,
+ * minor - only backwards compatible,
+ * patch - both backwards and forwards compatible.
+ *
+ * All clients of the ioctl interface should fill in the
+ * version number of the interface that they were
+ * compiled with.
+ *
+ * All recognised ioctl commands (ie. those that don't
+ * return -ENOTTY) fill out this field, even if the
+ * command failed.
+ */
+ __u32 version[3]; /* in/out */
+ __u32 data_size; /* total size of data passed in
+ * including this struct */
+
+ __u32 data_start; /* offset to start of data
+ * relative to start of this struct */
+
+ __u32 target_count; /* in/out */
+ __s32 open_count; /* out */
+ __u32 flags; /* in/out */
+
+ /*
+ * event_nr holds either the event number (input and output) or the
+ * udev cookie value (input only).
+ * The DM_DEV_WAIT ioctl takes an event number as input.
+ * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+ * use the field as a cookie to return in the DM_COOKIE
+ * variable with the uevents they issue.
+ * For output, the ioctls return the event number, not the cookie.
+ */
+ __u32 event_nr; /* in/out */
+ __u32 padding;
+
+ __u64 dev; /* in/out */
+
+ char name[DM_NAME_LEN]; /* device name */
+ char uuid[DM_UUID_LEN]; /* unique identifier for
+ * the block device */
+ char data[7]; /* padding or data */
+};
+
+/*
+ * Used to specify tables. These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+ __u64 sector_start;
+ __u64 length;
+ __s32 status; /* used when reading from kernel only */
+
+ /*
+ * Location of the next dm_target_spec.
+ * - When specifying targets on a DM_TABLE_LOAD command, this value is
+ * the number of bytes from the start of the "current" dm_target_spec
+ * to the start of the "next" dm_target_spec.
+ * - When retrieving targets on a DM_TABLE_STATUS command, this value
+ * is the number of bytes from the start of the first dm_target_spec
+ * (that follows the dm_ioctl struct) to the start of the "next"
+ * dm_target_spec.
+ */
+ __u32 next;
+
+ char target_type[DM_MAX_TYPE_NAME];
+
+ /*
+ * Parameter string starts immediately after this object.
+ * Be careful to add padding after string to ensure correct
+ * alignment of subsequent dm_target_spec.
+ */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+ __u32 count; /* Array size */
+ __u32 padding; /* unused */
+ __u64 dev[0]; /* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+ __u64 dev;
+ __u32 next; /* offset to the next record from
+ the _start_ of this */
+ char name[0];
+};
+
+/*
+ * Used to retrieve the target versions
+ */
+struct dm_target_versions {
+ __u32 next;
+ __u32 version[3];
+
+ char name[0];
+};
+
+/*
+ * Used to pass message to a target
+ */
+struct dm_target_msg {
+ __u64 sector; /* Device sector */
+
+ char message[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Top level cmds */
+ DM_VERSION_CMD = 0,
+ DM_REMOVE_ALL_CMD,
+ DM_LIST_DEVICES_CMD,
+
+ /* device level cmds */
+ DM_DEV_CREATE_CMD,
+ DM_DEV_REMOVE_CMD,
+ DM_DEV_RENAME_CMD,
+ DM_DEV_SUSPEND_CMD,
+ DM_DEV_STATUS_CMD,
+ DM_DEV_WAIT_CMD,
+
+ /* Table level cmds */
+ DM_TABLE_LOAD_CMD,
+ DM_TABLE_CLEAR_CMD,
+ DM_TABLE_DEPS_CMD,
+ DM_TABLE_STATUS_CMD,
+
+ /* Added later */
+ DM_LIST_VERSIONS_CMD,
+ DM_TARGET_MSG_CMD,
+ DM_DEV_SET_GEOMETRY_CMD,
+ DM_DEV_ARM_POLL_CMD,
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+
+#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
+#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR 4
+#define DM_VERSION_MINOR 27
+#define DM_VERSION_PATCHLEVEL 0
+#define DM_VERSION_EXTRA "-ioctl (2019-01-18)"
+
+/* Status bits */
+#define DM_READONLY_FLAG (1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
+
+/*
+ * This flag is now ignored.
+ */
+#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */
+
+/*
+ * Set this to avoid attempting to freeze any filesystem when suspending.
+ */
+#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */
+
+/*
+ * Set this to suspend without flushing queued ios.
+ * Also disables flushing uncommitted changes in the thin target before
+ * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT.
+ */
+#define DM_NOFLUSH_FLAG (1 << 11) /* In */
+
+/*
+ * If set, any table information returned will relate to the inactive
+ * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG
+ * is set before using the data returned.
+ */
+#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */
+
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */
+
+/*
+ * If set, rename changes the uuid not the name. Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG (1 << 14) /* In */
+
+/*
+ * If set, all buffers are wiped after use. Use when sending
+ * or requesting sensitive data such as an encryption key.
+ */
+#define DM_SECURE_DATA_FLAG (1 << 15) /* In */
+
+/*
+ * If set, a message generated output data.
+ */
+#define DM_DATA_OUT_FLAG (1 << 16) /* Out */
+
+/*
+ * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if
+ * the device cannot be removed immediately because it is still in use
+ * it should instead be scheduled for removal when it gets closed.
+ *
+ * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this
+ * flag indicates that the device is scheduled to be removed when it
+ * gets closed.
+ */
+#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
+
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
+
+#endif /* _LINUX_DM_IOCTL_H */
diff --git a/src/shared/linux/ethtool.h b/src/shared/linux/ethtool.h
new file mode 100644
index 0000000..b06c630
--- /dev/null
+++ b/src/shared/linux/ethtool.h
@@ -0,0 +1,2021 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ethtool.h: Defines for Linux ethtool.
+ *
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
+ * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
+ * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
+ * christopher.leech@intel.com,
+ * scott.feldman@intel.com)
+ * Portions Copyright (C) Sun Microsystems 2008
+ */
+
+#ifndef _UAPI_LINUX_ETHTOOL_H
+#define _UAPI_LINUX_ETHTOOL_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#ifndef __KERNEL__
+#include <limits.h> /* for INT_MAX */
+#endif
+
+#ifndef __KERNEL_DIV_ROUND_UP
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+/* All structures exposed to userland should be defined such that they
+ * have the same layout for 32-bit and 64-bit userland.
+ */
+
+/**
+ * struct ethtool_cmd - DEPRECATED, link control and status
+ * This structure is DEPRECATED, please use struct ethtool_link_settings.
+ * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET
+ * @supported: Bitmask of %SUPPORTED_* flags for the link modes,
+ * physical connectors and other link features for which the
+ * interface supports autonegotiation or auto-detection.
+ * Read-only.
+ * @advertising: Bitmask of %ADVERTISED_* flags for the link modes,
+ * physical connectors and other link features that are
+ * advertised through autonegotiation or enabled for
+ * auto-detection.
+ * @speed: Low bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN
+ * @duplex: Duplex mode; one of %DUPLEX_*
+ * @port: Physical connector type; one of %PORT_*
+ * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not
+ * applicable. For clause 45 PHYs this is the PRTAD.
+ * @transceiver: Historically used to distinguish different possible
+ * PHY types, but not in a consistent way. Deprecated.
+ * @autoneg: Enable/disable autonegotiation and auto-detection;
+ * either %AUTONEG_DISABLE or %AUTONEG_ENABLE
+ * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO
+ * protocols supported by the interface; 0 if unknown.
+ * Read-only.
+ * @maxtxpkt: Historically used to report TX IRQ coalescing; now
+ * obsoleted by &struct ethtool_coalesce. Read-only; deprecated.
+ * @maxrxpkt: Historically used to report RX IRQ coalescing; now
+ * obsoleted by &struct ethtool_coalesce. Read-only; deprecated.
+ * @speed_hi: High bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN
+ * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of
+ * %ETH_TP_MDI_*. If the status is unknown or not applicable, the
+ * value will be %ETH_TP_MDI_INVALID. Read-only.
+ * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of
+ * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads
+ * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected.
+ * When written successfully, the link should be renegotiated if
+ * necessary.
+ * @lp_advertising: Bitmask of %ADVERTISED_* flags for the link modes
+ * and other link features that the link partner advertised
+ * through autonegotiation; 0 if unknown or not applicable.
+ * Read-only.
+ *
+ * The link speed in Mbps is split between @speed and @speed_hi. Use
+ * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to
+ * access it.
+ *
+ * If autonegotiation is disabled, the speed and @duplex represent the
+ * fixed link mode and are writable if the driver supports multiple
+ * link modes. If it is enabled then they are read-only; if the link
+ * is up they represent the negotiated link mode; if the link is down,
+ * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and
+ * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ *
+ * Some hardware interfaces may have multiple PHYs and/or physical
+ * connectors fitted or do not allow the driver to detect which are
+ * fitted. For these interfaces @port and/or @phy_address may be
+ * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE.
+ * Otherwise, attempts to write different values may be ignored or
+ * rejected.
+ *
+ * Users should assume that all fields not marked read-only are
+ * writable and subject to validation by the driver. They should use
+ * %ETHTOOL_GSET to get the current values before making specific
+ * changes and then applying them with %ETHTOOL_SSET.
+ *
+ * Deprecated fields should be ignored by both users and drivers.
+ */
+struct ethtool_cmd {
+ __u32 cmd;
+ __u32 supported;
+ __u32 advertising;
+ __u16 speed;
+ __u8 duplex;
+ __u8 port;
+ __u8 phy_address;
+ __u8 transceiver;
+ __u8 autoneg;
+ __u8 mdio_support;
+ __u32 maxtxpkt;
+ __u32 maxrxpkt;
+ __u16 speed_hi;
+ __u8 eth_tp_mdix;
+ __u8 eth_tp_mdix_ctrl;
+ __u32 lp_advertising;
+ __u32 reserved[2];
+};
+
+static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep,
+ __u32 speed)
+{
+ ep->speed = (__u16)(speed & 0xFFFF);
+ ep->speed_hi = (__u16)(speed >> 16);
+}
+
+static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep)
+{
+ return ((__u32) ep->speed_hi << 16) | (__u32) ep->speed;
+}
+
+/* Device supports clause 22 register access to PHY or peripherals
+ * using the interface defined in <linux/mii.h>. This should not be
+ * set if there are known to be no such peripherals present or if
+ * the driver only emulates clause 22 registers for compatibility.
+ */
+#define ETH_MDIO_SUPPORTS_C22 1
+
+/* Device supports clause 45 register access to PHY or peripherals
+ * using the interface defined in <linux/mii.h> and <linux/mdio.h>.
+ * This should not be set if there are known to be no such peripherals
+ * present.
+ */
+#define ETH_MDIO_SUPPORTS_C45 2
+
+#define ETHTOOL_FWVERS_LEN 32
+#define ETHTOOL_BUSINFO_LEN 32
+#define ETHTOOL_EROMVERS_LEN 32
+
+/**
+ * struct ethtool_drvinfo - general driver and device information
+ * @cmd: Command number = %ETHTOOL_GDRVINFO
+ * @driver: Driver short name. This should normally match the name
+ * in its bus driver structure (e.g. pci_driver::name). Must
+ * not be an empty string.
+ * @version: Driver version string; may be an empty string
+ * @fw_version: Firmware version string; may be an empty string
+ * @erom_version: Expansion ROM version string; may be an empty string
+ * @bus_info: Device bus address. This should match the dev_name()
+ * string for the underlying bus device, if there is one. May be
+ * an empty string.
+ * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
+ * %ETHTOOL_SPFLAGS commands; also the number of strings in the
+ * %ETH_SS_PRIV_FLAGS set
+ * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
+ * command; also the number of strings in the %ETH_SS_STATS set
+ * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
+ * command; also the number of strings in the %ETH_SS_TEST set
+ * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
+ * and %ETHTOOL_SEEPROM commands, in bytes
+ * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
+ * command, in bytes
+ *
+ * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
+ * strings in any string set (from Linux 2.6.34).
+ *
+ * Drivers should set at most @driver, @version, @fw_version and
+ * @bus_info in their get_drvinfo() implementation. The ethtool
+ * core fills in the other fields using other driver operations.
+ */
+struct ethtool_drvinfo {
+ __u32 cmd;
+ char driver[32];
+ char version[32];
+ char fw_version[ETHTOOL_FWVERS_LEN];
+ char bus_info[ETHTOOL_BUSINFO_LEN];
+ char erom_version[ETHTOOL_EROMVERS_LEN];
+ char reserved2[12];
+ __u32 n_priv_flags;
+ __u32 n_stats;
+ __u32 testinfo_len;
+ __u32 eedump_len;
+ __u32 regdump_len;
+};
+
+#define SOPASS_MAX 6
+
+/**
+ * struct ethtool_wolinfo - Wake-On-Lan configuration
+ * @cmd: Command number = %ETHTOOL_GWOL or %ETHTOOL_SWOL
+ * @supported: Bitmask of %WAKE_* flags for supported Wake-On-Lan modes.
+ * Read-only.
+ * @wolopts: Bitmask of %WAKE_* flags for enabled Wake-On-Lan modes.
+ * @sopass: SecureOn(tm) password; meaningful only if %WAKE_MAGICSECURE
+ * is set in @wolopts.
+ */
+struct ethtool_wolinfo {
+ __u32 cmd;
+ __u32 supported;
+ __u32 wolopts;
+ __u8 sopass[SOPASS_MAX];
+};
+
+/* for passing single values */
+struct ethtool_value {
+ __u32 cmd;
+ __u32 data;
+};
+
+#define PFC_STORM_PREVENTION_AUTO 0xffff
+#define PFC_STORM_PREVENTION_DISABLE 0
+
+enum tunable_id {
+ ETHTOOL_ID_UNSPEC,
+ ETHTOOL_RX_COPYBREAK,
+ ETHTOOL_TX_COPYBREAK,
+ ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
+ /*
+ * Add your fresh new tunable attribute above and remember to update
+ * tunable_strings[] in net/core/ethtool.c
+ */
+ __ETHTOOL_TUNABLE_COUNT,
+};
+
+enum tunable_type_id {
+ ETHTOOL_TUNABLE_UNSPEC,
+ ETHTOOL_TUNABLE_U8,
+ ETHTOOL_TUNABLE_U16,
+ ETHTOOL_TUNABLE_U32,
+ ETHTOOL_TUNABLE_U64,
+ ETHTOOL_TUNABLE_STRING,
+ ETHTOOL_TUNABLE_S8,
+ ETHTOOL_TUNABLE_S16,
+ ETHTOOL_TUNABLE_S32,
+ ETHTOOL_TUNABLE_S64,
+};
+
+struct ethtool_tunable {
+ __u32 cmd;
+ __u32 id;
+ __u32 type_id;
+ __u32 len;
+ void *data[0];
+};
+
+#define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff
+#define DOWNSHIFT_DEV_DISABLE 0
+
+/* Time in msecs after which link is reported as down
+ * 0 = lowest time supported by the PHY
+ * 0xff = off, link down detection according to standard
+ */
+#define ETHTOOL_PHY_FAST_LINK_DOWN_ON 0
+#define ETHTOOL_PHY_FAST_LINK_DOWN_OFF 0xff
+
+/* Energy Detect Power Down (EDPD) is a feature supported by some PHYs, where
+ * the PHY's RX & TX blocks are put into a low-power mode when there is no
+ * link detected (typically cable is un-plugged). For RX, only a minimal
+ * link-detection is available, and for TX the PHY wakes up to send link pulses
+ * to avoid any lock-ups in case the peer PHY may also be running in EDPD mode.
+ *
+ * Some PHYs may support configuration of the wake-up interval for TX pulses,
+ * and some PHYs may support only disabling TX pulses entirely. For the latter
+ * a special value is required (ETHTOOL_PHY_EDPD_NO_TX) so that this can be
+ * configured from userspace (should the user want it).
+ *
+ * The interval units for TX wake-up are in milliseconds, since this should
+ * cover a reasonable range of intervals:
+ * - from 1 millisecond, which does not sound like much of a power-saver
+ * - to ~65 seconds which is quite a lot to wait for a link to come up when
+ * plugging a cable
+ */
+#define ETHTOOL_PHY_EDPD_DFLT_TX_MSECS 0xffff
+#define ETHTOOL_PHY_EDPD_NO_TX 0xfffe
+#define ETHTOOL_PHY_EDPD_DISABLE 0
+
+enum phy_tunable_id {
+ ETHTOOL_PHY_ID_UNSPEC,
+ ETHTOOL_PHY_DOWNSHIFT,
+ ETHTOOL_PHY_FAST_LINK_DOWN,
+ ETHTOOL_PHY_EDPD,
+ /*
+ * Add your fresh new phy tunable attribute above and remember to update
+ * phy_tunable_strings[] in net/core/ethtool.c
+ */
+ __ETHTOOL_PHY_TUNABLE_COUNT,
+};
+
+/**
+ * struct ethtool_regs - hardware register dump
+ * @cmd: Command number = %ETHTOOL_GREGS
+ * @version: Dump format version. This is driver-specific and may
+ * distinguish different chips/revisions. Drivers must use new
+ * version numbers whenever the dump format changes in an
+ * incompatible way.
+ * @len: On entry, the real length of @data. On return, the number of
+ * bytes used.
+ * @data: Buffer for the register dump
+ *
+ * Users should use %ETHTOOL_GDRVINFO to find the maximum length of
+ * a register dump for the interface. They must allocate the buffer
+ * immediately following this structure.
+ */
+struct ethtool_regs {
+ __u32 cmd;
+ __u32 version;
+ __u32 len;
+ __u8 data[0];
+};
+
+/**
+ * struct ethtool_eeprom - EEPROM dump
+ * @cmd: Command number = %ETHTOOL_GEEPROM, %ETHTOOL_GMODULEEEPROM or
+ * %ETHTOOL_SEEPROM
+ * @magic: A 'magic cookie' value to guard against accidental changes.
+ * The value passed in to %ETHTOOL_SEEPROM must match the value
+ * returned by %ETHTOOL_GEEPROM for the same device. This is
+ * unused when @cmd is %ETHTOOL_GMODULEEEPROM.
+ * @offset: Offset within the EEPROM to begin reading/writing, in bytes
+ * @len: On entry, number of bytes to read/write. On successful
+ * return, number of bytes actually read/written. In case of
+ * error, this may indicate at what point the error occurred.
+ * @data: Buffer to read/write from
+ *
+ * Users may use %ETHTOOL_GDRVINFO or %ETHTOOL_GMODULEINFO to find
+ * the length of an on-board or module EEPROM, respectively. They
+ * must allocate the buffer immediately following this structure.
+ */
+struct ethtool_eeprom {
+ __u32 cmd;
+ __u32 magic;
+ __u32 offset;
+ __u32 len;
+ __u8 data[0];
+};
+
+/**
+ * struct ethtool_eee - Energy Efficient Ethernet information
+ * @cmd: ETHTOOL_{G,S}EEE
+ * @supported: Mask of %SUPPORTED_* flags for the speed/duplex combinations
+ * for which there is EEE support.
+ * @advertised: Mask of %ADVERTISED_* flags for the speed/duplex combinations
+ * advertised as eee capable.
+ * @lp_advertised: Mask of %ADVERTISED_* flags for the speed/duplex
+ * combinations advertised by the link partner as eee capable.
+ * @eee_active: Result of the eee auto negotiation.
+ * @eee_enabled: EEE configured mode (enabled/disabled).
+ * @tx_lpi_enabled: Whether the interface should assert its tx lpi, given
+ * that eee was negotiated.
+ * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting
+ * its tx lpi (after reaching 'idle' state). Effective only when eee
+ * was negotiated and tx_lpi_enabled was set.
+ */
+struct ethtool_eee {
+ __u32 cmd;
+ __u32 supported;
+ __u32 advertised;
+ __u32 lp_advertised;
+ __u32 eee_active;
+ __u32 eee_enabled;
+ __u32 tx_lpi_enabled;
+ __u32 tx_lpi_timer;
+ __u32 reserved[2];
+};
+
+/**
+ * struct ethtool_modinfo - plugin module eeprom information
+ * @cmd: %ETHTOOL_GMODULEINFO
+ * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx
+ * @eeprom_len: Length of the eeprom
+ *
+ * This structure is used to return the information to
+ * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM.
+ * The type code indicates the eeprom data format
+ */
+struct ethtool_modinfo {
+ __u32 cmd;
+ __u32 type;
+ __u32 eeprom_len;
+ __u32 reserved[8];
+};
+
+/**
+ * struct ethtool_coalesce - coalescing parameters for IRQs and stats updates
+ * @cmd: ETHTOOL_{G,S}COALESCE
+ * @rx_coalesce_usecs: How many usecs to delay an RX interrupt after
+ * a packet arrives.
+ * @rx_max_coalesced_frames: Maximum number of packets to receive
+ * before an RX interrupt.
+ * @rx_coalesce_usecs_irq: Same as @rx_coalesce_usecs, except that
+ * this value applies while an IRQ is being serviced by the host.
+ * @rx_max_coalesced_frames_irq: Same as @rx_max_coalesced_frames,
+ * except that this value applies while an IRQ is being serviced
+ * by the host.
+ * @tx_coalesce_usecs: How many usecs to delay a TX interrupt after
+ * a packet is sent.
+ * @tx_max_coalesced_frames: Maximum number of packets to be sent
+ * before a TX interrupt.
+ * @tx_coalesce_usecs_irq: Same as @tx_coalesce_usecs, except that
+ * this value applies while an IRQ is being serviced by the host.
+ * @tx_max_coalesced_frames_irq: Same as @tx_max_coalesced_frames,
+ * except that this value applies while an IRQ is being serviced
+ * by the host.
+ * @stats_block_coalesce_usecs: How many usecs to delay in-memory
+ * statistics block updates. Some drivers do not have an
+ * in-memory statistic block, and in such cases this value is
+ * ignored. This value must not be zero.
+ * @use_adaptive_rx_coalesce: Enable adaptive RX coalescing.
+ * @use_adaptive_tx_coalesce: Enable adaptive TX coalescing.
+ * @pkt_rate_low: Threshold for low packet rate (packets per second).
+ * @rx_coalesce_usecs_low: How many usecs to delay an RX interrupt after
+ * a packet arrives, when the packet rate is below @pkt_rate_low.
+ * @rx_max_coalesced_frames_low: Maximum number of packets to be received
+ * before an RX interrupt, when the packet rate is below @pkt_rate_low.
+ * @tx_coalesce_usecs_low: How many usecs to delay a TX interrupt after
+ * a packet is sent, when the packet rate is below @pkt_rate_low.
+ * @tx_max_coalesced_frames_low: Maximum nuumber of packets to be sent before
+ * a TX interrupt, when the packet rate is below @pkt_rate_low.
+ * @pkt_rate_high: Threshold for high packet rate (packets per second).
+ * @rx_coalesce_usecs_high: How many usecs to delay an RX interrupt after
+ * a packet arrives, when the packet rate is above @pkt_rate_high.
+ * @rx_max_coalesced_frames_high: Maximum number of packets to be received
+ * before an RX interrupt, when the packet rate is above @pkt_rate_high.
+ * @tx_coalesce_usecs_high: How many usecs to delay a TX interrupt after
+ * a packet is sent, when the packet rate is above @pkt_rate_high.
+ * @tx_max_coalesced_frames_high: Maximum number of packets to be sent before
+ * a TX interrupt, when the packet rate is above @pkt_rate_high.
+ * @rate_sample_interval: How often to do adaptive coalescing packet rate
+ * sampling, measured in seconds. Must not be zero.
+ *
+ * Each pair of (usecs, max_frames) fields specifies that interrupts
+ * should be coalesced until
+ * (usecs > 0 && time_since_first_completion >= usecs) ||
+ * (max_frames > 0 && completed_frames >= max_frames)
+ *
+ * It is illegal to set both usecs and max_frames to zero as this
+ * would cause interrupts to never be generated. To disable
+ * coalescing, set usecs = 0 and max_frames = 1.
+ *
+ * Some implementations ignore the value of max_frames and use the
+ * condition time_since_first_completion >= usecs
+ *
+ * This is deprecated. Drivers for hardware that does not support
+ * counting completions should validate that max_frames == !rx_usecs.
+ *
+ * Adaptive RX/TX coalescing is an algorithm implemented by some
+ * drivers to improve latency under low packet rates and improve
+ * throughput under high packet rates. Some drivers only implement
+ * one of RX or TX adaptive coalescing. Anything not implemented by
+ * the driver causes these values to be silently ignored.
+ *
+ * When the packet rate is below @pkt_rate_high but above
+ * @pkt_rate_low (both measured in packets per second) the
+ * normal {rx,tx}_* coalescing parameters are used.
+ */
+struct ethtool_coalesce {
+ __u32 cmd;
+ __u32 rx_coalesce_usecs;
+ __u32 rx_max_coalesced_frames;
+ __u32 rx_coalesce_usecs_irq;
+ __u32 rx_max_coalesced_frames_irq;
+ __u32 tx_coalesce_usecs;
+ __u32 tx_max_coalesced_frames;
+ __u32 tx_coalesce_usecs_irq;
+ __u32 tx_max_coalesced_frames_irq;
+ __u32 stats_block_coalesce_usecs;
+ __u32 use_adaptive_rx_coalesce;
+ __u32 use_adaptive_tx_coalesce;
+ __u32 pkt_rate_low;
+ __u32 rx_coalesce_usecs_low;
+ __u32 rx_max_coalesced_frames_low;
+ __u32 tx_coalesce_usecs_low;
+ __u32 tx_max_coalesced_frames_low;
+ __u32 pkt_rate_high;
+ __u32 rx_coalesce_usecs_high;
+ __u32 rx_max_coalesced_frames_high;
+ __u32 tx_coalesce_usecs_high;
+ __u32 tx_max_coalesced_frames_high;
+ __u32 rate_sample_interval;
+};
+
+/**
+ * struct ethtool_ringparam - RX/TX ring parameters
+ * @cmd: Command number = %ETHTOOL_GRINGPARAM or %ETHTOOL_SRINGPARAM
+ * @rx_max_pending: Maximum supported number of pending entries per
+ * RX ring. Read-only.
+ * @rx_mini_max_pending: Maximum supported number of pending entries
+ * per RX mini ring. Read-only.
+ * @rx_jumbo_max_pending: Maximum supported number of pending entries
+ * per RX jumbo ring. Read-only.
+ * @tx_max_pending: Maximum supported number of pending entries per
+ * TX ring. Read-only.
+ * @rx_pending: Current maximum number of pending entries per RX ring
+ * @rx_mini_pending: Current maximum number of pending entries per RX
+ * mini ring
+ * @rx_jumbo_pending: Current maximum number of pending entries per RX
+ * jumbo ring
+ * @tx_pending: Current maximum supported number of pending entries
+ * per TX ring
+ *
+ * If the interface does not have separate RX mini and/or jumbo rings,
+ * @rx_mini_max_pending and/or @rx_jumbo_max_pending will be 0.
+ *
+ * There may also be driver-dependent minimum values for the number
+ * of entries per ring.
+ */
+struct ethtool_ringparam {
+ __u32 cmd;
+ __u32 rx_max_pending;
+ __u32 rx_mini_max_pending;
+ __u32 rx_jumbo_max_pending;
+ __u32 tx_max_pending;
+ __u32 rx_pending;
+ __u32 rx_mini_pending;
+ __u32 rx_jumbo_pending;
+ __u32 tx_pending;
+};
+
+/**
+ * struct ethtool_channels - configuring number of network channel
+ * @cmd: ETHTOOL_{G,S}CHANNELS
+ * @max_rx: Read only. Maximum number of receive channel the driver support.
+ * @max_tx: Read only. Maximum number of transmit channel the driver support.
+ * @max_other: Read only. Maximum number of other channel the driver support.
+ * @max_combined: Read only. Maximum number of combined channel the driver
+ * support. Set of queues RX, TX or other.
+ * @rx_count: Valid values are in the range 1 to the max_rx.
+ * @tx_count: Valid values are in the range 1 to the max_tx.
+ * @other_count: Valid values are in the range 1 to the max_other.
+ * @combined_count: Valid values are in the range 1 to the max_combined.
+ *
+ * This can be used to configure RX, TX and other channels.
+ */
+
+struct ethtool_channels {
+ __u32 cmd;
+ __u32 max_rx;
+ __u32 max_tx;
+ __u32 max_other;
+ __u32 max_combined;
+ __u32 rx_count;
+ __u32 tx_count;
+ __u32 other_count;
+ __u32 combined_count;
+};
+
+/**
+ * struct ethtool_pauseparam - Ethernet pause (flow control) parameters
+ * @cmd: Command number = %ETHTOOL_GPAUSEPARAM or %ETHTOOL_SPAUSEPARAM
+ * @autoneg: Flag to enable autonegotiation of pause frame use
+ * @rx_pause: Flag to enable reception of pause frames
+ * @tx_pause: Flag to enable transmission of pause frames
+ *
+ * Drivers should reject a non-zero setting of @autoneg when
+ * autoneogotiation is disabled (or not supported) for the link.
+ *
+ * If the link is autonegotiated, drivers should use
+ * mii_advertise_flowctrl() or similar code to set the advertised
+ * pause frame capabilities based on the @rx_pause and @tx_pause flags,
+ * even if @autoneg is zero. They should also allow the advertised
+ * pause frame capabilities to be controlled directly through the
+ * advertising field of &struct ethtool_cmd.
+ *
+ * If @autoneg is non-zero, the MAC is configured to send and/or
+ * receive pause frames according to the result of autonegotiation.
+ * Otherwise, it is configured directly based on the @rx_pause and
+ * @tx_pause flags.
+ */
+struct ethtool_pauseparam {
+ __u32 cmd;
+ __u32 autoneg;
+ __u32 rx_pause;
+ __u32 tx_pause;
+};
+
+/**
+ * enum ethtool_link_ext_state - link extended state
+ */
+enum ethtool_link_ext_state {
+ ETHTOOL_LINK_EXT_STATE_AUTONEG,
+ ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+ ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+ ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+ ETHTOOL_LINK_EXT_STATE_NO_CABLE,
+ ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+ ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE,
+ ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE,
+ ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED,
+ ETHTOOL_LINK_EXT_STATE_OVERHEAT,
+};
+
+/**
+ * enum ethtool_link_ext_substate_autoneg - more information in addition to
+ * ETHTOOL_LINK_EXT_STATE_AUTONEG.
+ */
+enum ethtool_link_ext_substate_autoneg {
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE,
+ ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD,
+};
+
+/**
+ * enum ethtool_link_ext_substate_link_training - more information in addition to
+ * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
+ */
+enum ethtool_link_ext_substate_link_training {
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY,
+ ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT,
+};
+
+/**
+ * enum ethtool_link_ext_substate_logical_mismatch - more information in addition
+ * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
+ */
+enum ethtool_link_ext_substate_link_logical_mismatch {
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED,
+ ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED,
+};
+
+/**
+ * enum ethtool_link_ext_substate_bad_signal_integrity - more information in
+ * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
+ */
+enum ethtool_link_ext_substate_bad_signal_integrity {
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
+};
+
+/**
+ * enum ethtool_link_ext_substate_cable_issue - more information in
+ * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE.
+ */
+enum ethtool_link_ext_substate_cable_issue {
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1,
+ ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
+};
+
+#define ETH_GSTRING_LEN 32
+
+/**
+ * enum ethtool_stringset - string set ID
+ * @ETH_SS_TEST: Self-test result names, for use with %ETHTOOL_TEST
+ * @ETH_SS_STATS: Statistic names, for use with %ETHTOOL_GSTATS
+ * @ETH_SS_PRIV_FLAGS: Driver private flag names, for use with
+ * %ETHTOOL_GPFLAGS and %ETHTOOL_SPFLAGS
+ * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE;
+ * now deprecated
+ * @ETH_SS_FEATURES: Device feature names
+ * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
+ * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
+ * @ETH_SS_PHY_TUNABLES: PHY tunable names
+ * @ETH_SS_LINK_MODES: link mode names
+ * @ETH_SS_MSG_CLASSES: debug message class names
+ * @ETH_SS_WOL_MODES: wake-on-lan modes
+ * @ETH_SS_SOF_TIMESTAMPING: SOF_TIMESTAMPING_* flags
+ * @ETH_SS_TS_TX_TYPES: timestamping Tx types
+ * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters
+ * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types
+ */
+enum ethtool_stringset {
+ ETH_SS_TEST = 0,
+ ETH_SS_STATS,
+ ETH_SS_PRIV_FLAGS,
+ ETH_SS_NTUPLE_FILTERS,
+ ETH_SS_FEATURES,
+ ETH_SS_RSS_HASH_FUNCS,
+ ETH_SS_TUNABLES,
+ ETH_SS_PHY_STATS,
+ ETH_SS_PHY_TUNABLES,
+ ETH_SS_LINK_MODES,
+ ETH_SS_MSG_CLASSES,
+ ETH_SS_WOL_MODES,
+ ETH_SS_SOF_TIMESTAMPING,
+ ETH_SS_TS_TX_TYPES,
+ ETH_SS_TS_RX_FILTERS,
+ ETH_SS_UDP_TUNNEL_TYPES,
+
+ /* add new constants above here */
+ ETH_SS_COUNT
+};
+
+/**
+ * struct ethtool_gstrings - string set for data tagging
+ * @cmd: Command number = %ETHTOOL_GSTRINGS
+ * @string_set: String set ID; one of &enum ethtool_stringset
+ * @len: On return, the number of strings in the string set
+ * @data: Buffer for strings. Each string is null-padded to a size of
+ * %ETH_GSTRING_LEN.
+ *
+ * Users must use %ETHTOOL_GSSET_INFO to find the number of strings in
+ * the string set. They must allocate a buffer of the appropriate
+ * size immediately following this structure.
+ */
+struct ethtool_gstrings {
+ __u32 cmd;
+ __u32 string_set;
+ __u32 len;
+ __u8 data[0];
+};
+
+/**
+ * struct ethtool_sset_info - string set information
+ * @cmd: Command number = %ETHTOOL_GSSET_INFO
+ * @sset_mask: On entry, a bitmask of string sets to query, with bits
+ * numbered according to &enum ethtool_stringset. On return, a
+ * bitmask of those string sets queried that are supported.
+ * @data: Buffer for string set sizes. On return, this contains the
+ * size of each string set that was queried and supported, in
+ * order of ID.
+ *
+ * Example: The user passes in @sset_mask = 0x7 (sets 0, 1, 2) and on
+ * return @sset_mask == 0x6 (sets 1, 2). Then @data[0] contains the
+ * size of set 1 and @data[1] contains the size of set 2.
+ *
+ * Users must allocate a buffer of the appropriate size (4 * number of
+ * sets queried) immediately following this structure.
+ */
+struct ethtool_sset_info {
+ __u32 cmd;
+ __u32 reserved;
+ __u64 sset_mask;
+ __u32 data[0];
+};
+
+/**
+ * enum ethtool_test_flags - flags definition of ethtool_test
+ * @ETH_TEST_FL_OFFLINE: if set perform online and offline tests, otherwise
+ * only online tests.
+ * @ETH_TEST_FL_FAILED: Driver set this flag if test fails.
+ * @ETH_TEST_FL_EXTERNAL_LB: Application request to perform external loopback
+ * test.
+ * @ETH_TEST_FL_EXTERNAL_LB_DONE: Driver performed the external loopback test
+ */
+
+enum ethtool_test_flags {
+ ETH_TEST_FL_OFFLINE = (1 << 0),
+ ETH_TEST_FL_FAILED = (1 << 1),
+ ETH_TEST_FL_EXTERNAL_LB = (1 << 2),
+ ETH_TEST_FL_EXTERNAL_LB_DONE = (1 << 3),
+};
+
+/**
+ * struct ethtool_test - device self-test invocation
+ * @cmd: Command number = %ETHTOOL_TEST
+ * @flags: A bitmask of flags from &enum ethtool_test_flags. Some
+ * flags may be set by the user on entry; others may be set by
+ * the driver on return.
+ * @len: On return, the number of test results
+ * @data: Array of test results
+ *
+ * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
+ * number of test results that will be returned. They must allocate a
+ * buffer of the appropriate size (8 * number of results) immediately
+ * following this structure.
+ */
+struct ethtool_test {
+ __u32 cmd;
+ __u32 flags;
+ __u32 reserved;
+ __u32 len;
+ __u64 data[0];
+};
+
+/**
+ * struct ethtool_stats - device-specific statistics
+ * @cmd: Command number = %ETHTOOL_GSTATS
+ * @n_stats: On return, the number of statistics
+ * @data: Array of statistics
+ *
+ * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
+ * number of statistics that will be returned. They must allocate a
+ * buffer of the appropriate size (8 * number of statistics)
+ * immediately following this structure.
+ */
+struct ethtool_stats {
+ __u32 cmd;
+ __u32 n_stats;
+ __u64 data[0];
+};
+
+/**
+ * struct ethtool_perm_addr - permanent hardware address
+ * @cmd: Command number = %ETHTOOL_GPERMADDR
+ * @size: On entry, the size of the buffer. On return, the size of the
+ * address. The command fails if the buffer is too small.
+ * @data: Buffer for the address
+ *
+ * Users must allocate the buffer immediately following this structure.
+ * A buffer size of %MAX_ADDR_LEN should be sufficient for any address
+ * type.
+ */
+struct ethtool_perm_addr {
+ __u32 cmd;
+ __u32 size;
+ __u8 data[0];
+};
+
+/* boolean flags controlling per-interface behavior characteristics.
+ * When reading, the flag indicates whether or not a certain behavior
+ * is enabled/present. When writing, the flag indicates whether
+ * or not the driver should turn on (set) or off (clear) a behavior.
+ *
+ * Some behaviors may read-only (unconditionally absent or present).
+ * If such is the case, return EINVAL in the set-flags operation if the
+ * flag differs from the read-only value.
+ */
+enum ethtool_flags {
+ ETH_FLAG_TXVLAN = (1 << 7), /* TX VLAN offload enabled */
+ ETH_FLAG_RXVLAN = (1 << 8), /* RX VLAN offload enabled */
+ ETH_FLAG_LRO = (1 << 15), /* LRO is enabled */
+ ETH_FLAG_NTUPLE = (1 << 27), /* N-tuple filters enabled */
+ ETH_FLAG_RXHASH = (1 << 28),
+};
+
+/* The following structures are for supporting RX network flow
+ * classification and RX n-tuple configuration. Note, all multibyte
+ * fields, e.g., ip4src, ip4dst, psrc, pdst, spi, etc. are expected to
+ * be in network byte order.
+ */
+
+/**
+ * struct ethtool_tcpip4_spec - flow specification for TCP/IPv4 etc.
+ * @ip4src: Source host
+ * @ip4dst: Destination host
+ * @psrc: Source port
+ * @pdst: Destination port
+ * @tos: Type-of-service
+ *
+ * This can be used to specify a TCP/IPv4, UDP/IPv4 or SCTP/IPv4 flow.
+ */
+struct ethtool_tcpip4_spec {
+ __be32 ip4src;
+ __be32 ip4dst;
+ __be16 psrc;
+ __be16 pdst;
+ __u8 tos;
+};
+
+/**
+ * struct ethtool_ah_espip4_spec - flow specification for IPsec/IPv4
+ * @ip4src: Source host
+ * @ip4dst: Destination host
+ * @spi: Security parameters index
+ * @tos: Type-of-service
+ *
+ * This can be used to specify an IPsec transport or tunnel over IPv4.
+ */
+struct ethtool_ah_espip4_spec {
+ __be32 ip4src;
+ __be32 ip4dst;
+ __be32 spi;
+ __u8 tos;
+};
+
+#define ETH_RX_NFC_IP4 1
+
+/**
+ * struct ethtool_usrip4_spec - general flow specification for IPv4
+ * @ip4src: Source host
+ * @ip4dst: Destination host
+ * @l4_4_bytes: First 4 bytes of transport (layer 4) header
+ * @tos: Type-of-service
+ * @ip_ver: Value must be %ETH_RX_NFC_IP4; mask must be 0
+ * @proto: Transport protocol number; mask must be 0
+ */
+struct ethtool_usrip4_spec {
+ __be32 ip4src;
+ __be32 ip4dst;
+ __be32 l4_4_bytes;
+ __u8 tos;
+ __u8 ip_ver;
+ __u8 proto;
+};
+
+/**
+ * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc.
+ * @ip6src: Source host
+ * @ip6dst: Destination host
+ * @psrc: Source port
+ * @pdst: Destination port
+ * @tclass: Traffic Class
+ *
+ * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow.
+ */
+struct ethtool_tcpip6_spec {
+ __be32 ip6src[4];
+ __be32 ip6dst[4];
+ __be16 psrc;
+ __be16 pdst;
+ __u8 tclass;
+};
+
+/**
+ * struct ethtool_ah_espip6_spec - flow specification for IPsec/IPv6
+ * @ip6src: Source host
+ * @ip6dst: Destination host
+ * @spi: Security parameters index
+ * @tclass: Traffic Class
+ *
+ * This can be used to specify an IPsec transport or tunnel over IPv6.
+ */
+struct ethtool_ah_espip6_spec {
+ __be32 ip6src[4];
+ __be32 ip6dst[4];
+ __be32 spi;
+ __u8 tclass;
+};
+
+/**
+ * struct ethtool_usrip6_spec - general flow specification for IPv6
+ * @ip6src: Source host
+ * @ip6dst: Destination host
+ * @l4_4_bytes: First 4 bytes of transport (layer 4) header
+ * @tclass: Traffic Class
+ * @l4_proto: Transport protocol number (nexthdr after any Extension Headers)
+ */
+struct ethtool_usrip6_spec {
+ __be32 ip6src[4];
+ __be32 ip6dst[4];
+ __be32 l4_4_bytes;
+ __u8 tclass;
+ __u8 l4_proto;
+};
+
+union ethtool_flow_union {
+ struct ethtool_tcpip4_spec tcp_ip4_spec;
+ struct ethtool_tcpip4_spec udp_ip4_spec;
+ struct ethtool_tcpip4_spec sctp_ip4_spec;
+ struct ethtool_ah_espip4_spec ah_ip4_spec;
+ struct ethtool_ah_espip4_spec esp_ip4_spec;
+ struct ethtool_usrip4_spec usr_ip4_spec;
+ struct ethtool_tcpip6_spec tcp_ip6_spec;
+ struct ethtool_tcpip6_spec udp_ip6_spec;
+ struct ethtool_tcpip6_spec sctp_ip6_spec;
+ struct ethtool_ah_espip6_spec ah_ip6_spec;
+ struct ethtool_ah_espip6_spec esp_ip6_spec;
+ struct ethtool_usrip6_spec usr_ip6_spec;
+ struct ethhdr ether_spec;
+ __u8 hdata[52];
+};
+
+/**
+ * struct ethtool_flow_ext - additional RX flow fields
+ * @h_dest: destination MAC address
+ * @vlan_etype: VLAN EtherType
+ * @vlan_tci: VLAN tag control information
+ * @data: user defined data
+ *
+ * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT
+ * is set in &struct ethtool_rx_flow_spec @flow_type.
+ * @h_dest is valid if %FLOW_MAC_EXT is set.
+ */
+struct ethtool_flow_ext {
+ __u8 padding[2];
+ unsigned char h_dest[ETH_ALEN];
+ __be16 vlan_etype;
+ __be16 vlan_tci;
+ __be32 data[2];
+};
+
+/**
+ * struct ethtool_rx_flow_spec - classification rule for RX flows
+ * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
+ * @h_u: Flow fields to match (dependent on @flow_type)
+ * @h_ext: Additional fields to match
+ * @m_u: Masks for flow field bits to be matched
+ * @m_ext: Masks for additional field bits to be matched
+ * Note, all additional fields must be ignored unless @flow_type
+ * includes the %FLOW_EXT or %FLOW_MAC_EXT flag
+ * (see &struct ethtool_flow_ext description).
+ * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC
+ * if packets should be discarded, or %RX_CLS_FLOW_WAKE if the
+ * packets should be used for Wake-on-LAN with %WAKE_FILTER
+ * @location: Location of rule in the table. Locations must be
+ * numbered such that a flow matching multiple rules will be
+ * classified according to the first (lowest numbered) rule.
+ */
+struct ethtool_rx_flow_spec {
+ __u32 flow_type;
+ union ethtool_flow_union h_u;
+ struct ethtool_flow_ext h_ext;
+ union ethtool_flow_union m_u;
+ struct ethtool_flow_ext m_ext;
+ __u64 ring_cookie;
+ __u32 location;
+};
+
+/* How rings are laid out when accessing virtual functions or
+ * offloaded queues is device specific. To allow users to do flow
+ * steering and specify these queues the ring cookie is partitioned
+ * into a 32bit queue index with an 8 bit virtual function id.
+ * This also leaves the 3bytes for further specifiers. It is possible
+ * future devices may support more than 256 virtual functions if
+ * devices start supporting PCIe w/ARI. However at the moment I
+ * do not know of any devices that support this so I do not reserve
+ * space for this at this time. If a future patch consumes the next
+ * byte it should be aware of this possibility.
+ */
+#define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL
+#define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL
+#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32
+static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie)
+{
+ return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie;
+}
+
+static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
+{
+ return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >>
+ ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+}
+
+/**
+ * struct ethtool_rxnfc - command to get or set RX flow classification rules
+ * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH,
+ * %ETHTOOL_GRXRINGS, %ETHTOOL_GRXCLSRLCNT, %ETHTOOL_GRXCLSRULE,
+ * %ETHTOOL_GRXCLSRLALL, %ETHTOOL_SRXCLSRLDEL or %ETHTOOL_SRXCLSRLINS
+ * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW
+ * @data: Command-dependent value
+ * @fs: Flow classification rule
+ * @rss_context: RSS context to be affected
+ * @rule_cnt: Number of rules to be affected
+ * @rule_locs: Array of used rule locations
+ *
+ * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating
+ * the fields included in the flow hash, e.g. %RXH_IP_SRC. The following
+ * structure fields must not be used, except that if @flow_type includes
+ * the %FLOW_RSS flag, then @rss_context determines which RSS context to
+ * act on.
+ *
+ * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues
+ * on return.
+ *
+ * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined
+ * rules on return. If @data is non-zero on return then it is the
+ * size of the rule table, plus the flag %RX_CLS_LOC_SPECIAL if the
+ * driver supports any special location values. If that flag is not
+ * set in @data then special location values should not be used.
+ *
+ * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
+ * existing rule on entry and @fs contains the rule on return; if
+ * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is
+ * filled with the RSS context ID associated with the rule.
+ *
+ * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the
+ * user buffer for @rule_locs on entry. On return, @data is the size
+ * of the rule table, @rule_cnt is the number of defined rules, and
+ * @rule_locs contains the locations of the defined rules. Drivers
+ * must use the second parameter to get_rxnfc() instead of @rule_locs.
+ *
+ * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
+ * @fs.@location either specifies the location to use or is a special
+ * location value with %RX_CLS_LOC_SPECIAL flag set. On return,
+ * @fs.@location is the actual rule location. If @fs.@flow_type
+ * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to
+ * use for flow spreading traffic which matches this rule. The value
+ * from the rxfh indirection table will be added to @fs.@ring_cookie
+ * to choose which ring to deliver to.
+ *
+ * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
+ * existing rule on entry.
+ *
+ * A driver supporting the special location values for
+ * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused
+ * location, and may remove a rule at a later location (lower
+ * priority) that matches exactly the same set of flows. The special
+ * values are %RX_CLS_LOC_ANY, selecting any location;
+ * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum
+ * priority); and %RX_CLS_LOC_LAST, selecting the last suitable
+ * location (minimum priority). Additional special values may be
+ * defined in future and drivers must return -%EINVAL for any
+ * unrecognised value.
+ */
+struct ethtool_rxnfc {
+ __u32 cmd;
+ __u32 flow_type;
+ __u64 data;
+ struct ethtool_rx_flow_spec fs;
+ union {
+ __u32 rule_cnt;
+ __u32 rss_context;
+ };
+ __u32 rule_locs[0];
+};
+
+
+/**
+ * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection
+ * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR
+ * @size: On entry, the array size of the user buffer, which may be zero.
+ * On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware
+ * indirection table.
+ * @ring_index: RX ring/queue index for each hash value
+ *
+ * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size
+ * should be returned. For %ETHTOOL_SRXFHINDIR, a @size of zero means
+ * the table should be reset to default values. This last feature
+ * is not supported by the original implementations.
+ */
+struct ethtool_rxfh_indir {
+ __u32 cmd;
+ __u32 size;
+ __u32 ring_index[0];
+};
+
+/**
+ * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
+ * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
+ * @rss_context: RSS context identifier. Context 0 is the default for normal
+ * traffic; other contexts can be referenced as the destination for RX flow
+ * classification rules. %ETH_RXFH_CONTEXT_ALLOC is used with command
+ * %ETHTOOL_SRSSH to allocate a new RSS context; on return this field will
+ * contain the ID of the newly allocated context.
+ * @indir_size: On entry, the array size of the user buffer for the
+ * indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
+ * %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH,
+ * the array size of the hardware indirection table.
+ * @key_size: On entry, the array size of the user buffer for the hash key,
+ * which may be zero. On return from %ETHTOOL_GRSSH, the size of the
+ * hardware hash key.
+ * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
+ * Valid values are one of the %ETH_RSS_HASH_*.
+ * @rsvd: Reserved for future extensions.
+ * @rss_config: RX ring/queue index for each hash value i.e., indirection table
+ * of @indir_size __u32 elements, followed by hash key of @key_size
+ * bytes.
+ *
+ * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the
+ * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of
+ * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
+ * and a @indir_size of zero means the indir table should be reset to default
+ * values (if @rss_context == 0) or that the RSS context should be deleted.
+ * An hfunc of zero means that hash function setting is not requested.
+ */
+struct ethtool_rxfh {
+ __u32 cmd;
+ __u32 rss_context;
+ __u32 indir_size;
+ __u32 key_size;
+ __u8 hfunc;
+ __u8 rsvd8[3];
+ __u32 rsvd32;
+ __u32 rss_config[0];
+};
+#define ETH_RXFH_CONTEXT_ALLOC 0xffffffff
+#define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff
+
+/**
+ * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter
+ * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
+ * @h_u: Flow field values to match (dependent on @flow_type)
+ * @m_u: Masks for flow field value bits to be ignored
+ * @vlan_tag: VLAN tag to match
+ * @vlan_tag_mask: Mask for VLAN tag bits to be ignored
+ * @data: Driver-dependent data to match
+ * @data_mask: Mask for driver-dependent data bits to be ignored
+ * @action: RX ring/queue index to deliver to (non-negative) or other action
+ * (negative, e.g. %ETHTOOL_RXNTUPLE_ACTION_DROP)
+ *
+ * For flow types %TCP_V4_FLOW, %UDP_V4_FLOW and %SCTP_V4_FLOW, where
+ * a field value and mask are both zero this is treated as if all mask
+ * bits are set i.e. the field is ignored.
+ */
+struct ethtool_rx_ntuple_flow_spec {
+ __u32 flow_type;
+ union {
+ struct ethtool_tcpip4_spec tcp_ip4_spec;
+ struct ethtool_tcpip4_spec udp_ip4_spec;
+ struct ethtool_tcpip4_spec sctp_ip4_spec;
+ struct ethtool_ah_espip4_spec ah_ip4_spec;
+ struct ethtool_ah_espip4_spec esp_ip4_spec;
+ struct ethtool_usrip4_spec usr_ip4_spec;
+ struct ethhdr ether_spec;
+ __u8 hdata[72];
+ } h_u, m_u;
+
+ __u16 vlan_tag;
+ __u16 vlan_tag_mask;
+ __u64 data;
+ __u64 data_mask;
+
+ __s32 action;
+#define ETHTOOL_RXNTUPLE_ACTION_DROP (-1) /* drop packet */
+#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) /* clear filter */
+};
+
+/**
+ * struct ethtool_rx_ntuple - command to set or clear RX flow filter
+ * @cmd: Command number - %ETHTOOL_SRXNTUPLE
+ * @fs: Flow filter specification
+ */
+struct ethtool_rx_ntuple {
+ __u32 cmd;
+ struct ethtool_rx_ntuple_flow_spec fs;
+};
+
+#define ETHTOOL_FLASH_MAX_FILENAME 128
+enum ethtool_flash_op_type {
+ ETHTOOL_FLASH_ALL_REGIONS = 0,
+};
+
+/* for passing firmware flashing related parameters */
+struct ethtool_flash {
+ __u32 cmd;
+ __u32 region;
+ char data[ETHTOOL_FLASH_MAX_FILENAME];
+};
+
+/**
+ * struct ethtool_dump - used for retrieving, setting device dump
+ * @cmd: Command number - %ETHTOOL_GET_DUMP_FLAG, %ETHTOOL_GET_DUMP_DATA, or
+ * %ETHTOOL_SET_DUMP
+ * @version: FW version of the dump, filled in by driver
+ * @flag: driver dependent flag for dump setting, filled in by driver during
+ * get and filled in by ethtool for set operation.
+ * flag must be initialized by macro ETH_FW_DUMP_DISABLE value when
+ * firmware dump is disabled.
+ * @len: length of dump data, used as the length of the user buffer on entry to
+ * %ETHTOOL_GET_DUMP_DATA and this is returned as dump length by driver
+ * for %ETHTOOL_GET_DUMP_FLAG command
+ * @data: data collected for get dump data operation
+ */
+struct ethtool_dump {
+ __u32 cmd;
+ __u32 version;
+ __u32 flag;
+ __u32 len;
+ __u8 data[0];
+};
+
+#define ETH_FW_DUMP_DISABLE 0
+
+/* for returning and changing feature sets */
+
+/**
+ * struct ethtool_get_features_block - block with state of 32 features
+ * @available: mask of changeable features
+ * @requested: mask of features requested to be enabled if possible
+ * @active: mask of currently enabled features
+ * @never_changed: mask of features not changeable for any device
+ */
+struct ethtool_get_features_block {
+ __u32 available;
+ __u32 requested;
+ __u32 active;
+ __u32 never_changed;
+};
+
+/**
+ * struct ethtool_gfeatures - command to get state of device's features
+ * @cmd: command number = %ETHTOOL_GFEATURES
+ * @size: On entry, the number of elements in the features[] array;
+ * on return, the number of elements in features[] needed to hold
+ * all features
+ * @features: state of features
+ */
+struct ethtool_gfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_get_features_block features[0];
+};
+
+/**
+ * struct ethtool_set_features_block - block with request for 32 features
+ * @valid: mask of features to be changed
+ * @requested: values of features to be changed
+ */
+struct ethtool_set_features_block {
+ __u32 valid;
+ __u32 requested;
+};
+
+/**
+ * struct ethtool_sfeatures - command to request change in device's features
+ * @cmd: command number = %ETHTOOL_SFEATURES
+ * @size: array size of the features[] array
+ * @features: feature change masks
+ */
+struct ethtool_sfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_set_features_block features[0];
+};
+
+/**
+ * struct ethtool_ts_info - holds a device's timestamping and PHC association
+ * @cmd: command number = %ETHTOOL_GET_TS_INFO
+ * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
+ * @phc_index: device index of the associated PHC, or -1 if there is none
+ * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
+ * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
+ *
+ * The bits in the 'tx_types' and 'rx_filters' fields correspond to
+ * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values,
+ * respectively. For example, if the device supports HWTSTAMP_TX_ON,
+ * then (1 << HWTSTAMP_TX_ON) in 'tx_types' will be set.
+ *
+ * Drivers should only report the filters they actually support without
+ * upscaling in the SIOCSHWTSTAMP ioctl. If the SIOCSHWSTAMP request for
+ * HWTSTAMP_FILTER_V1_SYNC is supported by HWTSTAMP_FILTER_V1_EVENT, then the
+ * driver should only report HWTSTAMP_FILTER_V1_EVENT in this op.
+ */
+struct ethtool_ts_info {
+ __u32 cmd;
+ __u32 so_timestamping;
+ __s32 phc_index;
+ __u32 tx_types;
+ __u32 tx_reserved[3];
+ __u32 rx_filters;
+ __u32 rx_reserved[3];
+};
+
+/*
+ * %ETHTOOL_SFEATURES changes features present in features[].valid to the
+ * values of corresponding bits in features[].requested. Bits in .requested
+ * not set in .valid or not changeable are ignored.
+ *
+ * Returns %EINVAL when .valid contains undefined or never-changeable bits
+ * or size is not equal to required number of features words (32-bit blocks).
+ * Returns >= 0 if request was completed; bits set in the value mean:
+ * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not
+ * changeable (not present in %ETHTOOL_GFEATURES' features[].available)
+ * those bits were ignored.
+ * %ETHTOOL_F_WISH - some or all changes requested were recorded but the
+ * resulting state of bits masked by .valid is not equal to .requested.
+ * Probably there are other device-specific constraints on some features
+ * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered
+ * here as though ignored bits were cleared.
+ * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling
+ * compatibility functions. Requested offload state cannot be properly
+ * managed by kernel.
+ *
+ * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of
+ * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands
+ * for ETH_SS_FEATURES string set. First entry in the table corresponds to least
+ * significant bit in features[0] fields. Empty strings mark undefined features.
+ */
+enum ethtool_sfeatures_retval_bits {
+ ETHTOOL_F_UNSUPPORTED__BIT,
+ ETHTOOL_F_WISH__BIT,
+ ETHTOOL_F_COMPAT__BIT,
+};
+
+#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT)
+#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT)
+#define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT)
+
+#define MAX_NUM_QUEUE 4096
+
+/**
+ * struct ethtool_per_queue_op - apply sub command to the queues in mask.
+ * @cmd: ETHTOOL_PERQUEUE
+ * @sub_command: the sub command which apply to each queues
+ * @queue_mask: Bitmap of the queues which sub command apply to
+ * @data: A complete command structure following for each of the queues addressed
+ */
+struct ethtool_per_queue_op {
+ __u32 cmd;
+ __u32 sub_command;
+ __u32 queue_mask[__KERNEL_DIV_ROUND_UP(MAX_NUM_QUEUE, 32)];
+ char data[];
+};
+
+/**
+ * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
+ * @active_fec: FEC mode which is active on porte
+ * @fec: Bitmask of supported/configured FEC modes
+ * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ *
+ * Drivers should reject a non-zero setting of @autoneg when
+ * autoneogotiation is disabled (or not supported) for the link.
+ *
+ */
+struct ethtool_fecparam {
+ __u32 cmd;
+ /* bitmask of FEC modes */
+ __u32 active_fec;
+ __u32 fec;
+ __u32 reserved;
+};
+
+/**
+ * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
+ * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
+ * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
+ * @ETHTOOL_FEC_OFF: No FEC Mode
+ * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ */
+enum ethtool_fec_config_bits {
+ ETHTOOL_FEC_NONE_BIT,
+ ETHTOOL_FEC_AUTO_BIT,
+ ETHTOOL_FEC_OFF_BIT,
+ ETHTOOL_FEC_RS_BIT,
+ ETHTOOL_FEC_BASER_BIT,
+ ETHTOOL_FEC_LLRS_BIT,
+};
+
+#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT)
+#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT)
+#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT)
+#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT)
+#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT)
+#define ETHTOOL_FEC_LLRS (1 << ETHTOOL_FEC_LLRS_BIT)
+
+/* CMDs currently supported */
+#define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings.
+ * Please use ETHTOOL_GLINKSETTINGS
+ */
+#define ETHTOOL_SSET 0x00000002 /* DEPRECATED, Set settings.
+ * Please use ETHTOOL_SLINKSETTINGS
+ */
+#define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */
+#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */
+#define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */
+#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options. */
+#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */
+#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */
+/* Get link status for host, i.e. whether the interface *and* the
+ * physical port (if there is one) are up (ethtool_value). */
+#define ETHTOOL_GLINK 0x0000000a
+#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */
+#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
+#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */
+#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */
+#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters. */
+#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */
+#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */
+#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable
+ * (ethtool_value) */
+#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable
+ * (ethtool_value). */
+#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test. */
+#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */
+#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */
+#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */
+#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
+#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
+#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */
+#define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */
+#define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */
+#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */
+#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */
+#define ETHTOOL_GFLAGS 0x00000025 /* Get flags bitmap(ethtool_value) */
+#define ETHTOOL_SFLAGS 0x00000026 /* Set flags bitmap(ethtool_value) */
+#define ETHTOOL_GPFLAGS 0x00000027 /* Get driver-private flags bitmap */
+#define ETHTOOL_SPFLAGS 0x00000028 /* Set driver-private flags bitmap */
+
+#define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */
+#define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */
+#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */
+#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */
+#define ETHTOOL_GRXRINGS 0x0000002d /* Get RX rings available for LB */
+#define ETHTOOL_GRXCLSRLCNT 0x0000002e /* Get RX class rule count */
+#define ETHTOOL_GRXCLSRULE 0x0000002f /* Get RX classification rule */
+#define ETHTOOL_GRXCLSRLALL 0x00000030 /* Get all RX classification rule */
+#define ETHTOOL_SRXCLSRLDEL 0x00000031 /* Delete RX classification rule */
+#define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */
+#define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */
+#define ETHTOOL_RESET 0x00000034 /* Reset hardware */
+#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */
+#define ETHTOOL_GRXNTUPLE 0x00000036 /* deprecated */
+#define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */
+#define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */
+#define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */
+
+#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */
+#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */
+#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
+#define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */
+#define ETHTOOL_SET_DUMP 0x0000003e /* Set dump settings */
+#define ETHTOOL_GET_DUMP_FLAG 0x0000003f /* Get dump settings */
+#define ETHTOOL_GET_DUMP_DATA 0x00000040 /* Get dump data */
+#define ETHTOOL_GET_TS_INFO 0x00000041 /* Get time stamping and PHC info */
+#define ETHTOOL_GMODULEINFO 0x00000042 /* Get plug-in module information */
+#define ETHTOOL_GMODULEEEPROM 0x00000043 /* Get plug-in module eeprom */
+#define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */
+#define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */
+
+#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */
+#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */
+#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */
+#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */
+#define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */
+
+#define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */
+
+#define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */
+#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */
+#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */
+#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */
+#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */
+#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */
+
+/* compatibility with older code */
+#define SPARC_ETH_GSET ETHTOOL_GSET
+#define SPARC_ETH_SSET ETHTOOL_SSET
+
+/* Link mode bit indices */
+enum ethtool_link_mode_bit_indices {
+ ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0,
+ ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1,
+ ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3,
+ ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5,
+ ETHTOOL_LINK_MODE_Autoneg_BIT = 6,
+ ETHTOOL_LINK_MODE_TP_BIT = 7,
+ ETHTOOL_LINK_MODE_AUI_BIT = 8,
+ ETHTOOL_LINK_MODE_MII_BIT = 9,
+ ETHTOOL_LINK_MODE_FIBRE_BIT = 10,
+ ETHTOOL_LINK_MODE_BNC_BIT = 11,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12,
+ ETHTOOL_LINK_MODE_Pause_BIT = 13,
+ ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14,
+ ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15,
+ ETHTOOL_LINK_MODE_Backplane_BIT = 16,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19,
+ ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20,
+ ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27,
+ ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28,
+ ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29,
+ ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31,
+
+ /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
+ * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
+ * macro for bits > 31. The only way to use indices > 31 is to
+ * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API.
+ */
+
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41,
+ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42,
+ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43,
+ ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44,
+ ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45,
+ ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46,
+ ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47,
+ ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48,
+
+ ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49,
+ ETHTOOL_LINK_MODE_FEC_RS_BIT = 50,
+ ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51,
+ ETHTOOL_LINK_MODE_50000baseKR_Full_BIT = 52,
+ ETHTOOL_LINK_MODE_50000baseSR_Full_BIT = 53,
+ ETHTOOL_LINK_MODE_50000baseCR_Full_BIT = 54,
+ ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT = 55,
+ ETHTOOL_LINK_MODE_50000baseDR_Full_BIT = 56,
+ ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT = 57,
+ ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT = 58,
+ ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT = 59,
+ ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT = 60,
+ ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT = 61,
+ ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT = 62,
+ ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT = 63,
+ ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64,
+ ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65,
+ ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66,
+ ETHTOOL_LINK_MODE_100baseT1_Full_BIT = 67,
+ ETHTOOL_LINK_MODE_1000baseT1_Full_BIT = 68,
+ ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT = 69,
+ ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT = 70,
+ ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71,
+ ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72,
+ ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73,
+ ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74,
+ ETHTOOL_LINK_MODE_100000baseKR_Full_BIT = 75,
+ ETHTOOL_LINK_MODE_100000baseSR_Full_BIT = 76,
+ ETHTOOL_LINK_MODE_100000baseLR_ER_FR_Full_BIT = 77,
+ ETHTOOL_LINK_MODE_100000baseCR_Full_BIT = 78,
+ ETHTOOL_LINK_MODE_100000baseDR_Full_BIT = 79,
+ ETHTOOL_LINK_MODE_200000baseKR2_Full_BIT = 80,
+ ETHTOOL_LINK_MODE_200000baseSR2_Full_BIT = 81,
+ ETHTOOL_LINK_MODE_200000baseLR2_ER2_FR2_Full_BIT = 82,
+ ETHTOOL_LINK_MODE_200000baseDR2_Full_BIT = 83,
+ ETHTOOL_LINK_MODE_200000baseCR2_Full_BIT = 84,
+ ETHTOOL_LINK_MODE_400000baseKR4_Full_BIT = 85,
+ ETHTOOL_LINK_MODE_400000baseSR4_Full_BIT = 86,
+ ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87,
+ ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88,
+ ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89,
+ /* must be last entry */
+ __ETHTOOL_LINK_MODE_MASK_NBITS
+};
+
+#define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \
+ (1UL << (ETHTOOL_LINK_MODE_ ## base_name ## _BIT))
+
+/* DEPRECATED macros. Please migrate to
+ * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT
+ * define any new SUPPORTED_* macro for bits > 31.
+ */
+#define SUPPORTED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half)
+#define SUPPORTED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full)
+#define SUPPORTED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half)
+#define SUPPORTED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full)
+#define SUPPORTED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half)
+#define SUPPORTED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full)
+#define SUPPORTED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg)
+#define SUPPORTED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP)
+#define SUPPORTED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI)
+#define SUPPORTED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII)
+#define SUPPORTED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE)
+#define SUPPORTED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC)
+#define SUPPORTED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full)
+#define SUPPORTED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause)
+#define SUPPORTED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause)
+#define SUPPORTED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full)
+#define SUPPORTED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane)
+#define SUPPORTED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full)
+#define SUPPORTED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full)
+#define SUPPORTED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full)
+#define SUPPORTED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC)
+#define SUPPORTED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full)
+#define SUPPORTED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full)
+#define SUPPORTED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full)
+#define SUPPORTED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full)
+#define SUPPORTED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full)
+#define SUPPORTED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full)
+#define SUPPORTED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full)
+#define SUPPORTED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full)
+#define SUPPORTED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full)
+#define SUPPORTED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full)
+/* Please do not define any new SUPPORTED_* macro for bits > 31, see
+ * notice above.
+ */
+
+/*
+ * DEPRECATED macros. Please migrate to
+ * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT
+ * define any new ADERTISE_* macro for bits > 31.
+ */
+#define ADVERTISED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half)
+#define ADVERTISED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full)
+#define ADVERTISED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half)
+#define ADVERTISED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full)
+#define ADVERTISED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half)
+#define ADVERTISED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full)
+#define ADVERTISED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg)
+#define ADVERTISED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP)
+#define ADVERTISED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI)
+#define ADVERTISED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII)
+#define ADVERTISED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE)
+#define ADVERTISED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC)
+#define ADVERTISED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full)
+#define ADVERTISED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause)
+#define ADVERTISED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause)
+#define ADVERTISED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full)
+#define ADVERTISED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane)
+#define ADVERTISED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full)
+#define ADVERTISED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full)
+#define ADVERTISED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full)
+#define ADVERTISED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC)
+#define ADVERTISED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full)
+#define ADVERTISED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full)
+#define ADVERTISED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full)
+#define ADVERTISED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full)
+#define ADVERTISED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full)
+#define ADVERTISED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full)
+#define ADVERTISED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full)
+#define ADVERTISED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full)
+#define ADVERTISED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full)
+#define ADVERTISED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full)
+/* Please do not define any new ADVERTISED_* macro for bits > 31, see
+ * notice above.
+ */
+
+/* The following are all involved in forcing a particular link
+ * mode for the device for setting things. When getting the
+ * devices settings, these indicate the current mode and whether
+ * it was forced up into this mode or autonegotiated.
+ */
+
+/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+ * Update drivers/net/phy/phy.c:phy_speed_to_str() and
+ * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values.
+ */
+#define SPEED_10 10
+#define SPEED_100 100
+#define SPEED_1000 1000
+#define SPEED_2500 2500
+#define SPEED_5000 5000
+#define SPEED_10000 10000
+#define SPEED_14000 14000
+#define SPEED_20000 20000
+#define SPEED_25000 25000
+#define SPEED_40000 40000
+#define SPEED_50000 50000
+#define SPEED_56000 56000
+#define SPEED_100000 100000
+#define SPEED_200000 200000
+#define SPEED_400000 400000
+
+#define SPEED_UNKNOWN -1
+
+static inline int ethtool_validate_speed(__u32 speed)
+{
+ return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN;
+}
+
+/* Duplex, half or full. */
+#define DUPLEX_HALF 0x00
+#define DUPLEX_FULL 0x01
+#define DUPLEX_UNKNOWN 0xff
+
+static inline int ethtool_validate_duplex(__u8 duplex)
+{
+ switch (duplex) {
+ case DUPLEX_HALF:
+ case DUPLEX_FULL:
+ case DUPLEX_UNKNOWN:
+ return 1;
+ }
+
+ return 0;
+}
+
+#define MASTER_SLAVE_CFG_UNSUPPORTED 0
+#define MASTER_SLAVE_CFG_UNKNOWN 1
+#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2
+#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3
+#define MASTER_SLAVE_CFG_MASTER_FORCE 4
+#define MASTER_SLAVE_CFG_SLAVE_FORCE 5
+#define MASTER_SLAVE_STATE_UNSUPPORTED 0
+#define MASTER_SLAVE_STATE_UNKNOWN 1
+#define MASTER_SLAVE_STATE_MASTER 2
+#define MASTER_SLAVE_STATE_SLAVE 3
+#define MASTER_SLAVE_STATE_ERR 4
+
+/* Which connector port. */
+#define PORT_TP 0x00
+#define PORT_AUI 0x01
+#define PORT_MII 0x02
+#define PORT_FIBRE 0x03
+#define PORT_BNC 0x04
+#define PORT_DA 0x05
+#define PORT_NONE 0xef
+#define PORT_OTHER 0xff
+
+/* Which transceiver to use. */
+#define XCVR_INTERNAL 0x00 /* PHY and MAC are in the same package */
+#define XCVR_EXTERNAL 0x01 /* PHY and MAC are in different packages */
+#define XCVR_DUMMY1 0x02
+#define XCVR_DUMMY2 0x03
+#define XCVR_DUMMY3 0x04
+
+/* Enable or disable autonegotiation. */
+#define AUTONEG_DISABLE 0x00
+#define AUTONEG_ENABLE 0x01
+
+/* MDI or MDI-X status/control - if MDI/MDI_X/AUTO is set then
+ * the driver is required to renegotiate link
+ */
+#define ETH_TP_MDI_INVALID 0x00 /* status: unknown; control: unsupported */
+#define ETH_TP_MDI 0x01 /* status: MDI; control: force MDI */
+#define ETH_TP_MDI_X 0x02 /* status: MDI-X; control: force MDI-X */
+#define ETH_TP_MDI_AUTO 0x03 /* control: auto-select */
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY (1 << 0)
+#define WAKE_UCAST (1 << 1)
+#define WAKE_MCAST (1 << 2)
+#define WAKE_BCAST (1 << 3)
+#define WAKE_ARP (1 << 4)
+#define WAKE_MAGIC (1 << 5)
+#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */
+#define WAKE_FILTER (1 << 7)
+
+#define WOL_MODE_COUNT 8
+
+/* L2-L4 network traffic flow types */
+#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */
+#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */
+#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */
+#define AH_ESP_V4_FLOW 0x04 /* hash only */
+#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */
+#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */
+#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */
+#define AH_ESP_V6_FLOW 0x08 /* hash only */
+#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */
+#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */
+#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */
+#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */
+#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */
+#define IP_USER_FLOW IPV4_USER_FLOW
+#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */
+#define IPV4_FLOW 0x10 /* hash only */
+#define IPV6_FLOW 0x11 /* hash only */
+#define ETHER_FLOW 0x12 /* spec only (ether_spec) */
+/* Flag to enable additional fields in struct ethtool_rx_flow_spec */
+#define FLOW_EXT 0x80000000
+#define FLOW_MAC_EXT 0x40000000
+/* Flag to enable RSS spreading of traffic matching rule (nfc only) */
+#define FLOW_RSS 0x20000000
+
+/* L3-L4 network traffic flow hash options */
+#define RXH_L2DA (1 << 1)
+#define RXH_VLAN (1 << 2)
+#define RXH_L3_PROTO (1 << 3)
+#define RXH_IP_SRC (1 << 4)
+#define RXH_IP_DST (1 << 5)
+#define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */
+#define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */
+#define RXH_DISCARD (1 << 31)
+
+#define RX_CLS_FLOW_DISC 0xffffffffffffffffULL
+#define RX_CLS_FLOW_WAKE 0xfffffffffffffffeULL
+
+/* Special RX classification rule insert location values */
+#define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */
+#define RX_CLS_LOC_ANY 0xffffffff
+#define RX_CLS_LOC_FIRST 0xfffffffe
+#define RX_CLS_LOC_LAST 0xfffffffd
+
+/* EEPROM Standards for plug in modules */
+#define ETH_MODULE_SFF_8079 0x1
+#define ETH_MODULE_SFF_8079_LEN 256
+#define ETH_MODULE_SFF_8472 0x2
+#define ETH_MODULE_SFF_8472_LEN 512
+#define ETH_MODULE_SFF_8636 0x3
+#define ETH_MODULE_SFF_8636_LEN 256
+#define ETH_MODULE_SFF_8436 0x4
+#define ETH_MODULE_SFF_8436_LEN 256
+
+#define ETH_MODULE_SFF_8636_MAX_LEN 640
+#define ETH_MODULE_SFF_8436_MAX_LEN 640
+
+/* Reset flags */
+/* The reset() operation must clear the flags for the components which
+ * were actually reset. On successful return, the flags indicate the
+ * components which were not reset, either because they do not exist
+ * in the hardware or because they cannot be reset independently. The
+ * driver must never reset any components that were not requested.
+ */
+enum ethtool_reset_flags {
+ /* These flags represent components dedicated to the interface
+ * the command is addressed to. Shift any flag left by
+ * ETH_RESET_SHARED_SHIFT to reset a shared component of the
+ * same type.
+ */
+ ETH_RESET_MGMT = 1 << 0, /* Management processor */
+ ETH_RESET_IRQ = 1 << 1, /* Interrupt requester */
+ ETH_RESET_DMA = 1 << 2, /* DMA engine */
+ ETH_RESET_FILTER = 1 << 3, /* Filtering/flow direction */
+ ETH_RESET_OFFLOAD = 1 << 4, /* Protocol offload */
+ ETH_RESET_MAC = 1 << 5, /* Media access controller */
+ ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */
+ ETH_RESET_RAM = 1 << 7, /* RAM shared between
+ * multiple components */
+ ETH_RESET_AP = 1 << 8, /* Application processor */
+
+ ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to
+ * this interface */
+ ETH_RESET_ALL = 0xffffffff, /* All components used by this
+ * interface, even if shared */
+};
+#define ETH_RESET_SHARED_SHIFT 16
+
+
+/**
+ * struct ethtool_link_settings - link control and status
+ *
+ * IMPORTANT, Backward compatibility notice: When implementing new
+ * user-space tools, please first try %ETHTOOL_GLINKSETTINGS, and
+ * if it succeeds use %ETHTOOL_SLINKSETTINGS to change link
+ * settings; do not use %ETHTOOL_SSET if %ETHTOOL_GLINKSETTINGS
+ * succeeded: stick to %ETHTOOL_GLINKSETTINGS/%SLINKSETTINGS in
+ * that case. Conversely, if %ETHTOOL_GLINKSETTINGS fails, use
+ * %ETHTOOL_GSET to query and %ETHTOOL_SSET to change link
+ * settings; do not use %ETHTOOL_SLINKSETTINGS if
+ * %ETHTOOL_GLINKSETTINGS failed: stick to
+ * %ETHTOOL_GSET/%ETHTOOL_SSET in that case.
+ *
+ * @cmd: Command number = %ETHTOOL_GLINKSETTINGS or %ETHTOOL_SLINKSETTINGS
+ * @speed: Link speed (Mbps)
+ * @duplex: Duplex mode; one of %DUPLEX_*
+ * @port: Physical connector type; one of %PORT_*
+ * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not
+ * applicable. For clause 45 PHYs this is the PRTAD.
+ * @autoneg: Enable/disable autonegotiation and auto-detection;
+ * either %AUTONEG_DISABLE or %AUTONEG_ENABLE
+ * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO
+ * protocols supported by the interface; 0 if unknown.
+ * Read-only.
+ * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of
+ * %ETH_TP_MDI_*. If the status is unknown or not applicable, the
+ * value will be %ETH_TP_MDI_INVALID. Read-only.
+ * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of
+ * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads
+ * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected.
+ * When written successfully, the link should be renegotiated if
+ * necessary.
+ * @link_mode_masks_nwords: Number of 32-bit words for each of the
+ * supported, advertising, lp_advertising link mode bitmaps. For
+ * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user
+ * (>= 0); on return, if handshake in progress, negative if
+ * request size unsupported by kernel: absolute value indicates
+ * kernel expected size and all the other fields but cmd
+ * are 0; otherwise (handshake completed), strictly positive
+ * to indicate size used by kernel and cmd field stays
+ * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For
+ * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive
+ * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise
+ * refused. For drivers: ignore this field (use kernel's
+ * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will
+ * be overwritten by kernel.
+ * @supported: Bitmap with each bit meaning given by
+ * %ethtool_link_mode_bit_indices for the link modes, physical
+ * connectors and other link features for which the interface
+ * supports autonegotiation or auto-detection. Read-only.
+ * @advertising: Bitmap with each bit meaning given by
+ * %ethtool_link_mode_bit_indices for the link modes, physical
+ * connectors and other link features that are advertised through
+ * autonegotiation or enabled for auto-detection.
+ * @lp_advertising: Bitmap with each bit meaning given by
+ * %ethtool_link_mode_bit_indices for the link modes, and other
+ * link features that the link partner advertised through
+ * autonegotiation; 0 if unknown or not applicable. Read-only.
+ * @transceiver: Used to distinguish different possible PHY types,
+ * reported consistently by PHYLIB. Read-only.
+ *
+ * If autonegotiation is disabled, the speed and @duplex represent the
+ * fixed link mode and are writable if the driver supports multiple
+ * link modes. If it is enabled then they are read-only; if the link
+ * is up they represent the negotiated link mode; if the link is down,
+ * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and
+ * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ *
+ * Some hardware interfaces may have multiple PHYs and/or physical
+ * connectors fitted or do not allow the driver to detect which are
+ * fitted. For these interfaces @port and/or @phy_address may be
+ * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE.
+ * Otherwise, attempts to write different values may be ignored or
+ * rejected.
+ *
+ * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt
+ * are not available in %ethtool_link_settings. These fields will be
+ * always set to zero in %ETHTOOL_GSET reply and %ETHTOOL_SSET will
+ * fail if any of them is set to non-zero value.
+ *
+ * Users should assume that all fields not marked read-only are
+ * writable and subject to validation by the driver. They should use
+ * %ETHTOOL_GLINKSETTINGS to get the current values before making specific
+ * changes and then applying them with %ETHTOOL_SLINKSETTINGS.
+ *
+ * Drivers that implement %get_link_ksettings and/or
+ * %set_link_ksettings should ignore the @cmd
+ * and @link_mode_masks_nwords fields (any change to them overwritten
+ * by kernel), and rely only on kernel's internal
+ * %__ETHTOOL_LINK_MODE_MASK_NBITS and
+ * %ethtool_link_mode_mask_t. Drivers that implement
+ * %set_link_ksettings() should validate all fields other than @cmd
+ * and @link_mode_masks_nwords that are not described as read-only or
+ * deprecated, and must ignore all fields described as read-only.
+ */
+struct ethtool_link_settings {
+ __u32 cmd;
+ __u32 speed;
+ __u8 duplex;
+ __u8 port;
+ __u8 phy_address;
+ __u8 autoneg;
+ __u8 mdio_support;
+ __u8 eth_tp_mdix;
+ __u8 eth_tp_mdix_ctrl;
+ __s8 link_mode_masks_nwords;
+ __u8 transceiver;
+ __u8 master_slave_cfg;
+ __u8 master_slave_state;
+ __u8 reserved1[1];
+ __u32 reserved[7];
+ __u32 link_mode_masks[0];
+ /* layout of link_mode_masks fields:
+ * __u32 map_supported[link_mode_masks_nwords];
+ * __u32 map_advertising[link_mode_masks_nwords];
+ * __u32 map_lp_advertising[link_mode_masks_nwords];
+ */
+};
+#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/src/shared/linux/nl80211.h b/src/shared/linux/nl80211.h
new file mode 100644
index 0000000..65edfff
--- /dev/null
+++ b/src/shared/linux/nl80211.h
@@ -0,0 +1,6554 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __LINUX_NL80211_H
+#define __LINUX_NL80211_H
+/*
+ * 802.11 netlink interface public header
+ *
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2008 Michael Wu <flamingice@sourmilk.net>
+ * Copyright 2008 Luis Carlos Cobo <luisca@cozybit.com>
+ * Copyright 2008 Michael Buesch <m@bues.ch>
+ * Copyright 2008, 2009 Luis R. Rodriguez <lrodriguez@atheros.com>
+ * Copyright 2008 Jouni Malinen <jouni.malinen@atheros.com>
+ * Copyright 2008 Colin McCabe <colin@cozybit.com>
+ * Copyright 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018-2019 Intel Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+/*
+ * This header file defines the userspace API to the wireless stack. Please
+ * be careful not to break things - i.e. don't move anything around or so
+ * unless you can demonstrate that it breaks neither API nor ABI.
+ *
+ * Additions to the API should be accompanied by actual implementations in
+ * an upstream driver, so that example implementations exist in case there
+ * are ever concerns about the precise semantics of the API or changes are
+ * needed, and to ensure that code for dead (no longer implemented) API
+ * can actually be identified and removed.
+ * Nonetheless, semantics should also be documented carefully in this file.
+ */
+
+#include <linux/types.h>
+
+#define NL80211_GENL_NAME "nl80211"
+
+#define NL80211_MULTICAST_GROUP_CONFIG "config"
+#define NL80211_MULTICAST_GROUP_SCAN "scan"
+#define NL80211_MULTICAST_GROUP_REG "regulatory"
+#define NL80211_MULTICAST_GROUP_MLME "mlme"
+#define NL80211_MULTICAST_GROUP_VENDOR "vendor"
+#define NL80211_MULTICAST_GROUP_NAN "nan"
+#define NL80211_MULTICAST_GROUP_TESTMODE "testmode"
+
+#define NL80211_EDMG_BW_CONFIG_MIN 4
+#define NL80211_EDMG_BW_CONFIG_MAX 15
+#define NL80211_EDMG_CHANNELS_MIN 1
+#define NL80211_EDMG_CHANNELS_MAX 0x3c /* 0b00111100 */
+
+/**
+ * DOC: Station handling
+ *
+ * Stations are added per interface, but a special case exists with VLAN
+ * interfaces. When a station is bound to an AP interface, it may be moved
+ * into a VLAN identified by a VLAN interface index (%NL80211_ATTR_STA_VLAN).
+ * The station is still assumed to belong to the AP interface it was added
+ * to.
+ *
+ * Station handling varies per interface type and depending on the driver's
+ * capabilities.
+ *
+ * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS
+ * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows:
+ * - a setup station entry is added, not yet authorized, without any rate
+ * or capability information, this just exists to avoid race conditions
+ * - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid
+ * to add rate and capability information to the station and at the same
+ * time mark it authorized.
+ * - %NL80211_TDLS_ENABLE_LINK is then used
+ * - after this, the only valid operation is to remove it by tearing down
+ * the TDLS link (%NL80211_TDLS_DISABLE_LINK)
+ *
+ * TODO: need more info for other interface types
+ */
+
+/**
+ * DOC: Frame transmission/registration support
+ *
+ * Frame transmission and registration support exists to allow userspace
+ * management entities such as wpa_supplicant react to management frames
+ * that are not being handled by the kernel. This includes, for example,
+ * certain classes of action frames that cannot be handled in the kernel
+ * for various reasons.
+ *
+ * Frame registration is done on a per-interface basis and registrations
+ * cannot be removed other than by closing the socket. It is possible to
+ * specify a registration filter to register, for example, only for a
+ * certain type of action frame. In particular with action frames, those
+ * that userspace registers for will not be returned as unhandled by the
+ * driver, so that the registered application has to take responsibility
+ * for doing that.
+ *
+ * The type of frame that can be registered for is also dependent on the
+ * driver and interface type. The frame types are advertised in wiphy
+ * attributes so applications know what to expect.
+ *
+ * NOTE: When an interface changes type while registrations are active,
+ * these registrations are ignored until the interface type is
+ * changed again. This means that changing the interface type can
+ * lead to a situation that couldn't otherwise be produced, but
+ * any such registrations will be dormant in the sense that they
+ * will not be serviced, i.e. they will not receive any frames.
+ *
+ * Frame transmission allows userspace to send for example the required
+ * responses to action frames. It is subject to some sanity checking,
+ * but many frames can be transmitted. When a frame was transmitted, its
+ * status is indicated to the sending socket.
+ *
+ * For more technical details, see the corresponding command descriptions
+ * below.
+ */
+
+/**
+ * DOC: Virtual interface / concurrency capabilities
+ *
+ * Some devices are able to operate with virtual MACs, they can have
+ * more than one virtual interface. The capability handling for this
+ * is a bit complex though, as there may be a number of restrictions
+ * on the types of concurrency that are supported.
+ *
+ * To start with, each device supports the interface types listed in
+ * the %NL80211_ATTR_SUPPORTED_IFTYPES attribute, but by listing the
+ * types there no concurrency is implied.
+ *
+ * Once concurrency is desired, more attributes must be observed:
+ * To start with, since some interface types are purely managed in
+ * software, like the AP-VLAN type in mac80211 for example, there's
+ * an additional list of these, they can be added at any time and
+ * are only restricted by some semantic restrictions (e.g. AP-VLAN
+ * cannot be added without a corresponding AP interface). This list
+ * is exported in the %NL80211_ATTR_SOFTWARE_IFTYPES attribute.
+ *
+ * Further, the list of supported combinations is exported. This is
+ * in the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute. Basically,
+ * it exports a list of "groups", and at any point in time the
+ * interfaces that are currently active must fall into any one of
+ * the advertised groups. Within each group, there are restrictions
+ * on the number of interfaces of different types that are supported
+ * and also the number of different channels, along with potentially
+ * some other restrictions. See &enum nl80211_if_combination_attrs.
+ *
+ * All together, these attributes define the concurrency of virtual
+ * interfaces that a given device supports.
+ */
+
+/**
+ * DOC: packet coalesce support
+ *
+ * In most cases, host that receives IPv4 and IPv6 multicast/broadcast
+ * packets does not do anything with these packets. Therefore the
+ * reception of these unwanted packets causes unnecessary processing
+ * and power consumption.
+ *
+ * Packet coalesce feature helps to reduce number of received interrupts
+ * to host by buffering these packets in firmware/hardware for some
+ * predefined time. Received interrupt will be generated when one of the
+ * following events occur.
+ * a) Expiration of hardware timer whose expiration time is set to maximum
+ * coalescing delay of matching coalesce rule.
+ * b) Coalescing buffer in hardware reaches it's limit.
+ * c) Packet doesn't match any of the configured coalesce rules.
+ *
+ * User needs to configure following parameters for creating a coalesce
+ * rule.
+ * a) Maximum coalescing delay
+ * b) List of packet patterns which needs to be matched
+ * c) Condition for coalescence. pattern 'match' or 'no match'
+ * Multiple such rules can be created.
+ */
+
+/**
+ * DOC: WPA/WPA2 EAPOL handshake offload
+ *
+ * By setting @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK flag drivers
+ * can indicate they support offloading EAPOL handshakes for WPA/WPA2
+ * preshared key authentication. In %NL80211_CMD_CONNECT the preshared
+ * key should be specified using %NL80211_ATTR_PMK. Drivers supporting
+ * this offload may reject the %NL80211_CMD_CONNECT when no preshared
+ * key material is provided, for example when that driver does not
+ * support setting the temporal keys through %CMD_NEW_KEY.
+ *
+ * Similarly @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X flag can be
+ * set by drivers indicating offload support of the PTK/GTK EAPOL
+ * handshakes during 802.1X authentication. In order to use the offload
+ * the %NL80211_CMD_CONNECT should have %NL80211_ATTR_WANT_1X_4WAY_HS
+ * attribute flag. Drivers supporting this offload may reject the
+ * %NL80211_CMD_CONNECT when the attribute flag is not present.
+ *
+ * For 802.1X the PMK or PMK-R0 are set by providing %NL80211_ATTR_PMK
+ * using %NL80211_CMD_SET_PMK. For offloaded FT support also
+ * %NL80211_ATTR_PMKR0_NAME must be provided.
+ */
+
+/**
+ * DOC: FILS shared key authentication offload
+ *
+ * FILS shared key authentication offload can be advertized by drivers by
+ * setting @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD flag. The drivers that support
+ * FILS shared key authentication offload should be able to construct the
+ * authentication and association frames for FILS shared key authentication and
+ * eventually do a key derivation as per IEEE 802.11ai. The below additional
+ * parameters should be given to driver in %NL80211_CMD_CONNECT and/or in
+ * %NL80211_CMD_UPDATE_CONNECT_PARAMS.
+ * %NL80211_ATTR_FILS_ERP_USERNAME - used to construct keyname_nai
+ * %NL80211_ATTR_FILS_ERP_REALM - used to construct keyname_nai
+ * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used to construct erp message
+ * %NL80211_ATTR_FILS_ERP_RRK - used to generate the rIK and rMSK
+ * rIK should be used to generate an authentication tag on the ERP message and
+ * rMSK should be used to derive a PMKSA.
+ * rIK, rMSK should be generated and keyname_nai, sequence number should be used
+ * as specified in IETF RFC 6696.
+ *
+ * When FILS shared key authentication is completed, driver needs to provide the
+ * below additional parameters to userspace, which can be either after setting
+ * up a connection or after roaming.
+ * %NL80211_ATTR_FILS_KEK - used for key renewal
+ * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used in further EAP-RP exchanges
+ * %NL80211_ATTR_PMKID - used to identify the PMKSA used/generated
+ * %Nl80211_ATTR_PMK - used to update PMKSA cache in userspace
+ * The PMKSA can be maintained in userspace persistently so that it can be used
+ * later after reboots or wifi turn off/on also.
+ *
+ * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertized by a FILS
+ * capable AP supporting PMK caching. It specifies the scope within which the
+ * PMKSAs are cached in an ESS. %NL80211_CMD_SET_PMKSA and
+ * %NL80211_CMD_DEL_PMKSA are enhanced to allow support for PMKSA caching based
+ * on FILS cache identifier. Additionally %NL80211_ATTR_PMK is used with
+ * %NL80211_SET_PMKSA to specify the PMK corresponding to a PMKSA for driver to
+ * use in a FILS shared key connection with PMKSA caching.
+ */
+
+/**
+ * DOC: SAE authentication offload
+ *
+ * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they
+ * support offloading SAE authentication for WPA3-Personal networks. In
+ * %NL80211_CMD_CONNECT the password for SAE should be specified using
+ * %NL80211_ATTR_SAE_PASSWORD.
+ */
+
+/**
+ * enum nl80211_commands - supported nl80211 commands
+ *
+ * @NL80211_CMD_UNSPEC: unspecified command to catch errors
+ *
+ * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request
+ * to get a list of all present wiphys.
+ * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or
+ * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME,
+ * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ (and the
+ * attributes determining the channel width; this is used for setting
+ * monitor mode channel), %NL80211_ATTR_WIPHY_RETRY_SHORT,
+ * %NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+ * and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD.
+ * However, for setting the channel, see %NL80211_CMD_SET_CHANNEL
+ * instead, the support here is for backward compatibility only.
+ * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
+ * or rename notification. Has attributes %NL80211_ATTR_WIPHY and
+ * %NL80211_ATTR_WIPHY_NAME.
+ * @NL80211_CMD_DEL_WIPHY: Wiphy deleted. Has attributes
+ * %NL80211_ATTR_WIPHY and %NL80211_ATTR_WIPHY_NAME.
+ *
+ * @NL80211_CMD_GET_INTERFACE: Request an interface's configuration;
+ * either a dump request for all interfaces or a specific get with a
+ * single %NL80211_ATTR_IFINDEX is supported.
+ * @NL80211_CMD_SET_INTERFACE: Set type of a virtual interface, requires
+ * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_IFTYPE.
+ * @NL80211_CMD_NEW_INTERFACE: Newly created virtual interface or response
+ * to %NL80211_CMD_GET_INTERFACE. Has %NL80211_ATTR_IFINDEX,
+ * %NL80211_ATTR_WIPHY and %NL80211_ATTR_IFTYPE attributes. Can also
+ * be sent from userspace to request creation of a new virtual interface,
+ * then requires attributes %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFTYPE and
+ * %NL80211_ATTR_IFNAME.
+ * @NL80211_CMD_DEL_INTERFACE: Virtual interface was deleted, has attributes
+ * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_WIPHY. Can also be sent from
+ * userspace to request deletion of a virtual interface, then requires
+ * attribute %NL80211_ATTR_IFINDEX.
+ *
+ * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified
+ * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC.
+ * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT,
+ * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD.
+ * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA,
+ * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER,
+ * and %NL80211_ATTR_KEY_SEQ attributes.
+ * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX
+ * or %NL80211_ATTR_MAC.
+ *
+ * @NL80211_CMD_GET_BEACON: (not used)
+ * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface
+ * using the %NL80211_ATTR_BEACON_HEAD and %NL80211_ATTR_BEACON_TAIL
+ * attributes. For drivers that generate the beacon and probe responses
+ * internally, the following attributes must be provided: %NL80211_ATTR_IE,
+ * %NL80211_ATTR_IE_PROBE_RESP and %NL80211_ATTR_IE_ASSOC_RESP.
+ * @NL80211_CMD_START_AP: Start AP operation on an AP interface, parameters
+ * are like for %NL80211_CMD_SET_BEACON, and additionally parameters that
+ * do not change are used, these include %NL80211_ATTR_BEACON_INTERVAL,
+ * %NL80211_ATTR_DTIM_PERIOD, %NL80211_ATTR_SSID,
+ * %NL80211_ATTR_HIDDEN_SSID, %NL80211_ATTR_CIPHERS_PAIRWISE,
+ * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS,
+ * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY,
+ * %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT,
+ * %NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS.
+ * The channel to use can be set on the interface or be given using the
+ * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width.
+ * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP
+ * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface
+ * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP
+ *
+ * @NL80211_CMD_GET_STATION: Get station attributes for station identified by
+ * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_SET_STATION: Set station attributes for station identified by
+ * %NL80211_ATTR_MAC on the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_NEW_STATION: Add a station with given attributes to the
+ * the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_DEL_STATION: Remove a station identified by %NL80211_ATTR_MAC
+ * or, if no MAC address given, all stations, on the interface identified
+ * by %NL80211_ATTR_IFINDEX. %NL80211_ATTR_MGMT_SUBTYPE and
+ * %NL80211_ATTR_REASON_CODE can optionally be used to specify which type
+ * of disconnection indication should be sent to the station
+ * (Deauthentication or Disassociation frame and reason code for that
+ * frame).
+ *
+ * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to
+ * destination %NL80211_ATTR_MAC on the interface identified by
+ * %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_SET_MPATH: Set mesh path attributes for mesh path to
+ * destination %NL80211_ATTR_MAC on the interface identified by
+ * %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_NEW_MPATH: Create a new mesh path for the destination given by
+ * %NL80211_ATTR_MAC via %NL80211_ATTR_MPATH_NEXT_HOP.
+ * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by
+ * %NL80211_ATTR_MAC.
+ * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the
+ * the interface identified by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC
+ * or, if no MAC address given, all mesh paths, on the interface identified
+ * by %NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_SET_BSS: Set BSS attributes for BSS identified by
+ * %NL80211_ATTR_IFINDEX.
+ *
+ * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set
+ * regulatory domain. If %NL80211_ATTR_WIPHY is specified and the device
+ * has a private regulatory domain, it will be returned. Otherwise, the
+ * global regdomain will be returned.
+ * A device will have a private regulatory domain if it uses the
+ * regulatory_hint() API. Even when a private regdomain is used the channel
+ * information will still be mended according to further hints from
+ * the regulatory core to help with compliance. A dump version of this API
+ * is now available which will returns the global regdomain as well as
+ * all private regdomains of present wiphys (for those that have it).
+ * If a wiphy is self-managed (%NL80211_ATTR_WIPHY_SELF_MANAGED_REG), then
+ * its private regdomain is the only valid one for it. The regulatory
+ * core is not used to help with compliance in this case.
+ * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command
+ * after being queried by the kernel. CRDA replies by sending a regulatory
+ * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our
+ * current alpha2 if it found a match. It also provides
+ * NL80211_ATTR_REG_RULE_FLAGS, and a set of regulatory rules. Each
+ * regulatory rule is a nested set of attributes given by
+ * %NL80211_ATTR_REG_RULE_FREQ_[START|END] and
+ * %NL80211_ATTR_FREQ_RANGE_MAX_BW with an attached power rule given by
+ * %NL80211_ATTR_REG_RULE_POWER_MAX_ANT_GAIN and
+ * %NL80211_ATTR_REG_RULE_POWER_MAX_EIRP.
+ * @NL80211_CMD_REQ_SET_REG: ask the wireless core to set the regulatory domain
+ * to the specified ISO/IEC 3166-1 alpha2 country code. The core will
+ * store this as a valid request and then query userspace for it.
+ *
+ * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the
+ * interface identified by %NL80211_ATTR_IFINDEX
+ *
+ * @NL80211_CMD_SET_MESH_CONFIG: Set mesh networking properties for the
+ * interface identified by %NL80211_ATTR_IFINDEX
+ *
+ * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The
+ * interface is identified with %NL80211_ATTR_IFINDEX and the management
+ * frame subtype with %NL80211_ATTR_MGMT_SUBTYPE. The extra IE data to be
+ * added to the end of the specified management frame is specified with
+ * %NL80211_ATTR_IE. If the command succeeds, the requested data will be
+ * added to all specified management frames generated by
+ * kernel/firmware/driver.
+ * Note: This command has been removed and it is only reserved at this
+ * point to avoid re-using existing command number. The functionality this
+ * command was planned for has been provided with cleaner design with the
+ * option to specify additional IEs in NL80211_CMD_TRIGGER_SCAN,
+ * NL80211_CMD_AUTHENTICATE, NL80211_CMD_ASSOCIATE,
+ * NL80211_CMD_DEAUTHENTICATE, and NL80211_CMD_DISASSOCIATE.
+ *
+ * @NL80211_CMD_GET_SCAN: get scan results
+ * @NL80211_CMD_TRIGGER_SCAN: trigger a new scan with the given parameters
+ * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the
+ * probe requests at CCK rate or not. %NL80211_ATTR_BSSID can be used to
+ * specify a BSSID to scan for; if not included, the wildcard BSSID will
+ * be used.
+ * @NL80211_CMD_NEW_SCAN_RESULTS: scan notification (as a reply to
+ * NL80211_CMD_GET_SCAN and on the "scan" multicast group)
+ * @NL80211_CMD_SCAN_ABORTED: scan was aborted, for unspecified reasons,
+ * partial scan results may be available
+ *
+ * @NL80211_CMD_START_SCHED_SCAN: start a scheduled scan at certain
+ * intervals and certain number of cycles, as specified by
+ * %NL80211_ATTR_SCHED_SCAN_PLANS. If %NL80211_ATTR_SCHED_SCAN_PLANS is
+ * not specified and only %NL80211_ATTR_SCHED_SCAN_INTERVAL is specified,
+ * scheduled scan will run in an infinite loop with the specified interval.
+ * These attributes are mutually exculsive,
+ * i.e. NL80211_ATTR_SCHED_SCAN_INTERVAL must not be passed if
+ * NL80211_ATTR_SCHED_SCAN_PLANS is defined.
+ * If for some reason scheduled scan is aborted by the driver, all scan
+ * plans are canceled (including scan plans that did not start yet).
+ * Like with normal scans, if SSIDs (%NL80211_ATTR_SCAN_SSIDS)
+ * are passed, they are used in the probe requests. For
+ * broadcast, a broadcast SSID must be passed (ie. an empty
+ * string). If no SSID is passed, no probe requests are sent and
+ * a passive scan is performed. %NL80211_ATTR_SCAN_FREQUENCIES,
+ * if passed, define which channels should be scanned; if not
+ * passed, all channels allowed for the current regulatory domain
+ * are used. Extra IEs can also be passed from the userspace by
+ * using the %NL80211_ATTR_IE attribute. The first cycle of the
+ * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY
+ * is supplied. If the device supports multiple concurrent scheduled
+ * scans, it will allow such when the caller provides the flag attribute
+ * %NL80211_ATTR_SCHED_SCAN_MULTI to indicate user-space support for it.
+ * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if
+ * scheduled scan is not running. The caller may assume that as soon
+ * as the call returns, it is safe to start a new scheduled scan again.
+ * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan
+ * results available.
+ * @NL80211_CMD_SCHED_SCAN_STOPPED: indicates that the scheduled scan has
+ * stopped. The driver may issue this event at any time during a
+ * scheduled scan. One reason for stopping the scan is if the hardware
+ * does not support starting an association or a normal scan while running
+ * a scheduled scan. This event is also sent when the
+ * %NL80211_CMD_STOP_SCHED_SCAN command is received or when the interface
+ * is brought down while a scheduled scan was running.
+ *
+ * @NL80211_CMD_GET_SURVEY: get survey resuls, e.g. channel occupation
+ * or noise level
+ * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to
+ * NL80211_CMD_GET_SURVEY and on the "scan" multicast group)
+ *
+ * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry using %NL80211_ATTR_MAC
+ * (for the BSSID), %NL80211_ATTR_PMKID, and optionally %NL80211_ATTR_PMK
+ * (PMK is used for PTKSA derivation in case of FILS shared key offload) or
+ * using %NL80211_ATTR_SSID, %NL80211_ATTR_FILS_CACHE_ID,
+ * %NL80211_ATTR_PMKID, and %NL80211_ATTR_PMK in case of FILS
+ * authentication where %NL80211_ATTR_FILS_CACHE_ID is the identifier
+ * advertized by a FILS capable AP identifying the scope of PMKSA in an
+ * ESS.
+ * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC
+ * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID,
+ * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS
+ * authentication.
+ * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries.
+ *
+ * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain
+ * has been changed and provides details of the request information
+ * that caused the change such as who initiated the regulatory request
+ * (%NL80211_ATTR_REG_INITIATOR), the wiphy_idx
+ * (%NL80211_ATTR_REG_ALPHA2) on which the request was made from if
+ * the initiator was %NL80211_REGDOM_SET_BY_COUNTRY_IE or
+ * %NL80211_REGDOM_SET_BY_DRIVER, the type of regulatory domain
+ * set (%NL80211_ATTR_REG_TYPE), if the type of regulatory domain is
+ * %NL80211_REG_TYPE_COUNTRY the alpha2 to which we have moved on
+ * to (%NL80211_ATTR_REG_ALPHA2).
+ * @NL80211_CMD_REG_BEACON_HINT: indicates to userspace that an AP beacon
+ * has been found while world roaming thus enabling active scan or
+ * any mode of operation that initiates TX (beacons) on a channel
+ * where we would not have been able to do either before. As an example
+ * if you are world roaming (regulatory domain set to world or if your
+ * driver is using a custom world roaming regulatory domain) and while
+ * doing a passive scan on the 5 GHz band you find an AP there (if not
+ * on a DFS channel) you will now be able to actively scan for that AP
+ * or use AP mode on your card on that same channel. Note that this will
+ * never be used for channels 1-11 on the 2 GHz band as they are always
+ * enabled world wide. This beacon hint is only sent if your device had
+ * either disabled active scanning or beaconing on a channel. We send to
+ * userspace the wiphy on which we removed a restriction from
+ * (%NL80211_ATTR_WIPHY) and the channel on which this occurred
+ * before (%NL80211_ATTR_FREQ_BEFORE) and after (%NL80211_ATTR_FREQ_AFTER)
+ * the beacon hint was processed.
+ *
+ * @NL80211_CMD_AUTHENTICATE: authentication request and notification.
+ * This command is used both as a command (request to authenticate) and
+ * as an event on the "mlme" multicast group indicating completion of the
+ * authentication process.
+ * When used as a command, %NL80211_ATTR_IFINDEX is used to identify the
+ * interface. %NL80211_ATTR_MAC is used to specify PeerSTAAddress (and
+ * BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify
+ * the SSID (mainly for association, but is included in authentication
+ * request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ is used
+ * to specify the frequence of the channel in MHz. %NL80211_ATTR_AUTH_TYPE
+ * is used to specify the authentication type. %NL80211_ATTR_IE is used to
+ * define IEs (VendorSpecificInfo, but also including RSN IE and FT IEs)
+ * to be added to the frame.
+ * When used as an event, this reports reception of an Authentication
+ * frame in station and IBSS modes when the local MLME processed the
+ * frame, i.e., it was for the local STA and was received in correct
+ * state. This is similar to MLME-AUTHENTICATE.confirm primitive in the
+ * MLME SAP interface (kernel providing MLME, userspace SME). The
+ * included %NL80211_ATTR_FRAME attribute contains the management frame
+ * (including both the header and frame body, but not FCS). This event is
+ * also used to indicate if the authentication attempt timed out. In that
+ * case the %NL80211_ATTR_FRAME attribute is replaced with a
+ * %NL80211_ATTR_TIMED_OUT flag (and %NL80211_ATTR_MAC to indicate which
+ * pending authentication timed out).
+ * @NL80211_CMD_ASSOCIATE: association request and notification; like
+ * NL80211_CMD_AUTHENTICATE but for Association and Reassociation
+ * (similar to MLME-ASSOCIATE.request, MLME-REASSOCIATE.request,
+ * MLME-ASSOCIATE.confirm or MLME-REASSOCIATE.confirm primitives). The
+ * %NL80211_ATTR_PREV_BSSID attribute is used to specify whether the
+ * request is for the initial association to an ESS (that attribute not
+ * included) or for reassociation within the ESS (that attribute is
+ * included).
+ * @NL80211_CMD_DEAUTHENTICATE: deauthentication request and notification; like
+ * NL80211_CMD_AUTHENTICATE but for Deauthentication frames (similar to
+ * MLME-DEAUTHENTICATION.request and MLME-DEAUTHENTICATE.indication
+ * primitives).
+ * @NL80211_CMD_DISASSOCIATE: disassociation request and notification; like
+ * NL80211_CMD_AUTHENTICATE but for Disassociation frames (similar to
+ * MLME-DISASSOCIATE.request and MLME-DISASSOCIATE.indication primitives).
+ *
+ * @NL80211_CMD_MICHAEL_MIC_FAILURE: notification of a locally detected Michael
+ * MIC (part of TKIP) failure; sent on the "mlme" multicast group; the
+ * event includes %NL80211_ATTR_MAC to describe the source MAC address of
+ * the frame with invalid MIC, %NL80211_ATTR_KEY_TYPE to show the key
+ * type, %NL80211_ATTR_KEY_IDX to indicate the key identifier, and
+ * %NL80211_ATTR_KEY_SEQ to indicate the TSC value of the frame; this
+ * event matches with MLME-MICHAELMICFAILURE.indication() primitive
+ *
+ * @NL80211_CMD_JOIN_IBSS: Join a new IBSS -- given at least an SSID and a
+ * FREQ attribute (for the initial frequency if no peer can be found)
+ * and optionally a MAC (as BSSID) and FREQ_FIXED attribute if those
+ * should be fixed rather than automatically determined. Can only be
+ * executed on a network interface that is UP, and fixed BSSID/FREQ
+ * may be rejected. Another optional parameter is the beacon interval,
+ * given in the %NL80211_ATTR_BEACON_INTERVAL attribute, which if not
+ * given defaults to 100 TU (102.4ms).
+ * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is
+ * determined by the network interface.
+ *
+ * @NL80211_CMD_TESTMODE: testmode command, takes a wiphy (or ifindex) attribute
+ * to identify the device, and the TESTDATA blob attribute to pass through
+ * to the driver.
+ *
+ * @NL80211_CMD_CONNECT: connection request and notification; this command
+ * requests to connect to a specified network but without separating
+ * auth and assoc steps. For this, you need to specify the SSID in a
+ * %NL80211_ATTR_SSID attribute, and can optionally specify the association
+ * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP,
+ * %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT,
+ * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT,
+ * %NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and
+ * %NL80211_ATTR_WIPHY_FREQ_HINT.
+ * If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are
+ * restrictions on BSS selection, i.e., they effectively prevent roaming
+ * within the ESS. %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT
+ * can be included to provide a recommendation of the initial BSS while
+ * allowing the driver to roam to other BSSes within the ESS and also to
+ * ignore this recommendation if the indicated BSS is not ideal. Only one
+ * set of BSSID,frequency parameters is used (i.e., either the enforcing
+ * %NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict
+ * %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT).
+ * %NL80211_ATTR_PREV_BSSID can be used to request a reassociation within
+ * the ESS in case the device is already associated and an association with
+ * a different BSS is desired.
+ * Background scan period can optionally be
+ * specified in %NL80211_ATTR_BG_SCAN_PERIOD,
+ * if not specified default background scan configuration
+ * in driver is used and if period value is 0, bg scan will be disabled.
+ * This attribute is ignored if driver does not support roam scan.
+ * It is also sent as an event, with the BSSID and response IEs when the
+ * connection is established or failed to be established. This can be
+ * determined by the %NL80211_ATTR_STATUS_CODE attribute (0 = success,
+ * non-zero = failure). If %NL80211_ATTR_TIMED_OUT is included in the
+ * event, the connection attempt failed due to not being able to initiate
+ * authentication/association or not receiving a response from the AP.
+ * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as
+ * well to remain backwards compatible.
+ * When establishing a security association, drivers that support 4 way
+ * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when
+ * the 4 way handshake is completed successfully.
+ * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself.
+ * When a security association was established with the new AP (e.g. if
+ * the FT protocol was used for roaming or the driver completed the 4 way
+ * handshake), this event should be followed by an
+ * %NL80211_CMD_PORT_AUTHORIZED event.
+ * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify
+ * userspace that a connection was dropped by the AP or due to other
+ * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and
+ * %NL80211_ATTR_REASON_CODE attributes are used.
+ *
+ * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices
+ * associated with this wiphy must be down and will follow.
+ *
+ * @NL80211_CMD_REMAIN_ON_CHANNEL: Request to remain awake on the specified
+ * channel for the specified amount of time. This can be used to do
+ * off-channel operations like transmit a Public Action frame and wait for
+ * a response while being associated to an AP on another channel.
+ * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus
+ * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the
+ * frequency for the operation.
+ * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds
+ * to remain on the channel. This command is also used as an event to
+ * notify when the requested duration starts (it may take a while for the
+ * driver to schedule this time due to other concurrent needs for the
+ * radio).
+ * When called, this operation returns a cookie (%NL80211_ATTR_COOKIE)
+ * that will be included with any events pertaining to this request;
+ * the cookie is also used to cancel the request.
+ * @NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL: This command can be used to cancel a
+ * pending remain-on-channel duration if the desired operation has been
+ * completed prior to expiration of the originally requested duration.
+ * %NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify the
+ * radio. The %NL80211_ATTR_COOKIE attribute must be given as well to
+ * uniquely identify the request.
+ * This command is also used as an event to notify when a requested
+ * remain-on-channel duration has expired.
+ *
+ * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX
+ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface
+ * and @NL80211_ATTR_TX_RATES the set of allowed rates.
+ *
+ * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames
+ * (via @NL80211_CMD_FRAME) for processing in userspace. This command
+ * requires an interface index, a frame type attribute (optional for
+ * backward compatibility reasons, if not given assumes action frames)
+ * and a match attribute containing the first few bytes of the frame
+ * that should match, e.g. a single byte for only a category match or
+ * four bytes for vendor frames including the OUI. The registration
+ * cannot be dropped, but is removed automatically when the netlink
+ * socket is closed. Multiple registrations can be made.
+ * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for
+ * backward compatibility
+ * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This
+ * command is used both as a request to transmit a management frame and
+ * as an event indicating reception of a frame that was not processed in
+ * kernel code, but is for us (i.e., which may need to be processed in a
+ * user space application). %NL80211_ATTR_FRAME is used to specify the
+ * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ is used
+ * to indicate on which channel the frame is to be transmitted or was
+ * received. If this channel is not the current channel (remain-on-channel
+ * or the operational channel) the device will switch to the given channel
+ * and transmit the frame, optionally waiting for a response for the time
+ * specified using %NL80211_ATTR_DURATION. When called, this operation
+ * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the
+ * TX status event pertaining to the TX request.
+ * %NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the
+ * management frames at CCK rate or not in 2GHz band.
+ * %NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA
+ * counters which will be updated to the current value. This attribute
+ * is used during CSA period.
+ * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this
+ * command may be used with the corresponding cookie to cancel the wait
+ * time if it is known that it is no longer necessary. This command is
+ * also sent as an event whenever the driver has completed the off-channel
+ * wait time.
+ * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility.
+ * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame
+ * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies
+ * the TX command and %NL80211_ATTR_FRAME includes the contents of the
+ * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged
+ * the frame.
+ * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for
+ * backward compatibility.
+ *
+ * @NL80211_CMD_SET_POWER_SAVE: Set powersave, using %NL80211_ATTR_PS_STATE
+ * @NL80211_CMD_GET_POWER_SAVE: Get powersave status in %NL80211_ATTR_PS_STATE
+ *
+ * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command
+ * is used to configure connection quality monitoring notification trigger
+ * levels.
+ * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This
+ * command is used as an event to indicate the that a trigger level was
+ * reached.
+ * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ
+ * and the attributes determining channel width) the given interface
+ * (identifed by %NL80211_ATTR_IFINDEX) shall operate on.
+ * In case multiple channels are supported by the device, the mechanism
+ * with which it switches channels is implementation-defined.
+ * When a monitor interface is given, it can only switch channel while
+ * no other interfaces are operating to avoid disturbing the operation
+ * of any other interfaces, and other interfaces will again take
+ * precedence when they are used.
+ *
+ * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface.
+ *
+ * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform
+ * multicast to unicast conversion. When enabled, all multicast packets
+ * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header)
+ * will be sent out to each station once with the destination (multicast)
+ * MAC address replaced by the station's MAC address. Note that this may
+ * break certain expectations of the receiver, e.g. the ability to drop
+ * unicast IP packets encapsulated in multicast L2 frames, or the ability
+ * to not send destination unreachable messages in such cases.
+ * This can only be toggled per BSS. Configure this on an interface of
+ * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces
+ * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode.
+ * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this
+ * command, the feature is disabled.
+ *
+ * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial
+ * mesh config parameters may be given.
+ * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the
+ * network is determined by the network interface.
+ *
+ * @NL80211_CMD_UNPROT_DEAUTHENTICATE: Unprotected deauthentication frame
+ * notification. This event is used to indicate that an unprotected
+ * deauthentication frame was dropped when MFP is in use.
+ * @NL80211_CMD_UNPROT_DISASSOCIATE: Unprotected disassociation frame
+ * notification. This event is used to indicate that an unprotected
+ * disassociation frame was dropped when MFP is in use.
+ *
+ * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a
+ * beacon or probe response from a compatible mesh peer. This is only
+ * sent while no station information (sta_info) exists for the new peer
+ * candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH,
+ * @NL80211_MESH_SETUP_USERSPACE_AMPE, or
+ * @NL80211_MESH_SETUP_USERSPACE_MPM is set. On reception of this
+ * notification, userspace may decide to create a new station
+ * (@NL80211_CMD_NEW_STATION). To stop this notification from
+ * reoccurring, the userspace authentication daemon may want to create the
+ * new station with the AUTHENTICATED flag unset and maybe change it later
+ * depending on the authentication result.
+ *
+ * @NL80211_CMD_GET_WOWLAN: get Wake-on-Wireless-LAN (WoWLAN) settings.
+ * @NL80211_CMD_SET_WOWLAN: set Wake-on-Wireless-LAN (WoWLAN) settings.
+ * Since wireless is more complex than wired ethernet, it supports
+ * various triggers. These triggers can be configured through this
+ * command with the %NL80211_ATTR_WOWLAN_TRIGGERS attribute. For
+ * more background information, see
+ * http://wireless.kernel.org/en/users/Documentation/WoWLAN.
+ * The @NL80211_CMD_SET_WOWLAN command can also be used as a notification
+ * from the driver reporting the wakeup reason. In this case, the
+ * @NL80211_ATTR_WOWLAN_TRIGGERS attribute will contain the reason
+ * for the wakeup, if it was caused by wireless. If it is not present
+ * in the wakeup notification, the wireless device didn't cause the
+ * wakeup but reports that it was woken up.
+ *
+ * @NL80211_CMD_SET_REKEY_OFFLOAD: This command is used give the driver
+ * the necessary information for supporting GTK rekey offload. This
+ * feature is typically used during WoWLAN. The configuration data
+ * is contained in %NL80211_ATTR_REKEY_DATA (which is nested and
+ * contains the data in sub-attributes). After rekeying happened,
+ * this command may also be sent by the driver as an MLME event to
+ * inform userspace of the new replay counter.
+ *
+ * @NL80211_CMD_PMKSA_CANDIDATE: This is used as an event to inform userspace
+ * of PMKSA caching dandidates.
+ *
+ * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup).
+ * In addition, this can be used as an event to request userspace to take
+ * actions on TDLS links (set up a new link or tear down an existing one).
+ * In such events, %NL80211_ATTR_TDLS_OPERATION indicates the requested
+ * operation, %NL80211_ATTR_MAC contains the peer MAC address, and
+ * %NL80211_ATTR_REASON_CODE the reason code to be used (only with
+ * %NL80211_TDLS_TEARDOWN).
+ * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. The
+ * %NL80211_ATTR_TDLS_ACTION attribute determines the type of frame to be
+ * sent. Public Action codes (802.11-2012 8.1.5.1) will be sent as
+ * 802.11 management frames, while TDLS action codes (802.11-2012
+ * 8.5.13.1) will be encapsulated and sent as data frames. The currently
+ * supported Public Action code is %WLAN_PUB_ACTION_TDLS_DISCOVER_RES
+ * and the currently supported TDLS actions codes are given in
+ * &enum ieee80211_tdls_actioncode.
+ *
+ * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP
+ * (or GO) interface (i.e. hostapd) to ask for unexpected frames to
+ * implement sending deauth to stations that send unexpected class 3
+ * frames. Also used as the event sent by the kernel when such a frame
+ * is received.
+ * For the event, the %NL80211_ATTR_MAC attribute carries the TA and
+ * other attributes like the interface index are present.
+ * If used as the command it must have an interface index and you can
+ * only unsubscribe from the event by closing the socket. Subscription
+ * is also for %NL80211_CMD_UNEXPECTED_4ADDR_FRAME events.
+ *
+ * @NL80211_CMD_UNEXPECTED_4ADDR_FRAME: Sent as an event indicating that the
+ * associated station identified by %NL80211_ATTR_MAC sent a 4addr frame
+ * and wasn't already in a 4-addr VLAN. The event will be sent similarly
+ * to the %NL80211_CMD_UNEXPECTED_FRAME event, to the same listener.
+ *
+ * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface
+ * by sending a null data frame to it and reporting when the frame is
+ * acknowleged. This is used to allow timing out inactive clients. Uses
+ * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a
+ * direct reply with an %NL80211_ATTR_COOKIE that is later used to match
+ * up the event with the request. The event includes the same data and
+ * has %NL80211_ATTR_ACK set if the frame was ACKed.
+ *
+ * @NL80211_CMD_REGISTER_BEACONS: Register this socket to receive beacons from
+ * other BSSes when any interfaces are in AP mode. This helps implement
+ * OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME
+ * messages. Note that per PHY only one application may register.
+ *
+ * @NL80211_CMD_SET_NOACK_MAP: sets a bitmap for the individual TIDs whether
+ * No Acknowledgement Policy should be applied.
+ *
+ * @NL80211_CMD_CH_SWITCH_NOTIFY: An AP or GO may decide to switch channels
+ * independently of the userspace SME, send this event indicating
+ * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the
+ * attributes determining channel width. This indication may also be
+ * sent when a remotely-initiated switch (e.g., when a STA receives a CSA
+ * from the remote AP) is completed;
+ *
+ * @NL80211_CMD_CH_SWITCH_STARTED_NOTIFY: Notify that a channel switch
+ * has been started on an interface, regardless of the initiator
+ * (ie. whether it was requested from a remote device or
+ * initiated on our own). It indicates that
+ * %NL80211_ATTR_IFINDEX will be on %NL80211_ATTR_WIPHY_FREQ
+ * after %NL80211_ATTR_CH_SWITCH_COUNT TBTT's. The userspace may
+ * decide to react to this indication by requesting other
+ * interfaces to change channel as well.
+ *
+ * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by
+ * its %NL80211_ATTR_WDEV identifier. It must have been created with
+ * %NL80211_CMD_NEW_INTERFACE previously. After it has been started, the
+ * P2P Device can be used for P2P operations, e.g. remain-on-channel and
+ * public action frame TX.
+ * @NL80211_CMD_STOP_P2P_DEVICE: Stop the given P2P Device, identified by
+ * its %NL80211_ATTR_WDEV identifier.
+ *
+ * @NL80211_CMD_CONN_FAILED: connection request to an AP failed; used to
+ * notify userspace that AP has rejected the connection request from a
+ * station, due to particular reason. %NL80211_ATTR_CONN_FAILED_REASON
+ * is used for this.
+ *
+ * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames
+ * for IBSS or MESH vif.
+ *
+ * @NL80211_CMD_SET_MAC_ACL: sets ACL for MAC address based access control.
+ * This is to be used with the drivers advertising the support of MAC
+ * address based access control. List of MAC addresses is passed in
+ * %NL80211_ATTR_MAC_ADDRS and ACL policy is passed in
+ * %NL80211_ATTR_ACL_POLICY. Driver will enable ACL with this list, if it
+ * is not already done. The new list will replace any existing list. Driver
+ * will clear its ACL when the list of MAC addresses passed is empty. This
+ * command is used in AP/P2P GO mode. Driver has to make sure to clear its
+ * ACL list during %NL80211_CMD_STOP_AP.
+ *
+ * @NL80211_CMD_RADAR_DETECT: Start a Channel availability check (CAC). Once
+ * a radar is detected or the channel availability scan (CAC) has finished
+ * or was aborted, or a radar was detected, usermode will be notified with
+ * this event. This command is also used to notify userspace about radars
+ * while operating on this channel.
+ * %NL80211_ATTR_RADAR_EVENT is used to inform about the type of the
+ * event.
+ *
+ * @NL80211_CMD_GET_PROTOCOL_FEATURES: Get global nl80211 protocol features,
+ * i.e. features for the nl80211 protocol rather than device features.
+ * Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap.
+ *
+ * @NL80211_CMD_UPDATE_FT_IES: Pass down the most up-to-date Fast Transition
+ * Information Element to the WLAN driver
+ *
+ * @NL80211_CMD_FT_EVENT: Send a Fast transition event from the WLAN driver
+ * to the supplicant. This will carry the target AP's MAC address along
+ * with the relevant Information Elements. This event is used to report
+ * received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE).
+ *
+ * @NL80211_CMD_CRIT_PROTOCOL_START: Indicates user-space will start running
+ * a critical protocol that needs more reliability in the connection to
+ * complete.
+ *
+ * @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can
+ * return back to normal.
+ *
+ * @NL80211_CMD_GET_COALESCE: Get currently supported coalesce rules.
+ * @NL80211_CMD_SET_COALESCE: Configure coalesce rules or clear existing rules.
+ *
+ * @NL80211_CMD_CHANNEL_SWITCH: Perform a channel switch by announcing the
+ * the new channel information (Channel Switch Announcement - CSA)
+ * in the beacon for some time (as defined in the
+ * %NL80211_ATTR_CH_SWITCH_COUNT parameter) and then change to the
+ * new channel. Userspace provides the new channel information (using
+ * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel
+ * width). %NL80211_ATTR_CH_SWITCH_BLOCK_TX may be supplied to inform
+ * other station that transmission must be blocked until the channel
+ * switch is complete.
+ *
+ * @NL80211_CMD_VENDOR: Vendor-specified command/event. The command is specified
+ * by the %NL80211_ATTR_VENDOR_ID attribute and a sub-command in
+ * %NL80211_ATTR_VENDOR_SUBCMD. Parameter(s) can be transported in
+ * %NL80211_ATTR_VENDOR_DATA.
+ * For feature advertisement, the %NL80211_ATTR_VENDOR_DATA attribute is
+ * used in the wiphy data as a nested attribute containing descriptions
+ * (&struct nl80211_vendor_cmd_info) of the supported vendor commands.
+ * This may also be sent as an event with the same attributes.
+ *
+ * @NL80211_CMD_SET_QOS_MAP: Set Interworking QoS mapping for IP DSCP values.
+ * The QoS mapping information is included in %NL80211_ATTR_QOS_MAP. If
+ * that attribute is not included, QoS mapping is disabled. Since this
+ * QoS mapping is relevant for IP packets, it is only valid during an
+ * association. This is cleared on disassociation and AP restart.
+ *
+ * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given
+ * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO
+ * and %NL80211_ATTR_ADMITTED_TIME parameters.
+ * Note that the action frame handshake with the AP shall be handled by
+ * userspace via the normal management RX/TX framework, this only sets
+ * up the TX TS in the driver/device.
+ * If the admitted time attribute is not added then the request just checks
+ * if a subsequent setup could be successful, the intent is to use this to
+ * avoid setting up a session with the AP when local restrictions would
+ * make that impossible. However, the subsequent "real" setup may still
+ * fail even if the check was successful.
+ * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID
+ * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this
+ * before removing a station entry entirely, or before disassociating
+ * or similar, cleanup will happen in the driver/device in this case.
+ *
+ * @NL80211_CMD_GET_MPP: Get mesh path attributes for mesh proxy path to
+ * destination %NL80211_ATTR_MAC on the interface identified by
+ * %NL80211_ATTR_IFINDEX.
+ *
+ * @NL80211_CMD_JOIN_OCB: Join the OCB network. The center frequency and
+ * bandwidth of a channel must be given.
+ * @NL80211_CMD_LEAVE_OCB: Leave the OCB network -- no special arguments, the
+ * network is determined by the network interface.
+ *
+ * @NL80211_CMD_TDLS_CHANNEL_SWITCH: Start channel-switching with a TDLS peer,
+ * identified by the %NL80211_ATTR_MAC parameter. A target channel is
+ * provided via %NL80211_ATTR_WIPHY_FREQ and other attributes determining
+ * channel width/type. The target operating class is given via
+ * %NL80211_ATTR_OPER_CLASS.
+ * The driver is responsible for continually initiating channel-switching
+ * operations and returning to the base channel for communication with the
+ * AP.
+ * @NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH: Stop channel-switching with a TDLS
+ * peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel
+ * when this command completes.
+ *
+ * @NL80211_CMD_WIPHY_REG_CHANGE: Similar to %NL80211_CMD_REG_CHANGE, but used
+ * as an event to indicate changes for devices with wiphy-specific regdom
+ * management.
+ *
+ * @NL80211_CMD_ABORT_SCAN: Stop an ongoing scan. Returns -ENOENT if a scan is
+ * not running. The driver indicates the status of the scan through
+ * cfg80211_scan_done().
+ *
+ * @NL80211_CMD_START_NAN: Start NAN operation, identified by its
+ * %NL80211_ATTR_WDEV interface. This interface must have been
+ * previously created with %NL80211_CMD_NEW_INTERFACE. After it
+ * has been started, the NAN interface will create or join a
+ * cluster. This command must have a valid
+ * %NL80211_ATTR_NAN_MASTER_PREF attribute and optional
+ * %NL80211_ATTR_BANDS attributes. If %NL80211_ATTR_BANDS is
+ * omitted or set to 0, it means don't-care and the device will
+ * decide what to use. After this command NAN functions can be
+ * added.
+ * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by
+ * its %NL80211_ATTR_WDEV interface.
+ * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined
+ * with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this
+ * operation returns the strictly positive and unique instance id
+ * (%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE)
+ * of the function upon success.
+ * Since instance ID's can be re-used, this cookie is the right
+ * way to identify the function. This will avoid races when a termination
+ * event is handled by the user space after it has already added a new
+ * function that got the same instance id from the kernel as the one
+ * which just terminated.
+ * This cookie may be used in NAN events even before the command
+ * returns, so userspace shouldn't process NAN events until it processes
+ * the response to this command.
+ * Look at %NL80211_ATTR_SOCKET_OWNER as well.
+ * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie.
+ * This command is also used as a notification sent when a NAN function is
+ * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID
+ * and %NL80211_ATTR_COOKIE attributes.
+ * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN
+ * configuration. NAN must be operational (%NL80211_CMD_START_NAN
+ * was executed). It must contain at least one of the following
+ * attributes: %NL80211_ATTR_NAN_MASTER_PREF,
+ * %NL80211_ATTR_BANDS. If %NL80211_ATTR_BANDS is omitted, the
+ * current configuration is not changed. If it is present but
+ * set to zero, the configuration is changed to don't-care
+ * (i.e. the device can decide what to do).
+ * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported.
+ * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and
+ * %NL80211_ATTR_COOKIE.
+ *
+ * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters
+ * for subsequent roaming cases if the driver or firmware uses internal
+ * BSS selection. This command can be issued only while connected and it
+ * does not result in a change for the current association. Currently,
+ * only the %NL80211_ATTR_IE data is used and updated with this command.
+ *
+ * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0
+ * for the given authenticator address (specified with %NL80211_ATTR_MAC).
+ * When %NL80211_ATTR_PMKR0_NAME is set, %NL80211_ATTR_PMK specifies the
+ * PMK-R0, otherwise it specifies the PMK.
+ * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously
+ * configured PMK for the authenticator address identified by
+ * %NL80211_ATTR_MAC.
+ * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way
+ * handshake was completed successfully by the driver. The BSSID is
+ * specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake
+ * offload should send this event after indicating 802.11 association with
+ * %NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed
+ * %NL80211_CMD_DISCONNECT should be indicated instead.
+ *
+ * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request
+ * and RX notification. This command is used both as a request to transmit
+ * a control port frame and as a notification that a control port frame
+ * has been received. %NL80211_ATTR_FRAME is used to specify the
+ * frame contents. The frame is the raw EAPoL data, without ethernet or
+ * 802.11 headers.
+ * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added
+ * indicating the protocol type of the received frame; whether the frame
+ * was received unencrypted and the MAC address of the peer respectively.
+ *
+ * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
+ *
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ * drivers that do not define separate commands for authentication and
+ * association, but rely on user space for the authentication to happen.
+ * This interface acts both as the event request (driver to user space)
+ * to trigger the authentication and command response (userspace to
+ * driver) to indicate the authentication status.
+ *
+ * User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ * trigger a connection. The host driver selects a BSS and further uses
+ * this interface to offload only the authentication part to the user
+ * space. Authentication frames are passed between the driver and user
+ * space through the %NL80211_CMD_FRAME interface. Host driver proceeds
+ * further with the association after getting successful authentication
+ * status. User space indicates the authentication status through
+ * %NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH
+ * command interface.
+ *
+ * Host driver reports this status on an authentication failure to the
+ * user space through the connect result as the user space would have
+ * initiated the connection through the connect request.
+ *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's
+ * ht opmode or vht opmode changes using any of %NL80211_ATTR_SMPS_MODE,
+ * %NL80211_ATTR_CHANNEL_WIDTH,%NL80211_ATTR_NSS attributes with its
+ * address(specified in %NL80211_ATTR_MAC).
+ *
+ * @NL80211_CMD_GET_FTM_RESPONDER_STATS: Retrieve FTM responder statistics, in
+ * the %NL80211_ATTR_FTM_RESPONDER_STATS attribute.
+ *
+ * @NL80211_CMD_PEER_MEASUREMENT_START: start a (set of) peer measurement(s)
+ * with the given parameters, which are encapsulated in the nested
+ * %NL80211_ATTR_PEER_MEASUREMENTS attribute. Optionally, MAC address
+ * randomization may be enabled and configured by specifying the
+ * %NL80211_ATTR_MAC and %NL80211_ATTR_MAC_MASK attributes.
+ * If a timeout is requested, use the %NL80211_ATTR_TIMEOUT attribute.
+ * A u64 cookie for further %NL80211_ATTR_COOKIE use is is returned in
+ * the netlink extended ack message.
+ *
+ * To cancel a measurement, close the socket that requested it.
+ *
+ * Measurement results are reported to the socket that requested the
+ * measurement using @NL80211_CMD_PEER_MEASUREMENT_RESULT when they
+ * become available, so applications must ensure a large enough socket
+ * buffer size.
+ *
+ * Depending on driver support it may or may not be possible to start
+ * multiple concurrent measurements.
+ * @NL80211_CMD_PEER_MEASUREMENT_RESULT: This command number is used for the
+ * result notification from the driver to the requesting socket.
+ * @NL80211_CMD_PEER_MEASUREMENT_COMPLETE: Notification only, indicating that
+ * the measurement completed, using the measurement cookie
+ * (%NL80211_ATTR_COOKIE).
+ *
+ * @NL80211_CMD_NOTIFY_RADAR: Notify the kernel that a radar signal was
+ * detected and reported by a neighboring device on the channel
+ * indicated by %NL80211_ATTR_WIPHY_FREQ and other attributes
+ * determining the width and type.
+ *
+ * @NL80211_CMD_UPDATE_OWE_INFO: This interface allows the host driver to
+ * offload OWE processing to user space. This intends to support
+ * OWE AKM by the host drivers that implement SME but rely
+ * on the user space for the cryptographic/DH IE processing in AP mode.
+ *
+ * @NL80211_CMD_PROBE_MESH_LINK: The requirement for mesh link metric
+ * refreshing, is that from one mesh point we be able to send some data
+ * frames to other mesh points which are not currently selected as a
+ * primary traffic path, but which are only 1 hop away. The absence of
+ * the primary path to the chosen node makes it necessary to apply some
+ * form of marking on a chosen packet stream so that the packets can be
+ * properly steered to the selected node for testing, and not by the
+ * regular mesh path lookup. Further, the packets must be of type data
+ * so that the rate control (often embedded in firmware) is used for
+ * rate selection.
+ *
+ * Here attribute %NL80211_ATTR_MAC is used to specify connected mesh
+ * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame
+ * content. The frame is ethernet data.
+ *
+ * @NL80211_CMD_MAX: highest used command number
+ * @__NL80211_CMD_AFTER_LAST: internal use
+ */
+enum nl80211_commands {
+/* don't change the order or add anything between, this is ABI! */
+ NL80211_CMD_UNSPEC,
+
+ NL80211_CMD_GET_WIPHY, /* can dump */
+ NL80211_CMD_SET_WIPHY,
+ NL80211_CMD_NEW_WIPHY,
+ NL80211_CMD_DEL_WIPHY,
+
+ NL80211_CMD_GET_INTERFACE, /* can dump */
+ NL80211_CMD_SET_INTERFACE,
+ NL80211_CMD_NEW_INTERFACE,
+ NL80211_CMD_DEL_INTERFACE,
+
+ NL80211_CMD_GET_KEY,
+ NL80211_CMD_SET_KEY,
+ NL80211_CMD_NEW_KEY,
+ NL80211_CMD_DEL_KEY,
+
+ NL80211_CMD_GET_BEACON,
+ NL80211_CMD_SET_BEACON,
+ NL80211_CMD_START_AP,
+ NL80211_CMD_NEW_BEACON = NL80211_CMD_START_AP,
+ NL80211_CMD_STOP_AP,
+ NL80211_CMD_DEL_BEACON = NL80211_CMD_STOP_AP,
+
+ NL80211_CMD_GET_STATION,
+ NL80211_CMD_SET_STATION,
+ NL80211_CMD_NEW_STATION,
+ NL80211_CMD_DEL_STATION,
+
+ NL80211_CMD_GET_MPATH,
+ NL80211_CMD_SET_MPATH,
+ NL80211_CMD_NEW_MPATH,
+ NL80211_CMD_DEL_MPATH,
+
+ NL80211_CMD_SET_BSS,
+
+ NL80211_CMD_SET_REG,
+ NL80211_CMD_REQ_SET_REG,
+
+ NL80211_CMD_GET_MESH_CONFIG,
+ NL80211_CMD_SET_MESH_CONFIG,
+
+ NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */,
+
+ NL80211_CMD_GET_REG,
+
+ NL80211_CMD_GET_SCAN,
+ NL80211_CMD_TRIGGER_SCAN,
+ NL80211_CMD_NEW_SCAN_RESULTS,
+ NL80211_CMD_SCAN_ABORTED,
+
+ NL80211_CMD_REG_CHANGE,
+
+ NL80211_CMD_AUTHENTICATE,
+ NL80211_CMD_ASSOCIATE,
+ NL80211_CMD_DEAUTHENTICATE,
+ NL80211_CMD_DISASSOCIATE,
+
+ NL80211_CMD_MICHAEL_MIC_FAILURE,
+
+ NL80211_CMD_REG_BEACON_HINT,
+
+ NL80211_CMD_JOIN_IBSS,
+ NL80211_CMD_LEAVE_IBSS,
+
+ NL80211_CMD_TESTMODE,
+
+ NL80211_CMD_CONNECT,
+ NL80211_CMD_ROAM,
+ NL80211_CMD_DISCONNECT,
+
+ NL80211_CMD_SET_WIPHY_NETNS,
+
+ NL80211_CMD_GET_SURVEY,
+ NL80211_CMD_NEW_SURVEY_RESULTS,
+
+ NL80211_CMD_SET_PMKSA,
+ NL80211_CMD_DEL_PMKSA,
+ NL80211_CMD_FLUSH_PMKSA,
+
+ NL80211_CMD_REMAIN_ON_CHANNEL,
+ NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
+
+ NL80211_CMD_SET_TX_BITRATE_MASK,
+
+ NL80211_CMD_REGISTER_FRAME,
+ NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME,
+ NL80211_CMD_FRAME,
+ NL80211_CMD_ACTION = NL80211_CMD_FRAME,
+ NL80211_CMD_FRAME_TX_STATUS,
+ NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS,
+
+ NL80211_CMD_SET_POWER_SAVE,
+ NL80211_CMD_GET_POWER_SAVE,
+
+ NL80211_CMD_SET_CQM,
+ NL80211_CMD_NOTIFY_CQM,
+
+ NL80211_CMD_SET_CHANNEL,
+ NL80211_CMD_SET_WDS_PEER,
+
+ NL80211_CMD_FRAME_WAIT_CANCEL,
+
+ NL80211_CMD_JOIN_MESH,
+ NL80211_CMD_LEAVE_MESH,
+
+ NL80211_CMD_UNPROT_DEAUTHENTICATE,
+ NL80211_CMD_UNPROT_DISASSOCIATE,
+
+ NL80211_CMD_NEW_PEER_CANDIDATE,
+
+ NL80211_CMD_GET_WOWLAN,
+ NL80211_CMD_SET_WOWLAN,
+
+ NL80211_CMD_START_SCHED_SCAN,
+ NL80211_CMD_STOP_SCHED_SCAN,
+ NL80211_CMD_SCHED_SCAN_RESULTS,
+ NL80211_CMD_SCHED_SCAN_STOPPED,
+
+ NL80211_CMD_SET_REKEY_OFFLOAD,
+
+ NL80211_CMD_PMKSA_CANDIDATE,
+
+ NL80211_CMD_TDLS_OPER,
+ NL80211_CMD_TDLS_MGMT,
+
+ NL80211_CMD_UNEXPECTED_FRAME,
+
+ NL80211_CMD_PROBE_CLIENT,
+
+ NL80211_CMD_REGISTER_BEACONS,
+
+ NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
+
+ NL80211_CMD_SET_NOACK_MAP,
+
+ NL80211_CMD_CH_SWITCH_NOTIFY,
+
+ NL80211_CMD_START_P2P_DEVICE,
+ NL80211_CMD_STOP_P2P_DEVICE,
+
+ NL80211_CMD_CONN_FAILED,
+
+ NL80211_CMD_SET_MCAST_RATE,
+
+ NL80211_CMD_SET_MAC_ACL,
+
+ NL80211_CMD_RADAR_DETECT,
+
+ NL80211_CMD_GET_PROTOCOL_FEATURES,
+
+ NL80211_CMD_UPDATE_FT_IES,
+ NL80211_CMD_FT_EVENT,
+
+ NL80211_CMD_CRIT_PROTOCOL_START,
+ NL80211_CMD_CRIT_PROTOCOL_STOP,
+
+ NL80211_CMD_GET_COALESCE,
+ NL80211_CMD_SET_COALESCE,
+
+ NL80211_CMD_CHANNEL_SWITCH,
+
+ NL80211_CMD_VENDOR,
+
+ NL80211_CMD_SET_QOS_MAP,
+
+ NL80211_CMD_ADD_TX_TS,
+ NL80211_CMD_DEL_TX_TS,
+
+ NL80211_CMD_GET_MPP,
+
+ NL80211_CMD_JOIN_OCB,
+ NL80211_CMD_LEAVE_OCB,
+
+ NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
+
+ NL80211_CMD_TDLS_CHANNEL_SWITCH,
+ NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
+
+ NL80211_CMD_WIPHY_REG_CHANGE,
+
+ NL80211_CMD_ABORT_SCAN,
+
+ NL80211_CMD_START_NAN,
+ NL80211_CMD_STOP_NAN,
+ NL80211_CMD_ADD_NAN_FUNCTION,
+ NL80211_CMD_DEL_NAN_FUNCTION,
+ NL80211_CMD_CHANGE_NAN_CONFIG,
+ NL80211_CMD_NAN_MATCH,
+
+ NL80211_CMD_SET_MULTICAST_TO_UNICAST,
+
+ NL80211_CMD_UPDATE_CONNECT_PARAMS,
+
+ NL80211_CMD_SET_PMK,
+ NL80211_CMD_DEL_PMK,
+
+ NL80211_CMD_PORT_AUTHORIZED,
+
+ NL80211_CMD_RELOAD_REGDB,
+
+ NL80211_CMD_EXTERNAL_AUTH,
+
+ NL80211_CMD_STA_OPMODE_CHANGED,
+
+ NL80211_CMD_CONTROL_PORT_FRAME,
+
+ NL80211_CMD_GET_FTM_RESPONDER_STATS,
+
+ NL80211_CMD_PEER_MEASUREMENT_START,
+ NL80211_CMD_PEER_MEASUREMENT_RESULT,
+ NL80211_CMD_PEER_MEASUREMENT_COMPLETE,
+
+ NL80211_CMD_NOTIFY_RADAR,
+
+ NL80211_CMD_UPDATE_OWE_INFO,
+
+ NL80211_CMD_PROBE_MESH_LINK,
+
+ /* add new commands above here */
+
+ /* used to define NL80211_CMD_MAX below */
+ __NL80211_CMD_AFTER_LAST,
+ NL80211_CMD_MAX = __NL80211_CMD_AFTER_LAST - 1
+};
+
+/*
+ * Allow user space programs to use #ifdef on new commands by defining them
+ * here
+ */
+#define NL80211_CMD_SET_BSS NL80211_CMD_SET_BSS
+#define NL80211_CMD_SET_MGMT_EXTRA_IE NL80211_CMD_SET_MGMT_EXTRA_IE
+#define NL80211_CMD_REG_CHANGE NL80211_CMD_REG_CHANGE
+#define NL80211_CMD_AUTHENTICATE NL80211_CMD_AUTHENTICATE
+#define NL80211_CMD_ASSOCIATE NL80211_CMD_ASSOCIATE
+#define NL80211_CMD_DEAUTHENTICATE NL80211_CMD_DEAUTHENTICATE
+#define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
+#define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT
+
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
+
+/* source-level API compatibility */
+#define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG
+#define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG
+#define NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE NL80211_MESH_SETUP_IE
+
+/**
+ * enum nl80211_attrs - nl80211 netlink attributes
+ *
+ * @NL80211_ATTR_UNSPEC: unspecified attribute to catch errors
+ *
+ * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf.
+ * /sys/class/ieee80211/<phyname>/index
+ * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming)
+ * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters
+ * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz,
+ * defines the channel together with the (deprecated)
+ * %NL80211_ATTR_WIPHY_CHANNEL_TYPE attribute or the attributes
+ * %NL80211_ATTR_CHANNEL_WIDTH and if needed %NL80211_ATTR_CENTER_FREQ1
+ * and %NL80211_ATTR_CENTER_FREQ2
+ * @NL80211_ATTR_CHANNEL_WIDTH: u32 attribute containing one of the values
+ * of &enum nl80211_chan_width, describing the channel width. See the
+ * documentation of the enum for more information.
+ * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the
+ * channel, used for anything but 20 MHz bandwidth
+ * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the
+ * channel, used only for 80+80 MHz bandwidth
+ * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ
+ * if HT20 or HT40 are to be used (i.e., HT disabled if not included):
+ * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including
+ * this attribute)
+ * NL80211_CHAN_HT20 = HT20 only
+ * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel
+ * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel
+ * This attribute is now deprecated.
+ * @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is
+ * less than or equal to the RTS threshold; allowed range: 1..255;
+ * dot11ShortRetryLimit; u8
+ * @NL80211_ATTR_WIPHY_RETRY_LONG: TX retry limit for frames whose length is
+ * greater than the RTS threshold; allowed range: 1..255;
+ * dot11ShortLongLimit; u8
+ * @NL80211_ATTR_WIPHY_FRAG_THRESHOLD: fragmentation threshold, i.e., maximum
+ * length in octets for frames; allowed range: 256..8000, disable
+ * fragmentation with (u32)-1; dot11FragmentationThreshold; u32
+ * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length
+ * larger than or equal to this use RTS/CTS handshake); allowed range:
+ * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32
+ * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11
+ * section 7.3.2.9; dot11CoverageClass; u8
+ *
+ * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on
+ * @NL80211_ATTR_IFNAME: network interface name
+ * @NL80211_ATTR_IFTYPE: type of virtual interface, see &enum nl80211_iftype
+ *
+ * @NL80211_ATTR_WDEV: wireless device identifier, used for pseudo-devices
+ * that don't have a netdev (u64)
+ *
+ * @NL80211_ATTR_MAC: MAC address (various uses)
+ *
+ * @NL80211_ATTR_KEY_DATA: (temporal) key data; for TKIP this consists of
+ * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC
+ * keys
+ * @NL80211_ATTR_KEY_IDX: key ID (u8, 0-3)
+ * @NL80211_ATTR_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11
+ * section 7.3.2.25.1, e.g. 0x000FAC04)
+ * @NL80211_ATTR_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and
+ * CCMP keys, each six bytes in little endian
+ * @NL80211_ATTR_KEY_DEFAULT: Flag attribute indicating the key is default key
+ * @NL80211_ATTR_KEY_DEFAULT_MGMT: Flag attribute indicating the key is the
+ * default management key
+ * @NL80211_ATTR_CIPHER_SUITES_PAIRWISE: For crypto settings for connect or
+ * other commands, indicates which pairwise cipher suites are used
+ * @NL80211_ATTR_CIPHER_SUITE_GROUP: For crypto settings for connect or
+ * other commands, indicates which group cipher suite is used
+ *
+ * @NL80211_ATTR_BEACON_INTERVAL: beacon interval in TU
+ * @NL80211_ATTR_DTIM_PERIOD: DTIM period for beaconing
+ * @NL80211_ATTR_BEACON_HEAD: portion of the beacon before the TIM IE
+ * @NL80211_ATTR_BEACON_TAIL: portion of the beacon after the TIM IE
+ *
+ * @NL80211_ATTR_STA_AID: Association ID for the station (u16)
+ * @NL80211_ATTR_STA_FLAGS: flags, nested element with NLA_FLAG attributes of
+ * &enum nl80211_sta_flags (deprecated, use %NL80211_ATTR_STA_FLAGS2)
+ * @NL80211_ATTR_STA_LISTEN_INTERVAL: listen interval as defined by
+ * IEEE 802.11 7.3.1.6 (u16).
+ * @NL80211_ATTR_STA_SUPPORTED_RATES: supported rates, array of supported
+ * rates as defined by IEEE 802.11 7.3.2.2 but without the length
+ * restriction (at most %NL80211_MAX_SUPP_RATES).
+ * @NL80211_ATTR_STA_VLAN: interface index of VLAN interface to move station
+ * to, or the AP interface the station was originally added to.
+ * @NL80211_ATTR_STA_INFO: information about a station, part of station info
+ * given for %NL80211_CMD_GET_STATION, nested attribute containing
+ * info as possible, see &enum nl80211_sta_info.
+ *
+ * @NL80211_ATTR_WIPHY_BANDS: Information about an operating bands,
+ * consisting of a nested array.
+ *
+ * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes).
+ * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link
+ * (see &enum nl80211_plink_action).
+ * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path.
+ * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path
+ * info given for %NL80211_CMD_GET_MPATH, nested attribute described at
+ * &enum nl80211_mpath_info.
+ *
+ * @NL80211_ATTR_MNTR_FLAGS: flags, nested element with NLA_FLAG attributes of
+ * &enum nl80211_mntr_flags.
+ *
+ * @NL80211_ATTR_REG_ALPHA2: an ISO-3166-alpha2 country code for which the
+ * current regulatory domain should be set to or is already set to.
+ * For example, 'CR', for Costa Rica. This attribute is used by the kernel
+ * to query the CRDA to retrieve one regulatory domain. This attribute can
+ * also be used by userspace to query the kernel for the currently set
+ * regulatory domain. We chose an alpha2 as that is also used by the
+ * IEEE-802.11 country information element to identify a country.
+ * Users can also simply ask the wireless core to set regulatory domain
+ * to a specific alpha2.
+ * @NL80211_ATTR_REG_RULES: a nested array of regulatory domain regulatory
+ * rules.
+ *
+ * @NL80211_ATTR_BSS_CTS_PROT: whether CTS protection is enabled (u8, 0 or 1)
+ * @NL80211_ATTR_BSS_SHORT_PREAMBLE: whether short preamble is enabled
+ * (u8, 0 or 1)
+ * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled
+ * (u8, 0 or 1)
+ * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic
+ * rates in format defined by IEEE 802.11 7.3.2.2 but without the length
+ * restriction (at most %NL80211_MAX_SUPP_RATES).
+ *
+ * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from
+ * association request when used with NL80211_CMD_NEW_STATION)
+ *
+ * @NL80211_ATTR_SUPPORTED_IFTYPES: nested attribute containing all
+ * supported interface types, each a flag attribute with the number
+ * of the interface mode.
+ *
+ * @NL80211_ATTR_MGMT_SUBTYPE: Management frame subtype for
+ * %NL80211_CMD_SET_MGMT_EXTRA_IE.
+ *
+ * @NL80211_ATTR_IE: Information element(s) data (used, e.g., with
+ * %NL80211_CMD_SET_MGMT_EXTRA_IE).
+ *
+ * @NL80211_ATTR_MAX_NUM_SCAN_SSIDS: number of SSIDs you can scan with
+ * a single scan request, a wiphy attribute.
+ * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS: number of SSIDs you can
+ * scan with a single scheduled scan request, a wiphy attribute.
+ * @NL80211_ATTR_MAX_SCAN_IE_LEN: maximum length of information elements
+ * that can be added to a scan request
+ * @NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN: maximum length of information
+ * elements that can be added to a scheduled scan request
+ * @NL80211_ATTR_MAX_MATCH_SETS: maximum number of sets that can be
+ * used with @NL80211_ATTR_SCHED_SCAN_MATCH, a wiphy attribute.
+ *
+ * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz)
+ * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive
+ * scanning and include a zero-length SSID (wildcard) for wildcard scan
+ * @NL80211_ATTR_BSS: scan result BSS
+ *
+ * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain
+ * currently in effect. This could be any of the %NL80211_REGDOM_SET_BY_*
+ * @NL80211_ATTR_REG_TYPE: indicates the type of the regulatory domain currently
+ * set. This can be one of the nl80211_reg_type (%NL80211_REGDOM_TYPE_*)
+ *
+ * @NL80211_ATTR_SUPPORTED_COMMANDS: wiphy attribute that specifies
+ * an array of command numbers (i.e. a mapping index to command number)
+ * that the driver for the given wiphy supports.
+ *
+ * @NL80211_ATTR_FRAME: frame data (binary attribute), including frame header
+ * and body, but not FCS; used, e.g., with NL80211_CMD_AUTHENTICATE and
+ * NL80211_CMD_ASSOCIATE events
+ * @NL80211_ATTR_SSID: SSID (binary attribute, 0..32 octets)
+ * @NL80211_ATTR_AUTH_TYPE: AuthenticationType, see &enum nl80211_auth_type,
+ * represented as a u32
+ * @NL80211_ATTR_REASON_CODE: ReasonCode for %NL80211_CMD_DEAUTHENTICATE and
+ * %NL80211_CMD_DISASSOCIATE, u16
+ *
+ * @NL80211_ATTR_KEY_TYPE: Key Type, see &enum nl80211_key_type, represented as
+ * a u32
+ *
+ * @NL80211_ATTR_FREQ_BEFORE: A channel which has suffered a regulatory change
+ * due to considerations from a beacon hint. This attribute reflects
+ * the state of the channel _before_ the beacon hint processing. This
+ * attributes consists of a nested attribute containing
+ * NL80211_FREQUENCY_ATTR_*
+ * @NL80211_ATTR_FREQ_AFTER: A channel which has suffered a regulatory change
+ * due to considerations from a beacon hint. This attribute reflects
+ * the state of the channel _after_ the beacon hint processing. This
+ * attributes consists of a nested attribute containing
+ * NL80211_FREQUENCY_ATTR_*
+ *
+ * @NL80211_ATTR_CIPHER_SUITES: a set of u32 values indicating the supported
+ * cipher suites
+ *
+ * @NL80211_ATTR_FREQ_FIXED: a flag indicating the IBSS should not try to look
+ * for other networks on different channels
+ *
+ * @NL80211_ATTR_TIMED_OUT: a flag indicating than an operation timed out; this
+ * is used, e.g., with %NL80211_CMD_AUTHENTICATE event
+ *
+ * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is
+ * used for the association (&enum nl80211_mfp, represented as a u32);
+ * this attribute can be used with %NL80211_CMD_ASSOCIATE and
+ * %NL80211_CMD_CONNECT requests. %NL80211_MFP_OPTIONAL is not allowed for
+ * %NL80211_CMD_ASSOCIATE since user space SME is expected and hence, it
+ * must have decided whether to use management frame protection or not.
+ * Setting %NL80211_MFP_OPTIONAL with a %NL80211_CMD_CONNECT request will
+ * let the driver (or the firmware) decide whether to use MFP or not.
+ *
+ * @NL80211_ATTR_STA_FLAGS2: Attribute containing a
+ * &struct nl80211_sta_flag_update.
+ *
+ * @NL80211_ATTR_CONTROL_PORT: A flag indicating whether user space controls
+ * IEEE 802.1X port, i.e., sets/clears %NL80211_STA_FLAG_AUTHORIZED, in
+ * station mode. If the flag is included in %NL80211_CMD_ASSOCIATE
+ * request, the driver will assume that the port is unauthorized until
+ * authorized by user space. Otherwise, port is marked authorized by
+ * default in station mode.
+ * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the
+ * ethertype that will be used for key negotiation. It can be
+ * specified with the associate and connect commands. If it is not
+ * specified, the value defaults to 0x888E (PAE, 802.1X). This
+ * attribute is also used as a flag in the wiphy information to
+ * indicate that protocols other than PAE are supported.
+ * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with
+ * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom
+ * ethertype frames used for key negotiation must not be encrypted.
+ * @NL80211_ATTR_CONTROL_PORT_OVER_NL80211: A flag indicating whether control
+ * port frames (e.g. of type given in %NL80211_ATTR_CONTROL_PORT_ETHERTYPE)
+ * will be sent directly to the network interface or sent via the NL80211
+ * socket. If this attribute is missing, then legacy behavior of sending
+ * control port frames directly to the network interface is used. If the
+ * flag is included, then control port frames are sent over NL80211 instead
+ * using %CMD_CONTROL_PORT_FRAME. If control port routing over NL80211 is
+ * to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER
+ * flag.
+ *
+ * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver.
+ * We recommend using nested, driver-specific attributes within this.
+ *
+ * @NL80211_ATTR_DISCONNECTED_BY_AP: A flag indicating that the DISCONNECT
+ * event was due to the AP disconnecting the station, and not due to
+ * a local disconnect request.
+ * @NL80211_ATTR_STATUS_CODE: StatusCode for the %NL80211_CMD_CONNECT
+ * event (u16)
+ * @NL80211_ATTR_PRIVACY: Flag attribute, used with connect(), indicating
+ * that protected APs should be used. This is also used with NEW_BEACON to
+ * indicate that the BSS is to use protection.
+ *
+ * @NL80211_ATTR_CIPHERS_PAIRWISE: Used with CONNECT, ASSOCIATE, and NEW_BEACON
+ * to indicate which unicast key ciphers will be used with the connection
+ * (an array of u32).
+ * @NL80211_ATTR_CIPHER_GROUP: Used with CONNECT, ASSOCIATE, and NEW_BEACON to
+ * indicate which group key cipher will be used with the connection (a
+ * u32).
+ * @NL80211_ATTR_WPA_VERSIONS: Used with CONNECT, ASSOCIATE, and NEW_BEACON to
+ * indicate which WPA version(s) the AP we want to associate with is using
+ * (a u32 with flags from &enum nl80211_wpa_versions).
+ * @NL80211_ATTR_AKM_SUITES: Used with CONNECT, ASSOCIATE, and NEW_BEACON to
+ * indicate which key management algorithm(s) to use (an array of u32).
+ * This attribute is also sent in response to @NL80211_CMD_GET_WIPHY,
+ * indicating the supported AKM suites, intended for specific drivers which
+ * implement SME and have constraints on which AKMs are supported and also
+ * the cases where an AKM support is offloaded to the driver/firmware.
+ * If there is no such notification from the driver, user space should
+ * assume the driver supports all the AKM suites.
+ *
+ * @NL80211_ATTR_REQ_IE: (Re)association request information elements as
+ * sent out by the card, for ROAM and successful CONNECT events.
+ * @NL80211_ATTR_RESP_IE: (Re)association response information elements as
+ * sent by peer, for ROAM and successful CONNECT events.
+ *
+ * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used in ASSOCIATE and CONNECT
+ * commands to specify a request to reassociate within an ESS, i.e., to use
+ * Reassociate Request frame (with the value of this attribute in the
+ * Current AP address field) instead of Association Request frame which is
+ * used for the initial association to an ESS.
+ *
+ * @NL80211_ATTR_KEY: key information in a nested attribute with
+ * %NL80211_KEY_* sub-attributes
+ * @NL80211_ATTR_KEYS: array of keys for static WEP keys for connect()
+ * and join_ibss(), key information is in a nested attribute each
+ * with %NL80211_KEY_* sub-attributes
+ *
+ * @NL80211_ATTR_PID: Process ID of a network namespace.
+ *
+ * @NL80211_ATTR_GENERATION: Used to indicate consistent snapshots for
+ * dumps. This number increases whenever the object list being
+ * dumped changes, and as such userspace can verify that it has
+ * obtained a complete and consistent snapshot by verifying that
+ * all dump messages contain the same generation number. If it
+ * changed then the list changed and the dump should be repeated
+ * completely from scratch.
+ *
+ * @NL80211_ATTR_4ADDR: Use 4-address frames on a virtual interface
+ *
+ * @NL80211_ATTR_SURVEY_INFO: survey information about a channel, part of
+ * the survey response for %NL80211_CMD_GET_SURVEY, nested attribute
+ * containing info as possible, see &enum survey_info.
+ *
+ * @NL80211_ATTR_PMKID: PMK material for PMKSA caching.
+ * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can
+ * cache, a wiphy attribute.
+ *
+ * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32.
+ * @NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION: Device attribute that
+ * specifies the maximum duration that can be requested with the
+ * remain-on-channel operation, in milliseconds, u32.
+ *
+ * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects.
+ *
+ * @NL80211_ATTR_TX_RATES: Nested set of attributes
+ * (enum nl80211_tx_rate_attributes) describing TX rates per band. The
+ * enum nl80211_band value is used as the index (nla_type() of the nested
+ * data. If a band is not included, it will be configured to allow all
+ * rates based on negotiated supported rates information. This attribute
+ * is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP,
+ * and joining mesh networks (not IBSS yet). In the later case, it must
+ * specify just a single bitrate, which is to be used for the beacon.
+ * The driver must also specify support for this with the extended
+ * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+ * NL80211_EXT_FEATURE_BEACON_RATE_HT and
+ * NL80211_EXT_FEATURE_BEACON_RATE_VHT.
+ *
+ * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain
+ * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME.
+ * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the
+ * @NL80211_CMD_REGISTER_FRAME command.
+ * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a
+ * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing
+ * information about which frame types can be transmitted with
+ * %NL80211_CMD_FRAME.
+ * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a
+ * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing
+ * information about which frame types can be registered for RX.
+ *
+ * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was
+ * acknowledged by the recipient.
+ *
+ * @NL80211_ATTR_PS_STATE: powersave state, using &enum nl80211_ps_state values.
+ *
+ * @NL80211_ATTR_CQM: connection quality monitor configuration in a
+ * nested attribute with %NL80211_ATTR_CQM_* sub-attributes.
+ *
+ * @NL80211_ATTR_LOCAL_STATE_CHANGE: Flag attribute to indicate that a command
+ * is requesting a local authentication/association state change without
+ * invoking actual management frame exchange. This can be used with
+ * NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE,
+ * NL80211_CMD_DISASSOCIATE.
+ *
+ * @NL80211_ATTR_AP_ISOLATE: (AP mode) Do not forward traffic between stations
+ * connected to this BSS.
+ *
+ * @NL80211_ATTR_WIPHY_TX_POWER_SETTING: Transmit power setting type. See
+ * &enum nl80211_tx_power_setting for possible values.
+ * @NL80211_ATTR_WIPHY_TX_POWER_LEVEL: Transmit power level in signed mBm units.
+ * This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING
+ * for non-automatic settings.
+ *
+ * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly
+ * means support for per-station GTKs.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting.
+ * This can be used to mask out antennas which are not attached or should
+ * not be used for transmitting. If an antenna is not selected in this
+ * bitmap the hardware is not allowed to transmit on this antenna.
+ *
+ * Each bit represents one antenna, starting with antenna 1 at the first
+ * bit. Depending on which antennas are selected in the bitmap, 802.11n
+ * drivers can derive which chainmasks to use (if all antennas belonging to
+ * a particular chain are disabled this chain should be disabled) and if
+ * a chain has diversity antennas wether diversity should be used or not.
+ * HT capabilities (STBC, TX Beamforming, Antenna selection) can be
+ * derived from the available chains after applying the antenna mask.
+ * Non-802.11n drivers can derive wether to use diversity or not.
+ * Drivers may reject configurations or RX/TX mask combinations they cannot
+ * support by returning -EINVAL.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving.
+ * This can be used to mask out antennas which are not attached or should
+ * not be used for receiving. If an antenna is not selected in this bitmap
+ * the hardware should not be configured to receive on this antenna.
+ * For a more detailed description see @NL80211_ATTR_WIPHY_ANTENNA_TX.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX: Bitmap of antennas which are available
+ * for configuration as TX antennas via the above parameters.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX: Bitmap of antennas which are available
+ * for configuration as RX antennas via the above parameters.
+ *
+ * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
+ *
+ * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be
+ * transmitted on another channel when the channel given doesn't match
+ * the current channel. If the current channel doesn't match and this
+ * flag isn't set, the frame will be rejected. This is also used as an
+ * nl80211 capability flag.
+ *
+ * @NL80211_ATTR_BSS_HT_OPMODE: HT operation mode (u16)
+ *
+ * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ * attributes, specifying what a key should be set as default as.
+ * See &enum nl80211_key_default_types.
+ *
+ * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters. These cannot be
+ * changed once the mesh is active.
+ * @NL80211_ATTR_MESH_CONFIG: Mesh configuration parameters, a nested attribute
+ * containing attributes from &enum nl80211_meshconf_params.
+ * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver
+ * allows auth frames in a mesh to be passed to userspace for processing via
+ * the @NL80211_MESH_SETUP_USERSPACE_AUTH flag.
+ * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as defined in
+ * &enum nl80211_plink_state. Used when userspace is driving the peer link
+ * management state machine. @NL80211_MESH_SETUP_USERSPACE_AMPE or
+ * @NL80211_MESH_SETUP_USERSPACE_MPM must be enabled.
+ *
+ * @NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED: indicates, as part of the wiphy
+ * capabilities, the supported WoWLAN triggers
+ * @NL80211_ATTR_WOWLAN_TRIGGERS: used by %NL80211_CMD_SET_WOWLAN to
+ * indicate which WoW triggers should be enabled. This is also
+ * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN
+ * triggers.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan
+ * cycles, in msecs.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_MATCH: Nested attribute with one or more
+ * sets of attributes to match during scheduled scans. Only BSSs
+ * that match any of the sets will be reported. These are
+ * pass-thru filter rules.
+ * For a match to succeed, the BSS must match all attributes of a
+ * set. Since not every hardware supports matching all types of
+ * attributes, there is no guarantee that the reported BSSs are
+ * fully complying with the match sets and userspace needs to be
+ * able to ignore them by itself.
+ * Thus, the implementation is somewhat hardware-dependent, but
+ * this is only an optimization and the userspace application
+ * needs to handle all the non-filtered results anyway.
+ * If the match attributes don't make sense when combined with
+ * the values passed in @NL80211_ATTR_SCAN_SSIDS (eg. if an SSID
+ * is included in the probe request, but the match attributes
+ * will never let it go through), -EINVAL may be returned.
+ * If omitted, no filtering is done.
+ *
+ * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported
+ * interface combinations. In each nested item, it contains attributes
+ * defined in &enum nl80211_if_combination_attrs.
+ * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like
+ * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that
+ * are managed in software: interfaces of these types aren't subject to
+ * any restrictions in their number or combinations.
+ *
+ * @NL80211_ATTR_REKEY_DATA: nested attribute containing the information
+ * necessary for GTK rekeying in the device, see &enum nl80211_rekey_data.
+ *
+ * @NL80211_ATTR_SCAN_SUPP_RATES: rates per to be advertised as supported in scan,
+ * nested array attribute containing an entry for each band, with the entry
+ * being a list of supported rates as defined by IEEE 802.11 7.3.2.2 but
+ * without the length restriction (at most %NL80211_MAX_SUPP_RATES).
+ *
+ * @NL80211_ATTR_HIDDEN_SSID: indicates whether SSID is to be hidden from Beacon
+ * and Probe Response (when response to wildcard Probe Request); see
+ * &enum nl80211_hidden_ssid, represented as a u32
+ *
+ * @NL80211_ATTR_IE_PROBE_RESP: Information element(s) for Probe Response frame.
+ * This is used with %NL80211_CMD_NEW_BEACON and %NL80211_CMD_SET_BEACON to
+ * provide extra IEs (e.g., WPS/P2P IE) into Probe Response frames when the
+ * driver (or firmware) replies to Probe Request frames.
+ * @NL80211_ATTR_IE_ASSOC_RESP: Information element(s) for (Re)Association
+ * Response frames. This is used with %NL80211_CMD_NEW_BEACON and
+ * %NL80211_CMD_SET_BEACON to provide extra IEs (e.g., WPS/P2P IE) into
+ * (Re)Association Response frames when the driver (or firmware) replies to
+ * (Re)Association Request frames.
+ *
+ * @NL80211_ATTR_STA_WME: Nested attribute containing the wme configuration
+ * of the station, see &enum nl80211_sta_wme_attr.
+ * @NL80211_ATTR_SUPPORT_AP_UAPSD: the device supports uapsd when working
+ * as AP.
+ *
+ * @NL80211_ATTR_ROAM_SUPPORT: Indicates whether the firmware is capable of
+ * roaming to another AP in the same ESS if the signal lever is low.
+ *
+ * @NL80211_ATTR_PMKSA_CANDIDATE: Nested attribute containing the PMKSA caching
+ * candidate information, see &enum nl80211_pmksa_candidate_attr.
+ *
+ * @NL80211_ATTR_TX_NO_CCK_RATE: Indicates whether to use CCK rate or not
+ * for management frames transmission. In order to avoid p2p probe/action
+ * frames are being transmitted at CCK rate in 2GHz band, the user space
+ * applications use this attribute.
+ * This attribute is used with %NL80211_CMD_TRIGGER_SCAN and
+ * %NL80211_CMD_FRAME commands.
+ *
+ * @NL80211_ATTR_TDLS_ACTION: Low level TDLS action code (e.g. link setup
+ * request, link setup confirm, link teardown, etc.). Values are
+ * described in the TDLS (802.11z) specification.
+ * @NL80211_ATTR_TDLS_DIALOG_TOKEN: Non-zero token for uniquely identifying a
+ * TDLS conversation between two devices.
+ * @NL80211_ATTR_TDLS_OPERATION: High level TDLS operation; see
+ * &enum nl80211_tdls_operation, represented as a u8.
+ * @NL80211_ATTR_TDLS_SUPPORT: A flag indicating the device can operate
+ * as a TDLS peer sta.
+ * @NL80211_ATTR_TDLS_EXTERNAL_SETUP: The TDLS discovery/setup and teardown
+ * procedures should be performed by sending TDLS packets via
+ * %NL80211_CMD_TDLS_MGMT. Otherwise %NL80211_CMD_TDLS_OPER should be
+ * used for asking the driver to perform a TDLS operation.
+ *
+ * @NL80211_ATTR_DEVICE_AP_SME: This u32 attribute may be listed for devices
+ * that have AP support to indicate that they have the AP SME integrated
+ * with support for the features listed in this attribute, see
+ * &enum nl80211_ap_sme_features.
+ *
+ * @NL80211_ATTR_DONT_WAIT_FOR_ACK: Used with %NL80211_CMD_FRAME, this tells
+ * the driver to not wait for an acknowledgement. Note that due to this,
+ * it will also not give a status callback nor return a cookie. This is
+ * mostly useful for probe responses to save airtime.
+ *
+ * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from
+ * &enum nl80211_feature_flags and is advertised in wiphy information.
+ * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe
+ * requests while operating in AP-mode.
+ * This attribute holds a bitmap of the supported protocols for
+ * offloading (see &enum nl80211_probe_resp_offload_support_attr).
+ *
+ * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire
+ * probe-response frame. The DA field in the 802.11 header is zero-ed out,
+ * to be filled by the FW.
+ * @NL80211_ATTR_DISABLE_HT: Force HT capable interfaces to disable
+ * this feature. Currently, only supported in mac80211 drivers.
+ * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the
+ * ATTR_HT_CAPABILITY to which attention should be paid.
+ * Currently, only mac80211 NICs support this feature.
+ * The values that may be configured are:
+ * MCS rates, MAX-AMSDU, HT-20-40 and HT_CAP_SGI_40
+ * AMPDU density and AMPDU factor.
+ * All values are treated as suggestions and may be ignored
+ * by the driver as required. The actual values may be seen in
+ * the station debugfs ht_caps file.
+ *
+ * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country
+ * abides to when initiating radiation on DFS channels. A country maps
+ * to one DFS region.
+ *
+ * @NL80211_ATTR_NOACK_MAP: This u16 bitmap contains the No Ack Policy of
+ * up to 16 TIDs.
+ *
+ * @NL80211_ATTR_INACTIVITY_TIMEOUT: timeout value in seconds, this can be
+ * used by the drivers which has MLME in firmware and does not have support
+ * to report per station tx/rx activity to free up the station entry from
+ * the list. This needs to be used when the driver advertises the
+ * capability to timeout the stations.
+ *
+ * @NL80211_ATTR_RX_SIGNAL_DBM: signal strength in dBm (as a 32-bit int);
+ * this attribute is (depending on the driver capabilities) added to
+ * received frames indicated with %NL80211_CMD_FRAME.
+ *
+ * @NL80211_ATTR_BG_SCAN_PERIOD: Background scan period in seconds
+ * or 0 to disable background scan.
+ *
+ * @NL80211_ATTR_USER_REG_HINT_TYPE: type of regulatory hint passed from
+ * userspace. If unset it is assumed the hint comes directly from
+ * a user. If set code could specify exactly what type of source
+ * was used to provide the hint. For the different types of
+ * allowed user regulatory hints see nl80211_user_reg_hint_type.
+ *
+ * @NL80211_ATTR_CONN_FAILED_REASON: The reason for which AP has rejected
+ * the connection request from a station. nl80211_connect_failed_reason
+ * enum has different reasons of connection failure.
+ *
+ * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames.
+ * This contains the authentication frame body (non-IE and IE data),
+ * excluding the Authentication algorithm number, i.e., starting at the
+ * Authentication transaction sequence number field. It is used with
+ * authentication algorithms that need special fields to be added into
+ * the frames (SAE and FILS). Currently, only the SAE cases use the
+ * initial two fields (Authentication transaction sequence number and
+ * Status code). However, those fields are included in the attribute data
+ * for all authentication algorithms to keep the attribute definition
+ * consistent.
+ *
+ * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from
+ * association request when used with NL80211_CMD_NEW_STATION)
+ *
+ * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32)
+ *
+ * @NL80211_ATTR_P2P_CTWINDOW: P2P GO Client Traffic Window (u8), used with
+ * the START_AP and SET_BSS commands
+ * @NL80211_ATTR_P2P_OPPPS: P2P GO opportunistic PS (u8), used with the
+ * START_AP and SET_BSS commands. This can have the values 0 or 1;
+ * if not given in START_AP 0 is assumed, if not given in SET_BSS
+ * no change is made.
+ *
+ * @NL80211_ATTR_LOCAL_MESH_POWER_MODE: local mesh STA link-specific power mode
+ * defined in &enum nl80211_mesh_power_mode.
+ *
+ * @NL80211_ATTR_ACL_POLICY: ACL policy, see &enum nl80211_acl_policy,
+ * carried in a u32 attribute
+ *
+ * @NL80211_ATTR_MAC_ADDRS: Array of nested MAC addresses, used for
+ * MAC ACL.
+ *
+ * @NL80211_ATTR_MAC_ACL_MAX: u32 attribute to advertise the maximum
+ * number of MAC addresses that a device can support for MAC
+ * ACL.
+ *
+ * @NL80211_ATTR_RADAR_EVENT: Type of radar event for notification to userspace,
+ * contains a value of enum nl80211_radar_event (u32).
+ *
+ * @NL80211_ATTR_EXT_CAPA: 802.11 extended capabilities that the kernel driver
+ * has and handles. The format is the same as the IE contents. See
+ * 802.11-2012 8.4.2.29 for more information.
+ * @NL80211_ATTR_EXT_CAPA_MASK: Extended capabilities that the kernel driver
+ * has set in the %NL80211_ATTR_EXT_CAPA value, for multibit fields.
+ *
+ * @NL80211_ATTR_STA_CAPABILITY: Station capabilities (u16) are advertised to
+ * the driver, e.g., to enable TDLS power save (PU-APSD).
+ *
+ * @NL80211_ATTR_STA_EXT_CAPABILITY: Station extended capabilities are
+ * advertised to the driver, e.g., to enable TDLS off channel operations
+ * and PU-APSD.
+ *
+ * @NL80211_ATTR_PROTOCOL_FEATURES: global nl80211 feature flags, see
+ * &enum nl80211_protocol_features, the attribute is a u32.
+ *
+ * @NL80211_ATTR_SPLIT_WIPHY_DUMP: flag attribute, userspace supports
+ * receiving the data for a single wiphy split across multiple
+ * messages, given with wiphy dump message
+ *
+ * @NL80211_ATTR_MDID: Mobility Domain Identifier
+ *
+ * @NL80211_ATTR_IE_RIC: Resource Information Container Information
+ * Element
+ *
+ * @NL80211_ATTR_CRIT_PROT_ID: critical protocol identifier requiring increased
+ * reliability, see &enum nl80211_crit_proto_id (u16).
+ * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which
+ * the connection should have increased reliability (u16).
+ *
+ * @NL80211_ATTR_PEER_AID: Association ID for the peer TDLS station (u16).
+ * This is similar to @NL80211_ATTR_STA_AID but with a difference of being
+ * allowed to be used with the first @NL80211_CMD_SET_STATION command to
+ * update a TDLS peer STA entry.
+ *
+ * @NL80211_ATTR_COALESCE_RULE: Coalesce rule information.
+ *
+ * @NL80211_ATTR_CH_SWITCH_COUNT: u32 attribute specifying the number of TBTT's
+ * until the channel switch event.
+ * @NL80211_ATTR_CH_SWITCH_BLOCK_TX: flag attribute specifying that transmission
+ * must be blocked on the current channel (before the channel switch
+ * operation).
+ * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
+ * for the time while performing a channel switch.
+ * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel
+ * switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel
+ * switch counters in the probe response (%NL80211_ATTR_PROBE_RESP).
+ *
+ * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
+ * As specified in the &enum nl80211_rxmgmt_flags.
+ *
+ * @NL80211_ATTR_STA_SUPPORTED_CHANNELS: array of supported channels.
+ *
+ * @NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES: array of supported
+ * supported operating classes.
+ *
+ * @NL80211_ATTR_HANDLE_DFS: A flag indicating whether user space
+ * controls DFS operation in IBSS mode. If the flag is included in
+ * %NL80211_CMD_JOIN_IBSS request, the driver will allow use of DFS
+ * channels and reports radar events to userspace. Userspace is required
+ * to react to radar events, e.g. initiate a channel switch or leave the
+ * IBSS network.
+ *
+ * @NL80211_ATTR_SUPPORT_5_MHZ: A flag indicating that the device supports
+ * 5 MHz channel bandwidth.
+ * @NL80211_ATTR_SUPPORT_10_MHZ: A flag indicating that the device supports
+ * 10 MHz channel bandwidth.
+ *
+ * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode
+ * Notification Element based on association request when used with
+ * %NL80211_CMD_NEW_STATION or %NL80211_CMD_SET_STATION (only when
+ * %NL80211_FEATURE_FULL_AP_CLIENT_STATE is supported, or with TDLS);
+ * u8 attribute.
+ *
+ * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if
+ * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet)
+ * @NL80211_ATTR_VENDOR_SUBCMD: vendor sub-command
+ * @NL80211_ATTR_VENDOR_DATA: data for the vendor command, if any; this
+ * attribute is also used for vendor command feature advertisement
+ * @NL80211_ATTR_VENDOR_EVENTS: used for event list advertising in the wiphy
+ * info, containing a nested array of possible events
+ *
+ * @NL80211_ATTR_QOS_MAP: IP DSCP mapping for Interworking QoS mapping. This
+ * data is in the format defined for the payload of the QoS Map Set element
+ * in IEEE Std 802.11-2012, 8.4.2.97.
+ *
+ * @NL80211_ATTR_MAC_HINT: MAC address recommendation as initial BSS
+ * @NL80211_ATTR_WIPHY_FREQ_HINT: frequency of the recommended initial BSS
+ *
+ * @NL80211_ATTR_MAX_AP_ASSOC_STA: Device attribute that indicates how many
+ * associated stations are supported in AP mode (including P2P GO); u32.
+ * Since drivers may not have a fixed limit on the maximum number (e.g.,
+ * other concurrent operations may affect this), drivers are allowed to
+ * advertise values that cannot always be met. In such cases, an attempt
+ * to add a new station entry with @NL80211_CMD_NEW_STATION may fail.
+ *
+ * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which
+ * should be updated when the frame is transmitted.
+ * @NL80211_ATTR_MAX_CSA_COUNTERS: U8 attribute used to advertise the maximum
+ * supported number of csa counters.
+ *
+ * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32.
+ * As specified in the &enum nl80211_tdls_peer_capability.
+ *
+ * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface
+ * creation then the new interface will be owned by the netlink socket
+ * that created it and will be destroyed when the socket is closed.
+ * If set during scheduled scan start then the new scan req will be
+ * owned by the netlink socket that created it and the scheduled scan will
+ * be stopped when the socket is closed.
+ * If set during configuration of regulatory indoor operation then the
+ * regulatory indoor configuration would be owned by the netlink socket
+ * that configured the indoor setting, and the indoor operation would be
+ * cleared when the socket is closed.
+ * If set during NAN interface creation, the interface will be destroyed
+ * if the socket is closed just like any other interface. Moreover, NAN
+ * notifications will be sent in unicast to that socket. Without this
+ * attribute, the notifications will be sent to the %NL80211_MCGRP_NAN
+ * multicast group.
+ * If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the
+ * station will deauthenticate when the socket is closed.
+ * If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically
+ * torn down when the socket is closed.
+ * If set during %NL80211_CMD_JOIN_MESH the mesh setup will be
+ * automatically torn down when the socket is closed.
+ * If set during %NL80211_CMD_START_AP the AP will be automatically
+ * disabled when the socket is closed.
+ *
+ * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
+ * the TDLS link initiator.
+ *
+ * @NL80211_ATTR_USE_RRM: flag for indicating whether the current connection
+ * shall support Radio Resource Measurements (11k). This attribute can be
+ * used with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests.
+ * User space applications are expected to use this flag only if the
+ * underlying device supports these minimal RRM features:
+ * %NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES,
+ * %NL80211_FEATURE_QUIET,
+ * Or, if global RRM is supported, see:
+ * %NL80211_EXT_FEATURE_RRM
+ * If this flag is used, driver must add the Power Capabilities IE to the
+ * association request. In addition, it must also set the RRM capability
+ * flag in the association request's Capability Info field.
+ *
+ * @NL80211_ATTR_WIPHY_DYN_ACK: flag attribute used to enable ACK timeout
+ * estimation algorithm (dynack). In order to activate dynack
+ * %NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower
+ * drivers to indicate dynack capability. Dynack is automatically disabled
+ * setting valid value for coverage class.
+ *
+ * @NL80211_ATTR_TSID: a TSID value (u8 attribute)
+ * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute)
+ * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds
+ * (per second) (u16 attribute)
+ *
+ * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see
+ * &enum nl80211_smps_mode.
+ *
+ * @NL80211_ATTR_OPER_CLASS: operating class
+ *
+ * @NL80211_ATTR_MAC_MASK: MAC address mask
+ *
+ * @NL80211_ATTR_WIPHY_SELF_MANAGED_REG: flag attribute indicating this device
+ * is self-managing its regulatory information and any regulatory domain
+ * obtained from it is coming from the device's wiphy and not the global
+ * cfg80211 regdomain.
+ *
+ * @NL80211_ATTR_EXT_FEATURES: extended feature flags contained in a byte
+ * array. The feature flags are identified by their bit index (see &enum
+ * nl80211_ext_feature_index). The bit index is ordered starting at the
+ * least-significant bit of the first byte in the array, ie. bit index 0
+ * is located at bit 0 of byte 0. bit index 25 would be located at bit 1
+ * of byte 3 (u8 array).
+ *
+ * @NL80211_ATTR_SURVEY_RADIO_STATS: Request overall radio statistics to be
+ * returned along with other survey data. If set, @NL80211_CMD_GET_SURVEY
+ * may return a survey entry without a channel indicating global radio
+ * statistics (only some values are valid and make sense.)
+ * For devices that don't return such an entry even then, the information
+ * should be contained in the result as the sum of the respective counters
+ * over all channels.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before the first cycle of a
+ * scheduled scan is started. Or the delay before a WoWLAN
+ * net-detect scan is started, counting from the moment the
+ * system is suspended. This value is a u32, in seconds.
+
+ * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device
+ * is operating in an indoor environment.
+ *
+ * @NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS: maximum number of scan plans for
+ * scheduled scan supported by the device (u32), a wiphy attribute.
+ * @NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL: maximum interval (in seconds) for
+ * a scan plan (u32), a wiphy attribute.
+ * @NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS: maximum number of iterations in
+ * a scan plan (u32), a wiphy attribute.
+ * @NL80211_ATTR_SCHED_SCAN_PLANS: a list of scan plans for scheduled scan.
+ * Each scan plan defines the number of scan iterations and the interval
+ * between scans. The last scan plan will always run infinitely,
+ * thus it must not specify the number of iterations, only the interval
+ * between scans. The scan plans are executed sequentially.
+ * Each scan plan is a nested attribute of &enum nl80211_sched_scan_plan.
+ * @NL80211_ATTR_PBSS: flag attribute. If set it means operate
+ * in a PBSS. Specified in %NL80211_CMD_CONNECT to request
+ * connecting to a PCP, and in %NL80211_CMD_START_AP to start
+ * a PCP instead of AP. Relevant for DMG networks only.
+ * @NL80211_ATTR_BSS_SELECT: nested attribute for driver supporting the
+ * BSS selection feature. When used with %NL80211_CMD_GET_WIPHY it contains
+ * attributes according &enum nl80211_bss_select_attr to indicate what
+ * BSS selection behaviours are supported. When used with %NL80211_CMD_CONNECT
+ * it contains the behaviour-specific attribute containing the parameters for
+ * BSS selection to be done by driver and/or firmware.
+ *
+ * @NL80211_ATTR_STA_SUPPORT_P2P_PS: whether P2P PS mechanism supported
+ * or not. u8, one of the values of &enum nl80211_sta_p2p_ps_status
+ *
+ * @NL80211_ATTR_PAD: attribute used for padding for 64-bit alignment
+ *
+ * @NL80211_ATTR_IFTYPE_EXT_CAPA: Nested attribute of the following attributes:
+ * %NL80211_ATTR_IFTYPE, %NL80211_ATTR_EXT_CAPA,
+ * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per
+ * interface type.
+ *
+ * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO
+ * groupID for monitor mode.
+ * The first 8 bytes are a mask that defines the membership in each
+ * group (there are 64 groups, group 0 and 63 are reserved),
+ * each bit represents a group and set to 1 for being a member in
+ * that group and 0 for not being a member.
+ * The remaining 16 bytes define the position in each group: 2 bits for
+ * each group.
+ * (smaller group numbers represented on most significant bits and bigger
+ * group numbers on least significant bits.)
+ * This attribute is used only if all interfaces are in monitor mode.
+ * Set this attribute in order to monitor packets using the given MU-MIMO
+ * groupID data.
+ * to turn off that feature set all the bits of the groupID to zero.
+ * @NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR: mac address for the sniffer to follow
+ * when using MU-MIMO air sniffer.
+ * to turn that feature off set an invalid mac address
+ * (e.g. FF:FF:FF:FF:FF:FF)
+ *
+ * @NL80211_ATTR_SCAN_START_TIME_TSF: The time at which the scan was actually
+ * started (u64). The time is the TSF of the BSS the interface that
+ * requested the scan is connected to (if available, otherwise this
+ * attribute must not be included).
+ * @NL80211_ATTR_SCAN_START_TIME_TSF_BSSID: The BSS according to which
+ * %NL80211_ATTR_SCAN_START_TIME_TSF is set.
+ * @NL80211_ATTR_MEASUREMENT_DURATION: measurement duration in TUs (u16). If
+ * %NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY is not set, this is the
+ * maximum measurement duration allowed. This attribute is used with
+ * measurement requests. It can also be used with %NL80211_CMD_TRIGGER_SCAN
+ * if the scan is used for beacon report radio measurement.
+ * @NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY: flag attribute that indicates
+ * that the duration specified with %NL80211_ATTR_MEASUREMENT_DURATION is
+ * mandatory. If this flag is not set, the duration is the maximum duration
+ * and the actual measurement duration may be shorter.
+ *
+ * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is
+ * used to pull the stored data for mesh peer in power save state.
+ *
+ * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by
+ * %NL80211_CMD_START_NAN and optionally with
+ * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0.
+ * Also, values 1 and 255 are reserved for certification purposes and
+ * should not be used during a normal device operation.
+ * @NL80211_ATTR_BANDS: operating bands configuration. This is a u32
+ * bitmask of BIT(NL80211_BAND_*) as described in %enum
+ * nl80211_band. For instance, for NL80211_BAND_2GHZ, bit 0
+ * would be set. This attribute is used with
+ * %NL80211_CMD_START_NAN and %NL80211_CMD_CHANGE_NAN_CONFIG, and
+ * it is optional. If no bands are set, it means don't-care and
+ * the device will decide what to use.
+ * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See
+ * &enum nl80211_nan_func_attributes for description of this nested
+ * attribute.
+ * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute.
+ * See &enum nl80211_nan_match_attributes.
+ * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame
+ * protection.
+ * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association
+ * Request/Response frame protection. This attribute contains the 16 octet
+ * STA Nonce followed by 16 octets of AP Nonce.
+ *
+ * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast
+ * packets should be send out as unicast to all stations (flag attribute).
+ *
+ * @NL80211_ATTR_BSSID: The BSSID of the AP. Note that %NL80211_ATTR_MAC is also
+ * used in various commands/events for specifying the BSSID.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI: Relative RSSI threshold by which
+ * other BSSs has to be better or slightly worse than the current
+ * connected BSS so that they get reported to user space.
+ * This will give an opportunity to userspace to consider connecting to
+ * other matching BSSs which have better or slightly worse RSSI than
+ * the current connected BSS by using an offloaded operation to avoid
+ * unnecessary wakeups.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST: When present the RSSI level for BSSs in
+ * the specified band is to be adjusted before doing
+ * %NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI based comparison to figure out
+ * better BSSs. The attribute value is a packed structure
+ * value as specified by &struct nl80211_bss_select_rssi_adjust.
+ *
+ * @NL80211_ATTR_TIMEOUT_REASON: The reason for which an operation timed out.
+ * u32 attribute with an &enum nl80211_timeout_reason value. This is used,
+ * e.g., with %NL80211_CMD_CONNECT event.
+ *
+ * @NL80211_ATTR_FILS_ERP_USERNAME: EAP Re-authentication Protocol (ERP)
+ * username part of NAI used to refer keys rRK and rIK. This is used with
+ * %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_ERP_REALM: EAP Re-authentication Protocol (ERP) realm part
+ * of NAI specifying the domain name of the ER server. This is used with
+ * %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM: Unsigned 16-bit ERP next sequence number
+ * to use in ERP messages. This is used in generating the FILS wrapped data
+ * for FILS authentication and is used with %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_ERP_RRK: ERP re-authentication Root Key (rRK) for the
+ * NAI specified by %NL80211_ATTR_FILS_ERP_USERNAME and
+ * %NL80211_ATTR_FILS_ERP_REALM. This is used for generating rIK and rMSK
+ * from successful FILS authentication and is used with
+ * %NL80211_CMD_CONNECT.
+ *
+ * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertized by a FILS AP
+ * identifying the scope of PMKSAs. This is used with
+ * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA.
+ *
+ * @NL80211_ATTR_PMK: attribute for passing PMK key material. Used with
+ * %NL80211_CMD_SET_PMKSA for the PMKSA identified by %NL80211_ATTR_PMKID.
+ * For %NL80211_CMD_CONNECT it is used to provide PSK for offloading 4-way
+ * handshake for WPA/WPA2-PSK networks. For 802.1X authentication it is
+ * used with %NL80211_CMD_SET_PMK. For offloaded FT support this attribute
+ * specifies the PMK-R0 if NL80211_ATTR_PMKR0_NAME is included as well.
+ *
+ * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to
+ * indicate that it supports multiple active scheduled scan requests.
+ * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled
+ * scan request that may be active for the device (u32).
+ *
+ * @NL80211_ATTR_WANT_1X_4WAY_HS: flag attribute which user-space can include
+ * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it
+ * wants to use the supported offload of the 4-way handshake.
+ * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
+ * @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
+ *
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ * authentication operation (u32 attribute with an
+ * &enum nl80211_external_auth_action value). This is used with the
+ * %NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ * space supports external authentication. This attribute shall be used
+ * with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver
+ * may offload authentication processing to user space if this capability
+ * is indicated in the respective requests from the user space.
+ *
+ * @NL80211_ATTR_NSS: Station's New/updated RX_NSS value notified using this
+ * u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
+ * @NL80211_ATTR_TXQ_STATS: TXQ statistics (nested attribute, see &enum
+ * nl80211_txq_stats)
+ * @NL80211_ATTR_TXQ_LIMIT: Total packet limit for the TXQ queues for this phy.
+ * The smaller of this and the memory limit is enforced.
+ * @NL80211_ATTR_TXQ_MEMORY_LIMIT: Total memory memory limit (in bytes) for the
+ * TXQ queues for this phy. The smaller of this and the packet limit is
+ * enforced.
+ * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes
+ * a flow is assigned on each round of the DRR scheduler.
+ * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from
+ * association request when used with NL80211_CMD_NEW_STATION). Can be set
+ * only if %NL80211_STA_FLAG_WME is set.
+ *
+ * @NL80211_ATTR_FTM_RESPONDER: nested attribute which user-space can include
+ * in %NL80211_CMD_START_AP or %NL80211_CMD_SET_BEACON for fine timing
+ * measurement (FTM) responder functionality and containing parameters as
+ * possible, see &enum nl80211_ftm_responder_attr
+ *
+ * @NL80211_ATTR_FTM_RESPONDER_STATS: Nested attribute with FTM responder
+ * statistics, see &enum nl80211_ftm_responder_stats.
+ *
+ * @NL80211_ATTR_TIMEOUT: Timeout for the given operation in milliseconds (u32),
+ * if the attribute is not given no timeout is requested. Note that 0 is an
+ * invalid value.
+ *
+ * @NL80211_ATTR_PEER_MEASUREMENTS: peer measurements request (and result)
+ * data, uses nested attributes specified in
+ * &enum nl80211_peer_measurement_attrs.
+ * This is also used for capability advertisement in the wiphy information,
+ * with the appropriate sub-attributes.
+ *
+ * @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime
+ * scheduler.
+ *
+ * @NL80211_ATTR_STA_TX_POWER_SETTING: Transmit power setting type (u8) for
+ * station associated with the AP. See &enum nl80211_tx_power_setting for
+ * possible values.
+ * @NL80211_ATTR_STA_TX_POWER: Transmit power level (s16) in dBm units. This
+ * allows to set Tx power for a station. If this attribute is not included,
+ * the default per-interface tx power setting will be overriding. Driver
+ * should be picking up the lowest tx power, either tx power per-interface
+ * or per-station.
+ *
+ * @NL80211_ATTR_SAE_PASSWORD: attribute for passing SAE password material. It
+ * is used with %NL80211_CMD_CONNECT to provide password for offloading
+ * SAE authentication for WPA3-Personal networks.
+ *
+ * @NL80211_ATTR_TWT_RESPONDER: Enable target wait time responder support.
+ *
+ * @NL80211_ATTR_HE_OBSS_PD: nested attribute for OBSS Packet Detection
+ * functionality.
+ *
+ * @NL80211_ATTR_WIPHY_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz
+ * channel(s) that are allowed to be used for EDMG transmissions.
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251. (u8 attribute)
+ * @NL80211_ATTR_WIPHY_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes
+ * the allowed channel bandwidth configurations. (u8 attribute)
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13.
+ *
+ * @NUM_NL80211_ATTR: total number of nl80211_attrs available
+ * @NL80211_ATTR_MAX: highest attribute number currently defined
+ * @__NL80211_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_attrs {
+/* don't change the order or add anything between, this is ABI! */
+ NL80211_ATTR_UNSPEC,
+
+ NL80211_ATTR_WIPHY,
+ NL80211_ATTR_WIPHY_NAME,
+
+ NL80211_ATTR_IFINDEX,
+ NL80211_ATTR_IFNAME,
+ NL80211_ATTR_IFTYPE,
+
+ NL80211_ATTR_MAC,
+
+ NL80211_ATTR_KEY_DATA,
+ NL80211_ATTR_KEY_IDX,
+ NL80211_ATTR_KEY_CIPHER,
+ NL80211_ATTR_KEY_SEQ,
+ NL80211_ATTR_KEY_DEFAULT,
+
+ NL80211_ATTR_BEACON_INTERVAL,
+ NL80211_ATTR_DTIM_PERIOD,
+ NL80211_ATTR_BEACON_HEAD,
+ NL80211_ATTR_BEACON_TAIL,
+
+ NL80211_ATTR_STA_AID,
+ NL80211_ATTR_STA_FLAGS,
+ NL80211_ATTR_STA_LISTEN_INTERVAL,
+ NL80211_ATTR_STA_SUPPORTED_RATES,
+ NL80211_ATTR_STA_VLAN,
+ NL80211_ATTR_STA_INFO,
+
+ NL80211_ATTR_WIPHY_BANDS,
+
+ NL80211_ATTR_MNTR_FLAGS,
+
+ NL80211_ATTR_MESH_ID,
+ NL80211_ATTR_STA_PLINK_ACTION,
+ NL80211_ATTR_MPATH_NEXT_HOP,
+ NL80211_ATTR_MPATH_INFO,
+
+ NL80211_ATTR_BSS_CTS_PROT,
+ NL80211_ATTR_BSS_SHORT_PREAMBLE,
+ NL80211_ATTR_BSS_SHORT_SLOT_TIME,
+
+ NL80211_ATTR_HT_CAPABILITY,
+
+ NL80211_ATTR_SUPPORTED_IFTYPES,
+
+ NL80211_ATTR_REG_ALPHA2,
+ NL80211_ATTR_REG_RULES,
+
+ NL80211_ATTR_MESH_CONFIG,
+
+ NL80211_ATTR_BSS_BASIC_RATES,
+
+ NL80211_ATTR_WIPHY_TXQ_PARAMS,
+ NL80211_ATTR_WIPHY_FREQ,
+ NL80211_ATTR_WIPHY_CHANNEL_TYPE,
+
+ NL80211_ATTR_KEY_DEFAULT_MGMT,
+
+ NL80211_ATTR_MGMT_SUBTYPE,
+ NL80211_ATTR_IE,
+
+ NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
+
+ NL80211_ATTR_SCAN_FREQUENCIES,
+ NL80211_ATTR_SCAN_SSIDS,
+ NL80211_ATTR_GENERATION, /* replaces old SCAN_GENERATION */
+ NL80211_ATTR_BSS,
+
+ NL80211_ATTR_REG_INITIATOR,
+ NL80211_ATTR_REG_TYPE,
+
+ NL80211_ATTR_SUPPORTED_COMMANDS,
+
+ NL80211_ATTR_FRAME,
+ NL80211_ATTR_SSID,
+ NL80211_ATTR_AUTH_TYPE,
+ NL80211_ATTR_REASON_CODE,
+
+ NL80211_ATTR_KEY_TYPE,
+
+ NL80211_ATTR_MAX_SCAN_IE_LEN,
+ NL80211_ATTR_CIPHER_SUITES,
+
+ NL80211_ATTR_FREQ_BEFORE,
+ NL80211_ATTR_FREQ_AFTER,
+
+ NL80211_ATTR_FREQ_FIXED,
+
+
+ NL80211_ATTR_WIPHY_RETRY_SHORT,
+ NL80211_ATTR_WIPHY_RETRY_LONG,
+ NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+ NL80211_ATTR_WIPHY_RTS_THRESHOLD,
+
+ NL80211_ATTR_TIMED_OUT,
+
+ NL80211_ATTR_USE_MFP,
+
+ NL80211_ATTR_STA_FLAGS2,
+
+ NL80211_ATTR_CONTROL_PORT,
+
+ NL80211_ATTR_TESTDATA,
+
+ NL80211_ATTR_PRIVACY,
+
+ NL80211_ATTR_DISCONNECTED_BY_AP,
+ NL80211_ATTR_STATUS_CODE,
+
+ NL80211_ATTR_CIPHER_SUITES_PAIRWISE,
+ NL80211_ATTR_CIPHER_SUITE_GROUP,
+ NL80211_ATTR_WPA_VERSIONS,
+ NL80211_ATTR_AKM_SUITES,
+
+ NL80211_ATTR_REQ_IE,
+ NL80211_ATTR_RESP_IE,
+
+ NL80211_ATTR_PREV_BSSID,
+
+ NL80211_ATTR_KEY,
+ NL80211_ATTR_KEYS,
+
+ NL80211_ATTR_PID,
+
+ NL80211_ATTR_4ADDR,
+
+ NL80211_ATTR_SURVEY_INFO,
+
+ NL80211_ATTR_PMKID,
+ NL80211_ATTR_MAX_NUM_PMKIDS,
+
+ NL80211_ATTR_DURATION,
+
+ NL80211_ATTR_COOKIE,
+
+ NL80211_ATTR_WIPHY_COVERAGE_CLASS,
+
+ NL80211_ATTR_TX_RATES,
+
+ NL80211_ATTR_FRAME_MATCH,
+
+ NL80211_ATTR_ACK,
+
+ NL80211_ATTR_PS_STATE,
+
+ NL80211_ATTR_CQM,
+
+ NL80211_ATTR_LOCAL_STATE_CHANGE,
+
+ NL80211_ATTR_AP_ISOLATE,
+
+ NL80211_ATTR_WIPHY_TX_POWER_SETTING,
+ NL80211_ATTR_WIPHY_TX_POWER_LEVEL,
+
+ NL80211_ATTR_TX_FRAME_TYPES,
+ NL80211_ATTR_RX_FRAME_TYPES,
+ NL80211_ATTR_FRAME_TYPE,
+
+ NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT,
+
+ NL80211_ATTR_SUPPORT_IBSS_RSN,
+
+ NL80211_ATTR_WIPHY_ANTENNA_TX,
+ NL80211_ATTR_WIPHY_ANTENNA_RX,
+
+ NL80211_ATTR_MCAST_RATE,
+
+ NL80211_ATTR_OFFCHANNEL_TX_OK,
+
+ NL80211_ATTR_BSS_HT_OPMODE,
+
+ NL80211_ATTR_KEY_DEFAULT_TYPES,
+
+ NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+
+ NL80211_ATTR_MESH_SETUP,
+
+ NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+ NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+
+ NL80211_ATTR_SUPPORT_MESH_AUTH,
+ NL80211_ATTR_STA_PLINK_STATE,
+
+ NL80211_ATTR_WOWLAN_TRIGGERS,
+ NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED,
+
+ NL80211_ATTR_SCHED_SCAN_INTERVAL,
+
+ NL80211_ATTR_INTERFACE_COMBINATIONS,
+ NL80211_ATTR_SOFTWARE_IFTYPES,
+
+ NL80211_ATTR_REKEY_DATA,
+
+ NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
+ NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
+
+ NL80211_ATTR_SCAN_SUPP_RATES,
+
+ NL80211_ATTR_HIDDEN_SSID,
+
+ NL80211_ATTR_IE_PROBE_RESP,
+ NL80211_ATTR_IE_ASSOC_RESP,
+
+ NL80211_ATTR_STA_WME,
+ NL80211_ATTR_SUPPORT_AP_UAPSD,
+
+ NL80211_ATTR_ROAM_SUPPORT,
+
+ NL80211_ATTR_SCHED_SCAN_MATCH,
+ NL80211_ATTR_MAX_MATCH_SETS,
+
+ NL80211_ATTR_PMKSA_CANDIDATE,
+
+ NL80211_ATTR_TX_NO_CCK_RATE,
+
+ NL80211_ATTR_TDLS_ACTION,
+ NL80211_ATTR_TDLS_DIALOG_TOKEN,
+ NL80211_ATTR_TDLS_OPERATION,
+ NL80211_ATTR_TDLS_SUPPORT,
+ NL80211_ATTR_TDLS_EXTERNAL_SETUP,
+
+ NL80211_ATTR_DEVICE_AP_SME,
+
+ NL80211_ATTR_DONT_WAIT_FOR_ACK,
+
+ NL80211_ATTR_FEATURE_FLAGS,
+
+ NL80211_ATTR_PROBE_RESP_OFFLOAD,
+
+ NL80211_ATTR_PROBE_RESP,
+
+ NL80211_ATTR_DFS_REGION,
+
+ NL80211_ATTR_DISABLE_HT,
+ NL80211_ATTR_HT_CAPABILITY_MASK,
+
+ NL80211_ATTR_NOACK_MAP,
+
+ NL80211_ATTR_INACTIVITY_TIMEOUT,
+
+ NL80211_ATTR_RX_SIGNAL_DBM,
+
+ NL80211_ATTR_BG_SCAN_PERIOD,
+
+ NL80211_ATTR_WDEV,
+
+ NL80211_ATTR_USER_REG_HINT_TYPE,
+
+ NL80211_ATTR_CONN_FAILED_REASON,
+
+ NL80211_ATTR_AUTH_DATA,
+
+ NL80211_ATTR_VHT_CAPABILITY,
+
+ NL80211_ATTR_SCAN_FLAGS,
+
+ NL80211_ATTR_CHANNEL_WIDTH,
+ NL80211_ATTR_CENTER_FREQ1,
+ NL80211_ATTR_CENTER_FREQ2,
+
+ NL80211_ATTR_P2P_CTWINDOW,
+ NL80211_ATTR_P2P_OPPPS,
+
+ NL80211_ATTR_LOCAL_MESH_POWER_MODE,
+
+ NL80211_ATTR_ACL_POLICY,
+
+ NL80211_ATTR_MAC_ADDRS,
+
+ NL80211_ATTR_MAC_ACL_MAX,
+
+ NL80211_ATTR_RADAR_EVENT,
+
+ NL80211_ATTR_EXT_CAPA,
+ NL80211_ATTR_EXT_CAPA_MASK,
+
+ NL80211_ATTR_STA_CAPABILITY,
+ NL80211_ATTR_STA_EXT_CAPABILITY,
+
+ NL80211_ATTR_PROTOCOL_FEATURES,
+ NL80211_ATTR_SPLIT_WIPHY_DUMP,
+
+ NL80211_ATTR_DISABLE_VHT,
+ NL80211_ATTR_VHT_CAPABILITY_MASK,
+
+ NL80211_ATTR_MDID,
+ NL80211_ATTR_IE_RIC,
+
+ NL80211_ATTR_CRIT_PROT_ID,
+ NL80211_ATTR_MAX_CRIT_PROT_DURATION,
+
+ NL80211_ATTR_PEER_AID,
+
+ NL80211_ATTR_COALESCE_RULE,
+
+ NL80211_ATTR_CH_SWITCH_COUNT,
+ NL80211_ATTR_CH_SWITCH_BLOCK_TX,
+ NL80211_ATTR_CSA_IES,
+ NL80211_ATTR_CSA_C_OFF_BEACON,
+ NL80211_ATTR_CSA_C_OFF_PRESP,
+
+ NL80211_ATTR_RXMGMT_FLAGS,
+
+ NL80211_ATTR_STA_SUPPORTED_CHANNELS,
+
+ NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES,
+
+ NL80211_ATTR_HANDLE_DFS,
+
+ NL80211_ATTR_SUPPORT_5_MHZ,
+ NL80211_ATTR_SUPPORT_10_MHZ,
+
+ NL80211_ATTR_OPMODE_NOTIF,
+
+ NL80211_ATTR_VENDOR_ID,
+ NL80211_ATTR_VENDOR_SUBCMD,
+ NL80211_ATTR_VENDOR_DATA,
+ NL80211_ATTR_VENDOR_EVENTS,
+
+ NL80211_ATTR_QOS_MAP,
+
+ NL80211_ATTR_MAC_HINT,
+ NL80211_ATTR_WIPHY_FREQ_HINT,
+
+ NL80211_ATTR_MAX_AP_ASSOC_STA,
+
+ NL80211_ATTR_TDLS_PEER_CAPABILITY,
+
+ NL80211_ATTR_SOCKET_OWNER,
+
+ NL80211_ATTR_CSA_C_OFFSETS_TX,
+ NL80211_ATTR_MAX_CSA_COUNTERS,
+
+ NL80211_ATTR_TDLS_INITIATOR,
+
+ NL80211_ATTR_USE_RRM,
+
+ NL80211_ATTR_WIPHY_DYN_ACK,
+
+ NL80211_ATTR_TSID,
+ NL80211_ATTR_USER_PRIO,
+ NL80211_ATTR_ADMITTED_TIME,
+
+ NL80211_ATTR_SMPS_MODE,
+
+ NL80211_ATTR_OPER_CLASS,
+
+ NL80211_ATTR_MAC_MASK,
+
+ NL80211_ATTR_WIPHY_SELF_MANAGED_REG,
+
+ NL80211_ATTR_EXT_FEATURES,
+
+ NL80211_ATTR_SURVEY_RADIO_STATS,
+
+ NL80211_ATTR_NETNS_FD,
+
+ NL80211_ATTR_SCHED_SCAN_DELAY,
+
+ NL80211_ATTR_REG_INDOOR,
+
+ NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS,
+ NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL,
+ NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS,
+ NL80211_ATTR_SCHED_SCAN_PLANS,
+
+ NL80211_ATTR_PBSS,
+
+ NL80211_ATTR_BSS_SELECT,
+
+ NL80211_ATTR_STA_SUPPORT_P2P_PS,
+
+ NL80211_ATTR_PAD,
+
+ NL80211_ATTR_IFTYPE_EXT_CAPA,
+
+ NL80211_ATTR_MU_MIMO_GROUP_DATA,
+ NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR,
+
+ NL80211_ATTR_SCAN_START_TIME_TSF,
+ NL80211_ATTR_SCAN_START_TIME_TSF_BSSID,
+ NL80211_ATTR_MEASUREMENT_DURATION,
+ NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY,
+
+ NL80211_ATTR_MESH_PEER_AID,
+
+ NL80211_ATTR_NAN_MASTER_PREF,
+ NL80211_ATTR_BANDS,
+ NL80211_ATTR_NAN_FUNC,
+ NL80211_ATTR_NAN_MATCH,
+
+ NL80211_ATTR_FILS_KEK,
+ NL80211_ATTR_FILS_NONCES,
+
+ NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED,
+
+ NL80211_ATTR_BSSID,
+
+ NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI,
+ NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST,
+
+ NL80211_ATTR_TIMEOUT_REASON,
+
+ NL80211_ATTR_FILS_ERP_USERNAME,
+ NL80211_ATTR_FILS_ERP_REALM,
+ NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM,
+ NL80211_ATTR_FILS_ERP_RRK,
+ NL80211_ATTR_FILS_CACHE_ID,
+
+ NL80211_ATTR_PMK,
+
+ NL80211_ATTR_SCHED_SCAN_MULTI,
+ NL80211_ATTR_SCHED_SCAN_MAX_REQS,
+
+ NL80211_ATTR_WANT_1X_4WAY_HS,
+ NL80211_ATTR_PMKR0_NAME,
+ NL80211_ATTR_PORT_AUTHORIZED,
+
+ NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+ NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
+ NL80211_ATTR_NSS,
+ NL80211_ATTR_ACK_SIGNAL,
+
+ NL80211_ATTR_CONTROL_PORT_OVER_NL80211,
+
+ NL80211_ATTR_TXQ_STATS,
+ NL80211_ATTR_TXQ_LIMIT,
+ NL80211_ATTR_TXQ_MEMORY_LIMIT,
+ NL80211_ATTR_TXQ_QUANTUM,
+
+ NL80211_ATTR_HE_CAPABILITY,
+
+ NL80211_ATTR_FTM_RESPONDER,
+
+ NL80211_ATTR_FTM_RESPONDER_STATS,
+
+ NL80211_ATTR_TIMEOUT,
+
+ NL80211_ATTR_PEER_MEASUREMENTS,
+
+ NL80211_ATTR_AIRTIME_WEIGHT,
+ NL80211_ATTR_STA_TX_POWER_SETTING,
+ NL80211_ATTR_STA_TX_POWER,
+
+ NL80211_ATTR_SAE_PASSWORD,
+
+ NL80211_ATTR_TWT_RESPONDER,
+
+ NL80211_ATTR_HE_OBSS_PD,
+
+ NL80211_ATTR_WIPHY_EDMG_CHANNELS,
+ NL80211_ATTR_WIPHY_EDMG_BW_CONFIG,
+
+ /* add attributes here, update the policy in nl80211.c */
+
+ __NL80211_ATTR_AFTER_LAST,
+ NUM_NL80211_ATTR = __NL80211_ATTR_AFTER_LAST,
+ NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1
+};
+
+/* source-level API compatibility */
+#define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION
+#define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
+#define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER
+#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA
+
+/*
+ * Allow user space programs to use #ifdef on new attributes by defining them
+ * here
+ */
+#define NL80211_CMD_CONNECT NL80211_CMD_CONNECT
+#define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY
+#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES
+#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS
+#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ
+#define NL80211_ATTR_WIPHY_CHANNEL_TYPE NL80211_ATTR_WIPHY_CHANNEL_TYPE
+#define NL80211_ATTR_MGMT_SUBTYPE NL80211_ATTR_MGMT_SUBTYPE
+#define NL80211_ATTR_IE NL80211_ATTR_IE
+#define NL80211_ATTR_REG_INITIATOR NL80211_ATTR_REG_INITIATOR
+#define NL80211_ATTR_REG_TYPE NL80211_ATTR_REG_TYPE
+#define NL80211_ATTR_FRAME NL80211_ATTR_FRAME
+#define NL80211_ATTR_SSID NL80211_ATTR_SSID
+#define NL80211_ATTR_AUTH_TYPE NL80211_ATTR_AUTH_TYPE
+#define NL80211_ATTR_REASON_CODE NL80211_ATTR_REASON_CODE
+#define NL80211_ATTR_CIPHER_SUITES_PAIRWISE NL80211_ATTR_CIPHER_SUITES_PAIRWISE
+#define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP
+#define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS
+#define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES
+#define NL80211_ATTR_KEY NL80211_ATTR_KEY
+#define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
+
+#define NL80211_WIPHY_NAME_MAXLEN 64
+
+#define NL80211_MAX_SUPP_RATES 32
+#define NL80211_MAX_SUPP_HT_RATES 77
+#define NL80211_MAX_SUPP_REG_RULES 128
+#define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0
+#define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16
+#define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24
+#define NL80211_HT_CAPABILITY_LEN 26
+#define NL80211_VHT_CAPABILITY_LEN 12
+#define NL80211_HE_MIN_CAPABILITY_LEN 16
+#define NL80211_HE_MAX_CAPABILITY_LEN 54
+#define NL80211_MAX_NR_CIPHER_SUITES 5
+#define NL80211_MAX_NR_AKM_SUITES 2
+
+#define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10
+
+/* default RSSI threshold for scan results if none specified. */
+#define NL80211_SCAN_RSSI_THOLD_OFF -300
+
+#define NL80211_CQM_TXE_MAX_INTVL 1800
+
+/**
+ * enum nl80211_iftype - (virtual) interface types
+ *
+ * @NL80211_IFTYPE_UNSPECIFIED: unspecified type, driver decides
+ * @NL80211_IFTYPE_ADHOC: independent BSS member
+ * @NL80211_IFTYPE_STATION: managed BSS member
+ * @NL80211_IFTYPE_AP: access point
+ * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points; VLAN interfaces
+ * are a bit special in that they must always be tied to a pre-existing
+ * AP type interface.
+ * @NL80211_IFTYPE_WDS: wireless distribution interface
+ * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames
+ * @NL80211_IFTYPE_MESH_POINT: mesh point
+ * @NL80211_IFTYPE_P2P_CLIENT: P2P client
+ * @NL80211_IFTYPE_P2P_GO: P2P group owner
+ * @NL80211_IFTYPE_P2P_DEVICE: P2P device interface type, this is not a netdev
+ * and therefore can't be created in the normal ways, use the
+ * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE
+ * commands to create and destroy one
+ * @NL80211_IF_TYPE_OCB: Outside Context of a BSS
+ * This mode corresponds to the MIB variable dot11OCBActivated=true
+ * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev)
+ * @NL80211_IFTYPE_MAX: highest interface type number currently defined
+ * @NUM_NL80211_IFTYPES: number of defined interface types
+ *
+ * These values are used with the %NL80211_ATTR_IFTYPE
+ * to set the type of an interface.
+ *
+ */
+enum nl80211_iftype {
+ NL80211_IFTYPE_UNSPECIFIED,
+ NL80211_IFTYPE_ADHOC,
+ NL80211_IFTYPE_STATION,
+ NL80211_IFTYPE_AP,
+ NL80211_IFTYPE_AP_VLAN,
+ NL80211_IFTYPE_WDS,
+ NL80211_IFTYPE_MONITOR,
+ NL80211_IFTYPE_MESH_POINT,
+ NL80211_IFTYPE_P2P_CLIENT,
+ NL80211_IFTYPE_P2P_GO,
+ NL80211_IFTYPE_P2P_DEVICE,
+ NL80211_IFTYPE_OCB,
+ NL80211_IFTYPE_NAN,
+
+ /* keep last */
+ NUM_NL80211_IFTYPES,
+ NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1
+};
+
+/**
+ * enum nl80211_sta_flags - station flags
+ *
+ * Station flags. When a station is added to an AP interface, it is
+ * assumed to be already associated (and hence authenticated.)
+ *
+ * @__NL80211_STA_FLAG_INVALID: attribute number 0 is reserved
+ * @NL80211_STA_FLAG_AUTHORIZED: station is authorized (802.1X)
+ * @NL80211_STA_FLAG_SHORT_PREAMBLE: station is capable of receiving frames
+ * with short barker preamble
+ * @NL80211_STA_FLAG_WME: station is WME/QoS capable
+ * @NL80211_STA_FLAG_MFP: station uses management frame protection
+ * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated
+ * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer -- this flag should
+ * only be used in managed mode (even in the flags mask). Note that the
+ * flag can't be changed, it is only valid while adding a station, and
+ * attempts to change it will silently be ignored (rather than rejected
+ * as errors.)
+ * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers
+ * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a
+ * previously added station into associated state
+ * @NL80211_STA_FLAG_MAX: highest station flag number currently defined
+ * @__NL80211_STA_FLAG_AFTER_LAST: internal use
+ */
+enum nl80211_sta_flags {
+ __NL80211_STA_FLAG_INVALID,
+ NL80211_STA_FLAG_AUTHORIZED,
+ NL80211_STA_FLAG_SHORT_PREAMBLE,
+ NL80211_STA_FLAG_WME,
+ NL80211_STA_FLAG_MFP,
+ NL80211_STA_FLAG_AUTHENTICATED,
+ NL80211_STA_FLAG_TDLS_PEER,
+ NL80211_STA_FLAG_ASSOCIATED,
+
+ /* keep last */
+ __NL80211_STA_FLAG_AFTER_LAST,
+ NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sta_p2p_ps_status - station support of P2P PS
+ *
+ * @NL80211_P2P_PS_UNSUPPORTED: station doesn't support P2P PS mechanism
+ * @@NL80211_P2P_PS_SUPPORTED: station supports P2P PS mechanism
+ * @NUM_NL80211_P2P_PS_STATUS: number of values
+ */
+enum nl80211_sta_p2p_ps_status {
+ NL80211_P2P_PS_UNSUPPORTED = 0,
+ NL80211_P2P_PS_SUPPORTED,
+
+ NUM_NL80211_P2P_PS_STATUS,
+};
+
+#define NL80211_STA_FLAG_MAX_OLD_API NL80211_STA_FLAG_TDLS_PEER
+
+/**
+ * struct nl80211_sta_flag_update - station flags mask/set
+ * @mask: mask of station flags to set
+ * @set: which values to set them to
+ *
+ * Both mask and set contain bits as per &enum nl80211_sta_flags.
+ */
+struct nl80211_sta_flag_update {
+ __u32 mask;
+ __u32 set;
+} __attribute__((packed));
+
+/**
+ * enum nl80211_he_gi - HE guard interval
+ * @NL80211_RATE_INFO_HE_GI_0_8: 0.8 usec
+ * @NL80211_RATE_INFO_HE_GI_1_6: 1.6 usec
+ * @NL80211_RATE_INFO_HE_GI_3_2: 3.2 usec
+ */
+enum nl80211_he_gi {
+ NL80211_RATE_INFO_HE_GI_0_8,
+ NL80211_RATE_INFO_HE_GI_1_6,
+ NL80211_RATE_INFO_HE_GI_3_2,
+};
+
+/**
+ * enum nl80211_he_ru_alloc - HE RU allocation values
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_106: 106-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_242: 242-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_484: 484-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_996: 996-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_2x996: 2x996-tone RU allocation
+ */
+enum nl80211_he_ru_alloc {
+ NL80211_RATE_INFO_HE_RU_ALLOC_26,
+ NL80211_RATE_INFO_HE_RU_ALLOC_52,
+ NL80211_RATE_INFO_HE_RU_ALLOC_106,
+ NL80211_RATE_INFO_HE_RU_ALLOC_242,
+ NL80211_RATE_INFO_HE_RU_ALLOC_484,
+ NL80211_RATE_INFO_HE_RU_ALLOC_996,
+ NL80211_RATE_INFO_HE_RU_ALLOC_2x996,
+};
+
+/**
+ * enum nl80211_rate_info - bitrate information
+ *
+ * These attribute types are used with %NL80211_STA_INFO_TXRATE
+ * when getting information about the bitrate of a station.
+ * There are 2 attributes for bitrate, a legacy one that represents
+ * a 16-bit value, and new one that represents a 32-bit value.
+ * If the rate value fits into 16 bit, both attributes are reported
+ * with the same value. If the rate is too high to fit into 16 bits
+ * (>6.5535Gbps) only 32-bit attribute is included.
+ * User space tools encouraged to use the 32-bit attribute and fall
+ * back to the 16-bit one for compatibility with older kernels.
+ *
+ * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s)
+ * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8)
+ * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 MHz dualchannel bitrate
+ * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval
+ * @NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s)
+ * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined
+ * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8)
+ * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8)
+ * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate
+ * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the
+ * same as 160 for purposes of the bitrates
+ * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate
+ * @NL80211_RATE_INFO_10_MHZ_WIDTH: 10 MHz width - note that this is
+ * a legacy rate and will be reported as the actual bitrate, i.e.
+ * half the base (20 MHz) rate
+ * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is
+ * a legacy rate and will be reported as the actual bitrate, i.e.
+ * a quarter of the base (20 MHz) rate
+ * @NL80211_RATE_INFO_HE_MCS: HE MCS index (u8, 0-11)
+ * @NL80211_RATE_INFO_HE_NSS: HE NSS value (u8, 1-8)
+ * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier
+ * (u8, see &enum nl80211_he_gi)
+ * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1)
+ * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then
+ * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc)
+ * @__NL80211_RATE_INFO_AFTER_LAST: internal use
+ */
+enum nl80211_rate_info {
+ __NL80211_RATE_INFO_INVALID,
+ NL80211_RATE_INFO_BITRATE,
+ NL80211_RATE_INFO_MCS,
+ NL80211_RATE_INFO_40_MHZ_WIDTH,
+ NL80211_RATE_INFO_SHORT_GI,
+ NL80211_RATE_INFO_BITRATE32,
+ NL80211_RATE_INFO_VHT_MCS,
+ NL80211_RATE_INFO_VHT_NSS,
+ NL80211_RATE_INFO_80_MHZ_WIDTH,
+ NL80211_RATE_INFO_80P80_MHZ_WIDTH,
+ NL80211_RATE_INFO_160_MHZ_WIDTH,
+ NL80211_RATE_INFO_10_MHZ_WIDTH,
+ NL80211_RATE_INFO_5_MHZ_WIDTH,
+ NL80211_RATE_INFO_HE_MCS,
+ NL80211_RATE_INFO_HE_NSS,
+ NL80211_RATE_INFO_HE_GI,
+ NL80211_RATE_INFO_HE_DCM,
+ NL80211_RATE_INFO_HE_RU_ALLOC,
+
+ /* keep last */
+ __NL80211_RATE_INFO_AFTER_LAST,
+ NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sta_bss_param - BSS information collected by STA
+ *
+ * These attribute types are used with %NL80211_STA_INFO_BSS_PARAM
+ * when getting information about the bitrate of a station.
+ *
+ * @__NL80211_STA_BSS_PARAM_INVALID: attribute number 0 is reserved
+ * @NL80211_STA_BSS_PARAM_CTS_PROT: whether CTS protection is enabled (flag)
+ * @NL80211_STA_BSS_PARAM_SHORT_PREAMBLE: whether short preamble is enabled
+ * (flag)
+ * @NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME: whether short slot time is enabled
+ * (flag)
+ * @NL80211_STA_BSS_PARAM_DTIM_PERIOD: DTIM period for beaconing (u8)
+ * @NL80211_STA_BSS_PARAM_BEACON_INTERVAL: Beacon interval (u16)
+ * @NL80211_STA_BSS_PARAM_MAX: highest sta_bss_param number currently defined
+ * @__NL80211_STA_BSS_PARAM_AFTER_LAST: internal use
+ */
+enum nl80211_sta_bss_param {
+ __NL80211_STA_BSS_PARAM_INVALID,
+ NL80211_STA_BSS_PARAM_CTS_PROT,
+ NL80211_STA_BSS_PARAM_SHORT_PREAMBLE,
+ NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME,
+ NL80211_STA_BSS_PARAM_DTIM_PERIOD,
+ NL80211_STA_BSS_PARAM_BEACON_INTERVAL,
+
+ /* keep last */
+ __NL80211_STA_BSS_PARAM_AFTER_LAST,
+ NL80211_STA_BSS_PARAM_MAX = __NL80211_STA_BSS_PARAM_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sta_info - station information
+ *
+ * These attribute types are used with %NL80211_ATTR_STA_INFO
+ * when getting information about a station.
+ *
+ * @__NL80211_STA_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs)
+ * @NL80211_STA_INFO_RX_BYTES: total received bytes (MPDU length)
+ * (u32, from this station)
+ * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (MPDU length)
+ * (u32, to this station)
+ * @NL80211_STA_INFO_RX_BYTES64: total received bytes (MPDU length)
+ * (u64, from this station)
+ * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (MPDU length)
+ * (u64, to this station)
+ * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
+ * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
+ * containing info as possible, see &enum nl80211_rate_info
+ * @NL80211_STA_INFO_RX_PACKETS: total received packet (MSDUs and MMPDUs)
+ * (u32, from this station)
+ * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (MSDUs and MMPDUs)
+ * (u32, to this station)
+ * @NL80211_STA_INFO_TX_RETRIES: total retries (MPDUs) (u32, to this station)
+ * @NL80211_STA_INFO_TX_FAILED: total failed packets (MPDUs)
+ * (u32, to this station)
+ * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
+ * @NL80211_STA_INFO_LLID: the station's mesh LLID
+ * @NL80211_STA_INFO_PLID: the station's mesh PLID
+ * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station
+ * (see %enum nl80211_plink_state)
+ * @NL80211_STA_INFO_RX_BITRATE: last unicast data frame rx rate, nested
+ * attribute, like NL80211_STA_INFO_TX_BITRATE.
+ * @NL80211_STA_INFO_BSS_PARAM: current station's view of BSS, nested attribute
+ * containing info as possible, see &enum nl80211_sta_bss_param
+ * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected
+ * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update.
+ * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32)
+ * @NL80211_STA_INFO_T_OFFSET: timing offset with respect to this STA (s64)
+ * @NL80211_STA_INFO_LOCAL_PM: local mesh STA link-specific power mode
+ * @NL80211_STA_INFO_PEER_PM: peer mesh STA link-specific power mode
+ * @NL80211_STA_INFO_NONPEER_PM: neighbor mesh STA power save mode towards
+ * non-peer STA
+ * @NL80211_STA_INFO_CHAIN_SIGNAL: per-chain signal strength of last PPDU
+ * Contains a nested array of signal strength attributes (u8, dBm)
+ * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average
+ * Same format as NL80211_STA_INFO_CHAIN_SIGNAL.
+ * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the
+ * 802.11 header (u32, kbps)
+ * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons
+ * (u64)
+ * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64)
+ * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average
+ * for beacons only (u8, dBm)
+ * @NL80211_STA_INFO_TID_STATS: per-TID statistics (see &enum nl80211_tid_stats)
+ * This is a nested attribute where each the inner attribute number is the
+ * TID+1 and the special TID 16 (i.e. value 17) is used for non-QoS frames;
+ * each one of those is again nested with &enum nl80211_tid_stats
+ * attributes carrying the actual values.
+ * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
+ * received from the station (u64, usec)
+ * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm)
+ * @NL80211_STA_INFO_ACK_SIGNAL_AVG: avg signal strength of ACK frames (s8, dBm)
+ * @NL80211_STA_INFO_RX_MPDUS: total number of received packets (MPDUs)
+ * (u32, from this station)
+ * @NL80211_STA_INFO_FCS_ERROR_COUNT: total number of packets (MPDUs) received
+ * with an FCS error (u32, from this station). This count may not include
+ * some packets with an FCS error due to TA corruption. Hence this counter
+ * might not be fully accurate.
+ * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a
+ * mesh gate (u8, 0 or 1)
+ * @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames
+ * sent to the station (u64, usec)
+ * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16)
+ * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station
+ * @NL80211_STA_INFO_ASSOC_AT_BOOTTIME: Timestamp (CLOCK_BOOTTIME, nanoseconds)
+ * of STA's association
+ * @__NL80211_STA_INFO_AFTER_LAST: internal
+ * @NL80211_STA_INFO_MAX: highest possible station info attribute
+ */
+enum nl80211_sta_info {
+ __NL80211_STA_INFO_INVALID,
+ NL80211_STA_INFO_INACTIVE_TIME,
+ NL80211_STA_INFO_RX_BYTES,
+ NL80211_STA_INFO_TX_BYTES,
+ NL80211_STA_INFO_LLID,
+ NL80211_STA_INFO_PLID,
+ NL80211_STA_INFO_PLINK_STATE,
+ NL80211_STA_INFO_SIGNAL,
+ NL80211_STA_INFO_TX_BITRATE,
+ NL80211_STA_INFO_RX_PACKETS,
+ NL80211_STA_INFO_TX_PACKETS,
+ NL80211_STA_INFO_TX_RETRIES,
+ NL80211_STA_INFO_TX_FAILED,
+ NL80211_STA_INFO_SIGNAL_AVG,
+ NL80211_STA_INFO_RX_BITRATE,
+ NL80211_STA_INFO_BSS_PARAM,
+ NL80211_STA_INFO_CONNECTED_TIME,
+ NL80211_STA_INFO_STA_FLAGS,
+ NL80211_STA_INFO_BEACON_LOSS,
+ NL80211_STA_INFO_T_OFFSET,
+ NL80211_STA_INFO_LOCAL_PM,
+ NL80211_STA_INFO_PEER_PM,
+ NL80211_STA_INFO_NONPEER_PM,
+ NL80211_STA_INFO_RX_BYTES64,
+ NL80211_STA_INFO_TX_BYTES64,
+ NL80211_STA_INFO_CHAIN_SIGNAL,
+ NL80211_STA_INFO_CHAIN_SIGNAL_AVG,
+ NL80211_STA_INFO_EXPECTED_THROUGHPUT,
+ NL80211_STA_INFO_RX_DROP_MISC,
+ NL80211_STA_INFO_BEACON_RX,
+ NL80211_STA_INFO_BEACON_SIGNAL_AVG,
+ NL80211_STA_INFO_TID_STATS,
+ NL80211_STA_INFO_RX_DURATION,
+ NL80211_STA_INFO_PAD,
+ NL80211_STA_INFO_ACK_SIGNAL,
+ NL80211_STA_INFO_ACK_SIGNAL_AVG,
+ NL80211_STA_INFO_RX_MPDUS,
+ NL80211_STA_INFO_FCS_ERROR_COUNT,
+ NL80211_STA_INFO_CONNECTED_TO_GATE,
+ NL80211_STA_INFO_TX_DURATION,
+ NL80211_STA_INFO_AIRTIME_WEIGHT,
+ NL80211_STA_INFO_AIRTIME_LINK_METRIC,
+ NL80211_STA_INFO_ASSOC_AT_BOOTTIME,
+
+ /* keep last */
+ __NL80211_STA_INFO_AFTER_LAST,
+ NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1
+};
+
+/* we renamed this - stay compatible */
+#define NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG NL80211_STA_INFO_ACK_SIGNAL_AVG
+
+
+/**
+ * enum nl80211_tid_stats - per TID statistics attributes
+ * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved
+ * @NL80211_TID_STATS_RX_MSDU: number of MSDUs received (u64)
+ * @NL80211_TID_STATS_TX_MSDU: number of MSDUs transmitted (or
+ * attempted to transmit; u64)
+ * @NL80211_TID_STATS_TX_MSDU_RETRIES: number of retries for
+ * transmitted MSDUs (not counting the first attempt; u64)
+ * @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted
+ * MSDUs (u64)
+ * @NL80211_TID_STATS_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_TID_STATS_TXQ_STATS: TXQ stats (nested attribute)
+ * @NUM_NL80211_TID_STATS: number of attributes here
+ * @NL80211_TID_STATS_MAX: highest numbered attribute here
+ */
+enum nl80211_tid_stats {
+ __NL80211_TID_STATS_INVALID,
+ NL80211_TID_STATS_RX_MSDU,
+ NL80211_TID_STATS_TX_MSDU,
+ NL80211_TID_STATS_TX_MSDU_RETRIES,
+ NL80211_TID_STATS_TX_MSDU_FAILED,
+ NL80211_TID_STATS_PAD,
+ NL80211_TID_STATS_TXQ_STATS,
+
+ /* keep last */
+ NUM_NL80211_TID_STATS,
+ NL80211_TID_STATS_MAX = NUM_NL80211_TID_STATS - 1
+};
+
+/**
+ * enum nl80211_txq_stats - per TXQ statistics attributes
+ * @__NL80211_TXQ_STATS_INVALID: attribute number 0 is reserved
+ * @NUM_NL80211_TXQ_STATS: number of attributes here
+ * @NL80211_TXQ_STATS_BACKLOG_BYTES: number of bytes currently backlogged
+ * @NL80211_TXQ_STATS_BACKLOG_PACKETS: number of packets currently
+ * backlogged
+ * @NL80211_TXQ_STATS_FLOWS: total number of new flows seen
+ * @NL80211_TXQ_STATS_DROPS: total number of packet drops
+ * @NL80211_TXQ_STATS_ECN_MARKS: total number of packet ECN marks
+ * @NL80211_TXQ_STATS_OVERLIMIT: number of drops due to queue space overflow
+ * @NL80211_TXQ_STATS_OVERMEMORY: number of drops due to memory limit overflow
+ * (only for per-phy stats)
+ * @NL80211_TXQ_STATS_COLLISIONS: number of hash collisions
+ * @NL80211_TXQ_STATS_TX_BYTES: total number of bytes dequeued from TXQ
+ * @NL80211_TXQ_STATS_TX_PACKETS: total number of packets dequeued from TXQ
+ * @NL80211_TXQ_STATS_MAX_FLOWS: number of flow buckets for PHY
+ * @NL80211_TXQ_STATS_MAX: highest numbered attribute here
+ */
+enum nl80211_txq_stats {
+ __NL80211_TXQ_STATS_INVALID,
+ NL80211_TXQ_STATS_BACKLOG_BYTES,
+ NL80211_TXQ_STATS_BACKLOG_PACKETS,
+ NL80211_TXQ_STATS_FLOWS,
+ NL80211_TXQ_STATS_DROPS,
+ NL80211_TXQ_STATS_ECN_MARKS,
+ NL80211_TXQ_STATS_OVERLIMIT,
+ NL80211_TXQ_STATS_OVERMEMORY,
+ NL80211_TXQ_STATS_COLLISIONS,
+ NL80211_TXQ_STATS_TX_BYTES,
+ NL80211_TXQ_STATS_TX_PACKETS,
+ NL80211_TXQ_STATS_MAX_FLOWS,
+
+ /* keep last */
+ NUM_NL80211_TXQ_STATS,
+ NL80211_TXQ_STATS_MAX = NUM_NL80211_TXQ_STATS - 1
+};
+
+/**
+ * enum nl80211_mpath_flags - nl80211 mesh path flags
+ *
+ * @NL80211_MPATH_FLAG_ACTIVE: the mesh path is active
+ * @NL80211_MPATH_FLAG_RESOLVING: the mesh path discovery process is running
+ * @NL80211_MPATH_FLAG_SN_VALID: the mesh path contains a valid SN
+ * @NL80211_MPATH_FLAG_FIXED: the mesh path has been manually set
+ * @NL80211_MPATH_FLAG_RESOLVED: the mesh path discovery process succeeded
+ */
+enum nl80211_mpath_flags {
+ NL80211_MPATH_FLAG_ACTIVE = 1<<0,
+ NL80211_MPATH_FLAG_RESOLVING = 1<<1,
+ NL80211_MPATH_FLAG_SN_VALID = 1<<2,
+ NL80211_MPATH_FLAG_FIXED = 1<<3,
+ NL80211_MPATH_FLAG_RESOLVED = 1<<4,
+};
+
+/**
+ * enum nl80211_mpath_info - mesh path information
+ *
+ * These attribute types are used with %NL80211_ATTR_MPATH_INFO when getting
+ * information about a mesh path.
+ *
+ * @__NL80211_MPATH_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_MPATH_INFO_FRAME_QLEN: number of queued frames for this destination
+ * @NL80211_MPATH_INFO_SN: destination sequence number
+ * @NL80211_MPATH_INFO_METRIC: metric (cost) of this mesh path
+ * @NL80211_MPATH_INFO_EXPTIME: expiration time for the path, in msec from now
+ * @NL80211_MPATH_INFO_FLAGS: mesh path flags, enumerated in
+ * &enum nl80211_mpath_flags;
+ * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec
+ * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries
+ * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination
+ * @NL80211_MPATH_INFO_PATH_CHANGE: total number of path changes to destination
+ * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number
+ * currently defined
+ * @__NL80211_MPATH_INFO_AFTER_LAST: internal use
+ */
+enum nl80211_mpath_info {
+ __NL80211_MPATH_INFO_INVALID,
+ NL80211_MPATH_INFO_FRAME_QLEN,
+ NL80211_MPATH_INFO_SN,
+ NL80211_MPATH_INFO_METRIC,
+ NL80211_MPATH_INFO_EXPTIME,
+ NL80211_MPATH_INFO_FLAGS,
+ NL80211_MPATH_INFO_DISCOVERY_TIMEOUT,
+ NL80211_MPATH_INFO_DISCOVERY_RETRIES,
+ NL80211_MPATH_INFO_HOP_COUNT,
+ NL80211_MPATH_INFO_PATH_CHANGE,
+
+ /* keep last */
+ __NL80211_MPATH_INFO_AFTER_LAST,
+ NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_band_iftype_attr - Interface type data attributes
+ *
+ * @__NL80211_BAND_IFTYPE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BAND_IFTYPE_ATTR_IFTYPES: nested attribute containing a flag attribute
+ * for each interface type that supports the band data
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC: HE MAC capabilities as in HE
+ * capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY: HE PHY capabilities as in HE
+ * capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET: HE supported NSS/MCS as in HE
+ * capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as
+ * defined in HE capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently
+ * defined
+ * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_band_iftype_attr {
+ __NL80211_BAND_IFTYPE_ATTR_INVALID,
+
+ NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+ NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+
+ /* keep last */
+ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
+ NL80211_BAND_IFTYPE_ATTR_MAX = __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_band_attr - band attributes
+ * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BAND_ATTR_FREQS: supported frequencies in this band,
+ * an array of nested frequency attributes
+ * @NL80211_BAND_ATTR_RATES: supported bitrates in this band,
+ * an array of nested bitrate attributes
+ * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as
+ * defined in 802.11n
+ * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n
+ * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n
+ * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as
+ * defined in 802.11ac
+ * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using
+ * attributes from &enum nl80211_band_iftype_attr
+ * @NL80211_BAND_ATTR_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz
+ * channel(s) that are allowed to be used for EDMG transmissions.
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251.
+ * @NL80211_BAND_ATTR_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes
+ * the allowed channel bandwidth configurations.
+ * Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13.
+ * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined
+ * @__NL80211_BAND_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_band_attr {
+ __NL80211_BAND_ATTR_INVALID,
+ NL80211_BAND_ATTR_FREQS,
+ NL80211_BAND_ATTR_RATES,
+
+ NL80211_BAND_ATTR_HT_MCS_SET,
+ NL80211_BAND_ATTR_HT_CAPA,
+ NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
+ NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
+
+ NL80211_BAND_ATTR_VHT_MCS_SET,
+ NL80211_BAND_ATTR_VHT_CAPA,
+ NL80211_BAND_ATTR_IFTYPE_DATA,
+
+ NL80211_BAND_ATTR_EDMG_CHANNELS,
+ NL80211_BAND_ATTR_EDMG_BW_CONFIG,
+
+ /* keep last */
+ __NL80211_BAND_ATTR_AFTER_LAST,
+ NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1
+};
+
+#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA
+
+/**
+ * enum nl80211_wmm_rule - regulatory wmm rule
+ *
+ * @__NL80211_WMMR_INVALID: attribute number 0 is reserved
+ * @NL80211_WMMR_CW_MIN: Minimum contention window slot.
+ * @NL80211_WMMR_CW_MAX: Maximum contention window slot.
+ * @NL80211_WMMR_AIFSN: Arbitration Inter Frame Space.
+ * @NL80211_WMMR_TXOP: Maximum allowed tx operation time.
+ * @nl80211_WMMR_MAX: highest possible wmm rule.
+ * @__NL80211_WMMR_LAST: Internal use.
+ */
+enum nl80211_wmm_rule {
+ __NL80211_WMMR_INVALID,
+ NL80211_WMMR_CW_MIN,
+ NL80211_WMMR_CW_MAX,
+ NL80211_WMMR_AIFSN,
+ NL80211_WMMR_TXOP,
+
+ /* keep last */
+ __NL80211_WMMR_LAST,
+ NL80211_WMMR_MAX = __NL80211_WMMR_LAST - 1
+};
+
+/**
+ * enum nl80211_frequency_attr - frequency attributes
+ * @__NL80211_FREQUENCY_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz
+ * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current
+ * regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_NO_IR: no mechanisms that initiate radiation
+ * are permitted on this channel, this includes sending probe
+ * requests, or modes of operation that require beaconing.
+ * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory
+ * on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm
+ * (100 * dBm).
+ * @NL80211_FREQUENCY_ATTR_DFS_STATE: current state for DFS
+ * (enum nl80211_dfs_state)
+ * @NL80211_FREQUENCY_ATTR_DFS_TIME: time in miliseconds for how long
+ * this channel is in this DFS state.
+ * @NL80211_FREQUENCY_ATTR_NO_HT40_MINUS: HT40- isn't possible with this
+ * channel as the control channel
+ * @NL80211_FREQUENCY_ATTR_NO_HT40_PLUS: HT40+ isn't possible with this
+ * channel as the control channel
+ * @NL80211_FREQUENCY_ATTR_NO_80MHZ: any 80 MHz channel using this channel
+ * as the primary or any of the secondary channels isn't possible,
+ * this includes 80+80 channels
+ * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel
+ * using this channel as the primary or any of the secondary channels
+ * isn't possible
+ * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
+ * @NL80211_FREQUENCY_ATTR_INDOOR_ONLY: Only indoor use is permitted on this
+ * channel. A channel that has the INDOOR_ONLY attribute can only be
+ * used when there is a clear assessment that the device is operating in
+ * an indoor surroundings, i.e., it is connected to AC power (and not
+ * through portable DC inverters) or is under the control of a master
+ * that is acting as an AP and is connected to AC power.
+ * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this
+ * channel if it's connected concurrently to a BSS on the same channel on
+ * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz
+ * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS
+ * off-channel on a channel that has the IR_CONCURRENT attribute set can be
+ * done when there is a clear assessment that the device is operating under
+ * the guidance of an authorized master, i.e., setting up a GO or TDLS
+ * off-channel while the device is also connected to an AP with DFS and
+ * radar detection on the UNII band (it is up to user-space, i.e.,
+ * wpa_supplicant to perform the required verifications). Using this
+ * attribute for IR is disallowed for master interfaces (IBSS, AP).
+ * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed
+ * on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed
+ * on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_WMM: this channel has wmm limitations.
+ * This is a nested attribute that contains the wmm limitation per AC.
+ * (see &enum nl80211_wmm_rule)
+ * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
+ * currently defined
+ * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
+ *
+ * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122
+ * for more information on the FCC description of the relaxations allowed
+ * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and
+ * NL80211_FREQUENCY_ATTR_IR_CONCURRENT.
+ */
+enum nl80211_frequency_attr {
+ __NL80211_FREQUENCY_ATTR_INVALID,
+ NL80211_FREQUENCY_ATTR_FREQ,
+ NL80211_FREQUENCY_ATTR_DISABLED,
+ NL80211_FREQUENCY_ATTR_NO_IR,
+ __NL80211_FREQUENCY_ATTR_NO_IBSS,
+ NL80211_FREQUENCY_ATTR_RADAR,
+ NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
+ NL80211_FREQUENCY_ATTR_DFS_STATE,
+ NL80211_FREQUENCY_ATTR_DFS_TIME,
+ NL80211_FREQUENCY_ATTR_NO_HT40_MINUS,
+ NL80211_FREQUENCY_ATTR_NO_HT40_PLUS,
+ NL80211_FREQUENCY_ATTR_NO_80MHZ,
+ NL80211_FREQUENCY_ATTR_NO_160MHZ,
+ NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
+ NL80211_FREQUENCY_ATTR_INDOOR_ONLY,
+ NL80211_FREQUENCY_ATTR_IR_CONCURRENT,
+ NL80211_FREQUENCY_ATTR_NO_20MHZ,
+ NL80211_FREQUENCY_ATTR_NO_10MHZ,
+ NL80211_FREQUENCY_ATTR_WMM,
+
+ /* keep last */
+ __NL80211_FREQUENCY_ATTR_AFTER_LAST,
+ NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1
+};
+
+#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER
+#define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR
+#define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR
+#define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR
+#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \
+ NL80211_FREQUENCY_ATTR_IR_CONCURRENT
+
+/**
+ * enum nl80211_bitrate_attr - bitrate attributes
+ * @__NL80211_BITRATE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps
+ * @NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE: Short preamble supported
+ * in 2.4 GHz band.
+ * @NL80211_BITRATE_ATTR_MAX: highest bitrate attribute number
+ * currently defined
+ * @__NL80211_BITRATE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_bitrate_attr {
+ __NL80211_BITRATE_ATTR_INVALID,
+ NL80211_BITRATE_ATTR_RATE,
+ NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE,
+
+ /* keep last */
+ __NL80211_BITRATE_ATTR_AFTER_LAST,
+ NL80211_BITRATE_ATTR_MAX = __NL80211_BITRATE_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_initiator - Indicates the initiator of a reg domain request
+ * @NL80211_REGDOM_SET_BY_CORE: Core queried CRDA for a dynamic world
+ * regulatory domain.
+ * @NL80211_REGDOM_SET_BY_USER: User asked the wireless core to set the
+ * regulatory domain.
+ * @NL80211_REGDOM_SET_BY_DRIVER: a wireless drivers has hinted to the
+ * wireless core it thinks its knows the regulatory domain we should be in.
+ * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an
+ * 802.11 country information element with regulatory information it
+ * thinks we should consider. cfg80211 only processes the country
+ * code from the IE, and relies on the regulatory domain information
+ * structure passed by userspace (CRDA) from our wireless-regdb.
+ * If a channel is enabled but the country code indicates it should
+ * be disabled we disable the channel and re-enable it upon disassociation.
+ */
+enum nl80211_reg_initiator {
+ NL80211_REGDOM_SET_BY_CORE,
+ NL80211_REGDOM_SET_BY_USER,
+ NL80211_REGDOM_SET_BY_DRIVER,
+ NL80211_REGDOM_SET_BY_COUNTRY_IE,
+};
+
+/**
+ * enum nl80211_reg_type - specifies the type of regulatory domain
+ * @NL80211_REGDOM_TYPE_COUNTRY: the regulatory domain set is one that pertains
+ * to a specific country. When this is set you can count on the
+ * ISO / IEC 3166 alpha2 country code being valid.
+ * @NL80211_REGDOM_TYPE_WORLD: the regulatory set domain is the world regulatory
+ * domain.
+ * @NL80211_REGDOM_TYPE_CUSTOM_WORLD: the regulatory domain set is a custom
+ * driver specific world regulatory domain. These do not apply system-wide
+ * and are only applicable to the individual devices which have requested
+ * them to be applied.
+ * @NL80211_REGDOM_TYPE_INTERSECTION: the regulatory domain set is the product
+ * of an intersection between two regulatory domains -- the previously
+ * set regulatory domain on the system and the last accepted regulatory
+ * domain request to be processed.
+ */
+enum nl80211_reg_type {
+ NL80211_REGDOM_TYPE_COUNTRY,
+ NL80211_REGDOM_TYPE_WORLD,
+ NL80211_REGDOM_TYPE_CUSTOM_WORLD,
+ NL80211_REGDOM_TYPE_INTERSECTION,
+};
+
+/**
+ * enum nl80211_reg_rule_attr - regulatory rule attributes
+ * @__NL80211_REG_RULE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_ATTR_REG_RULE_FLAGS: a set of flags which specify additional
+ * considerations for a given frequency range. These are the
+ * &enum nl80211_reg_rule_flags.
+ * @NL80211_ATTR_FREQ_RANGE_START: starting frequencry for the regulatory
+ * rule in KHz. This is not a center of frequency but an actual regulatory
+ * band edge.
+ * @NL80211_ATTR_FREQ_RANGE_END: ending frequency for the regulatory rule
+ * in KHz. This is not a center a frequency but an actual regulatory
+ * band edge.
+ * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this
+ * frequency range, in KHz.
+ * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain
+ * for a given frequency range. The value is in mBi (100 * dBi).
+ * If you don't have one then don't send this.
+ * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for
+ * a given frequency range. The value is in mBm (100 * dBm).
+ * @NL80211_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
+ * If not present or 0 default CAC time will be used.
+ * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number
+ * currently defined
+ * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_reg_rule_attr {
+ __NL80211_REG_RULE_ATTR_INVALID,
+ NL80211_ATTR_REG_RULE_FLAGS,
+
+ NL80211_ATTR_FREQ_RANGE_START,
+ NL80211_ATTR_FREQ_RANGE_END,
+ NL80211_ATTR_FREQ_RANGE_MAX_BW,
+
+ NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
+ NL80211_ATTR_POWER_RULE_MAX_EIRP,
+
+ NL80211_ATTR_DFS_CAC_TIME,
+
+ /* keep last */
+ __NL80211_REG_RULE_ATTR_AFTER_LAST,
+ NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_sched_scan_match_attr - scheduled scan match attributes
+ * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching,
+ * only report BSS with matching SSID.
+ * (This cannot be used together with BSSID.)
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a
+ * BSS in scan results. Filtering is turned off if not specified. Note that
+ * if this attribute is in a match set of its own, then it is treated as
+ * the default value for all matchsets with an SSID, rather than being a
+ * matchset of its own without an RSSI filter. This is due to problems with
+ * how this API was implemented in the past. Also, due to the same problem,
+ * the only way to create a matchset with only an RSSI filter (with this
+ * attribute) is if there's only a single matchset with the RSSI attribute.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI: Flag indicating whether
+ * %NL80211_SCHED_SCAN_MATCH_ATTR_RSSI to be used as absolute RSSI or
+ * relative to current bss's RSSI.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST: When present the RSSI level for
+ * BSS-es in the specified band is to be adjusted before doing
+ * RSSI-based BSS selection. The attribute value is a packed structure
+ * value as specified by &struct nl80211_bss_select_rssi_adjust.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching
+ * (this cannot be used together with SSID).
+ * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the
+ * band specific minimum rssi thresholds for the bands defined in
+ * enum nl80211_band. The minimum rssi threshold value(s32) specific to a
+ * band shall be encapsulated in attribute with type value equals to one
+ * of the NL80211_BAND_* defined in enum nl80211_band. For example, the
+ * minimum rssi threshold value for 2.4GHZ band shall be encapsulated
+ * within an attribute of type NL80211_BAND_2GHZ. And one or more of such
+ * attributes will be nested within this attribute.
+ * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter
+ * attribute number currently defined
+ * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_sched_scan_match_attr {
+ __NL80211_SCHED_SCAN_MATCH_ATTR_INVALID,
+
+ NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
+ NL80211_SCHED_SCAN_MATCH_ATTR_RSSI,
+ NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI,
+ NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST,
+ NL80211_SCHED_SCAN_MATCH_ATTR_BSSID,
+ NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI,
+
+ /* keep last */
+ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST,
+ NL80211_SCHED_SCAN_MATCH_ATTR_MAX =
+ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST - 1
+};
+
+/* only for backward compatibility */
+#define NL80211_ATTR_SCHED_SCAN_MATCH_SSID NL80211_SCHED_SCAN_MATCH_ATTR_SSID
+
+/**
+ * enum nl80211_reg_rule_flags - regulatory rule flags
+ *
+ * @NL80211_RRF_NO_OFDM: OFDM modulation not allowed
+ * @NL80211_RRF_NO_CCK: CCK modulation not allowed
+ * @NL80211_RRF_NO_INDOOR: indoor operation not allowed
+ * @NL80211_RRF_NO_OUTDOOR: outdoor operation not allowed
+ * @NL80211_RRF_DFS: DFS support is required to be used
+ * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links
+ * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links
+ * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed,
+ * this includes probe requests or modes of operation that require
+ * beaconing.
+ * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated
+ * base on contiguous rules and wider channels will be allowed to cross
+ * multiple contiguous/overlapping frequency ranges.
+ * @NL80211_RRF_IR_CONCURRENT: See %NL80211_FREQUENCY_ATTR_IR_CONCURRENT
+ * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation
+ * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation
+ * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed
+ * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed
+ */
+enum nl80211_reg_rule_flags {
+ NL80211_RRF_NO_OFDM = 1<<0,
+ NL80211_RRF_NO_CCK = 1<<1,
+ NL80211_RRF_NO_INDOOR = 1<<2,
+ NL80211_RRF_NO_OUTDOOR = 1<<3,
+ NL80211_RRF_DFS = 1<<4,
+ NL80211_RRF_PTP_ONLY = 1<<5,
+ NL80211_RRF_PTMP_ONLY = 1<<6,
+ NL80211_RRF_NO_IR = 1<<7,
+ __NL80211_RRF_NO_IBSS = 1<<8,
+ NL80211_RRF_AUTO_BW = 1<<11,
+ NL80211_RRF_IR_CONCURRENT = 1<<12,
+ NL80211_RRF_NO_HT40MINUS = 1<<13,
+ NL80211_RRF_NO_HT40PLUS = 1<<14,
+ NL80211_RRF_NO_80MHZ = 1<<15,
+ NL80211_RRF_NO_160MHZ = 1<<16,
+};
+
+#define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR
+#define NL80211_RRF_NO_IBSS NL80211_RRF_NO_IR
+#define NL80211_RRF_NO_IR NL80211_RRF_NO_IR
+#define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\
+ NL80211_RRF_NO_HT40PLUS)
+#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT
+
+/* For backport compatibility with older userspace */
+#define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS)
+
+/**
+ * enum nl80211_dfs_regions - regulatory DFS regions
+ *
+ * @NL80211_DFS_UNSET: Country has no DFS master region specified
+ * @NL80211_DFS_FCC: Country follows DFS master rules from FCC
+ * @NL80211_DFS_ETSI: Country follows DFS master rules from ETSI
+ * @NL80211_DFS_JP: Country follows DFS master rules from JP/MKK/Telec
+ */
+enum nl80211_dfs_regions {
+ NL80211_DFS_UNSET = 0,
+ NL80211_DFS_FCC = 1,
+ NL80211_DFS_ETSI = 2,
+ NL80211_DFS_JP = 3,
+};
+
+/**
+ * enum nl80211_user_reg_hint_type - type of user regulatory hint
+ *
+ * @NL80211_USER_REG_HINT_USER: a user sent the hint. This is always
+ * assumed if the attribute is not set.
+ * @NL80211_USER_REG_HINT_CELL_BASE: the hint comes from a cellular
+ * base station. Device drivers that have been tested to work
+ * properly to support this type of hint can enable these hints
+ * by setting the NL80211_FEATURE_CELL_BASE_REG_HINTS feature
+ * capability on the struct wiphy. The wireless core will
+ * ignore all cell base station hints until at least one device
+ * present has been registered with the wireless core that
+ * has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a
+ * supported feature.
+ * @NL80211_USER_REG_HINT_INDOOR: a user sent an hint indicating that the
+ * platform is operating in an indoor environment.
+ */
+enum nl80211_user_reg_hint_type {
+ NL80211_USER_REG_HINT_USER = 0,
+ NL80211_USER_REG_HINT_CELL_BASE = 1,
+ NL80211_USER_REG_HINT_INDOOR = 2,
+};
+
+/**
+ * enum nl80211_survey_info - survey information
+ *
+ * These attribute types are used with %NL80211_ATTR_SURVEY_INFO
+ * when getting information about a survey.
+ *
+ * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved
+ * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel
+ * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm)
+ * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used
+ * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio
+ * was turned on (on channel or globally)
+ * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary
+ * channel was sensed busy (either due to activity or energy detect)
+ * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension
+ * channel was sensed busy
+ * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent
+ * receiving data (on channel or globally)
+ * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent
+ * transmitting data (on channel or globally)
+ * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan
+ * (on this channel or globally)
+ * @NL80211_SURVEY_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_SURVEY_INFO_TIME_BSS_RX: amount of time the radio spent
+ * receiving frames destined to the local BSS
+ * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
+ * currently defined
+ * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use
+ */
+enum nl80211_survey_info {
+ __NL80211_SURVEY_INFO_INVALID,
+ NL80211_SURVEY_INFO_FREQUENCY,
+ NL80211_SURVEY_INFO_NOISE,
+ NL80211_SURVEY_INFO_IN_USE,
+ NL80211_SURVEY_INFO_TIME,
+ NL80211_SURVEY_INFO_TIME_BUSY,
+ NL80211_SURVEY_INFO_TIME_EXT_BUSY,
+ NL80211_SURVEY_INFO_TIME_RX,
+ NL80211_SURVEY_INFO_TIME_TX,
+ NL80211_SURVEY_INFO_TIME_SCAN,
+ NL80211_SURVEY_INFO_PAD,
+ NL80211_SURVEY_INFO_TIME_BSS_RX,
+
+ /* keep last */
+ __NL80211_SURVEY_INFO_AFTER_LAST,
+ NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1
+};
+
+/* keep old names for compatibility */
+#define NL80211_SURVEY_INFO_CHANNEL_TIME NL80211_SURVEY_INFO_TIME
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY NL80211_SURVEY_INFO_TIME_BUSY
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY NL80211_SURVEY_INFO_TIME_EXT_BUSY
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_RX NL80211_SURVEY_INFO_TIME_RX
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_TX NL80211_SURVEY_INFO_TIME_TX
+
+/**
+ * enum nl80211_mntr_flags - monitor configuration flags
+ *
+ * Monitor configuration flags.
+ *
+ * @__NL80211_MNTR_FLAG_INVALID: reserved
+ *
+ * @NL80211_MNTR_FLAG_FCSFAIL: pass frames with bad FCS
+ * @NL80211_MNTR_FLAG_PLCPFAIL: pass frames with bad PLCP
+ * @NL80211_MNTR_FLAG_CONTROL: pass control frames
+ * @NL80211_MNTR_FLAG_OTHER_BSS: disable BSSID filtering
+ * @NL80211_MNTR_FLAG_COOK_FRAMES: report frames after processing.
+ * overrides all other flags.
+ * @NL80211_MNTR_FLAG_ACTIVE: use the configured MAC address
+ * and ACK incoming unicast packets.
+ *
+ * @__NL80211_MNTR_FLAG_AFTER_LAST: internal use
+ * @NL80211_MNTR_FLAG_MAX: highest possible monitor flag
+ */
+enum nl80211_mntr_flags {
+ __NL80211_MNTR_FLAG_INVALID,
+ NL80211_MNTR_FLAG_FCSFAIL,
+ NL80211_MNTR_FLAG_PLCPFAIL,
+ NL80211_MNTR_FLAG_CONTROL,
+ NL80211_MNTR_FLAG_OTHER_BSS,
+ NL80211_MNTR_FLAG_COOK_FRAMES,
+ NL80211_MNTR_FLAG_ACTIVE,
+
+ /* keep last */
+ __NL80211_MNTR_FLAG_AFTER_LAST,
+ NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_mesh_power_mode - mesh power save modes
+ *
+ * @NL80211_MESH_POWER_UNKNOWN: The mesh power mode of the mesh STA is
+ * not known or has not been set yet.
+ * @NL80211_MESH_POWER_ACTIVE: Active mesh power mode. The mesh STA is
+ * in Awake state all the time.
+ * @NL80211_MESH_POWER_LIGHT_SLEEP: Light sleep mode. The mesh STA will
+ * alternate between Active and Doze states, but will wake up for
+ * neighbor's beacons.
+ * @NL80211_MESH_POWER_DEEP_SLEEP: Deep sleep mode. The mesh STA will
+ * alternate between Active and Doze states, but may not wake up
+ * for neighbor's beacons.
+ *
+ * @__NL80211_MESH_POWER_AFTER_LAST - internal use
+ * @NL80211_MESH_POWER_MAX - highest possible power save level
+ */
+
+enum nl80211_mesh_power_mode {
+ NL80211_MESH_POWER_UNKNOWN,
+ NL80211_MESH_POWER_ACTIVE,
+ NL80211_MESH_POWER_LIGHT_SLEEP,
+ NL80211_MESH_POWER_DEEP_SLEEP,
+
+ __NL80211_MESH_POWER_AFTER_LAST,
+ NL80211_MESH_POWER_MAX = __NL80211_MESH_POWER_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_meshconf_params - mesh configuration parameters
+ *
+ * Mesh configuration parameters. These can be changed while the mesh is
+ * active.
+ *
+ * @__NL80211_MESHCONF_INVALID: internal use
+ *
+ * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in
+ * millisecond units, used by the Peer Link Open message
+ *
+ * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the initial confirm timeout, in
+ * millisecond units, used by the peer link management to close a peer link
+ *
+ * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in
+ * millisecond units
+ *
+ * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed
+ * on this mesh interface
+ *
+ * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link
+ * open retries that can be sent to establish a new peer link instance in a
+ * mesh
+ *
+ * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
+ * point.
+ *
+ * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically open
+ * peer links when we detect compatible mesh peers. Disabled if
+ * @NL80211_MESH_SETUP_USERSPACE_MPM or @NL80211_MESH_SETUP_USERSPACE_AMPE are
+ * set.
+ *
+ * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames
+ * containing a PREQ that an MP can send to a particular destination (path
+ * target)
+ *
+ * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths
+ * (in milliseconds)
+ *
+ * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait
+ * until giving up on a path discovery (in milliseconds)
+ *
+ * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh
+ * points receiving a PREQ shall consider the forwarding information from
+ * the root to be valid. (TU = time unit)
+ *
+ * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in
+ * TUs) during which an MP can send only one action frame containing a PREQ
+ * reference element
+ *
+ * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs)
+ * that it takes for an HWMP information element to propagate across the
+ * mesh
+ *
+ * @NL80211_MESHCONF_HWMP_ROOTMODE: whether root mode is enabled or not
+ *
+ * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
+ * source mesh point for path selection elements.
+ *
+ * @NL80211_MESHCONF_HWMP_RANN_INTERVAL: The interval of time (in TUs) between
+ * root announcements are transmitted.
+ *
+ * @NL80211_MESHCONF_GATE_ANNOUNCEMENTS: Advertise that this mesh station has
+ * access to a broader network beyond the MBSS. This is done via Root
+ * Announcement frames.
+ *
+ * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in
+ * TUs) during which a mesh STA can send only one Action frame containing a
+ * PERR element.
+ *
+ * @NL80211_MESHCONF_FORWARDING: set Mesh STA as forwarding or non-forwarding
+ * or forwarding entity (default is TRUE - forwarding entity)
+ *
+ * @NL80211_MESHCONF_RSSI_THRESHOLD: RSSI threshold in dBm. This specifies the
+ * threshold for average signal strength of candidate station to establish
+ * a peer link.
+ *
+ * @NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR: maximum number of neighbors
+ * to synchronize to for 11s default synchronization method
+ * (see 11C.12.2.2)
+ *
+ * @NL80211_MESHCONF_HT_OPMODE: set mesh HT protection mode.
+ *
+ * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
+ *
+ * @NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT: The time (in TUs) for
+ * which mesh STAs receiving a proactive PREQ shall consider the forwarding
+ * information to the root mesh STA to be valid.
+ *
+ * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between
+ * proactive PREQs are transmitted.
+ *
+ * @NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL: The minimum interval of time
+ * (in TUs) during which a mesh STA can send only one Action frame
+ * containing a PREQ element for root path confirmation.
+ *
+ * @NL80211_MESHCONF_POWER_MODE: Default mesh power mode for new peer links.
+ * type &enum nl80211_mesh_power_mode (u32)
+ *
+ * @NL80211_MESHCONF_AWAKE_WINDOW: awake window duration (in TUs)
+ *
+ * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've
+ * established peering with for longer than this time (in seconds), then
+ * remove it from the STA's list of peers. You may set this to 0 to disable
+ * the removal of the STA. Default is 30 minutes.
+ *
+ * @NL80211_MESHCONF_CONNECTED_TO_GATE: If set to true then this mesh STA
+ * will advertise that it is connected to a gate in the mesh formation
+ * field. If left unset then the mesh formation field will only
+ * advertise such if there is an active root mesh path.
+ *
+ * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_meshconf_params {
+ __NL80211_MESHCONF_INVALID,
+ NL80211_MESHCONF_RETRY_TIMEOUT,
+ NL80211_MESHCONF_CONFIRM_TIMEOUT,
+ NL80211_MESHCONF_HOLDING_TIMEOUT,
+ NL80211_MESHCONF_MAX_PEER_LINKS,
+ NL80211_MESHCONF_MAX_RETRIES,
+ NL80211_MESHCONF_TTL,
+ NL80211_MESHCONF_AUTO_OPEN_PLINKS,
+ NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
+ NL80211_MESHCONF_PATH_REFRESH_TIME,
+ NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
+ NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
+ NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
+ NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
+ NL80211_MESHCONF_HWMP_ROOTMODE,
+ NL80211_MESHCONF_ELEMENT_TTL,
+ NL80211_MESHCONF_HWMP_RANN_INTERVAL,
+ NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
+ NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
+ NL80211_MESHCONF_FORWARDING,
+ NL80211_MESHCONF_RSSI_THRESHOLD,
+ NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR,
+ NL80211_MESHCONF_HT_OPMODE,
+ NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
+ NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
+ NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL,
+ NL80211_MESHCONF_POWER_MODE,
+ NL80211_MESHCONF_AWAKE_WINDOW,
+ NL80211_MESHCONF_PLINK_TIMEOUT,
+ NL80211_MESHCONF_CONNECTED_TO_GATE,
+
+ /* keep last */
+ __NL80211_MESHCONF_ATTR_AFTER_LAST,
+ NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_mesh_setup_params - mesh setup parameters
+ *
+ * Mesh setup parameters. These are used to start/join a mesh and cannot be
+ * changed while the mesh is active.
+ *
+ * @__NL80211_MESH_SETUP_INVALID: Internal use
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a
+ * vendor specific path selection algorithm or disable it to use the
+ * default HWMP.
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a
+ * vendor specific path metric or disable it to use the default Airtime
+ * metric.
+ *
+ * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a
+ * robust security network ie, or a vendor specific information element
+ * that vendors will use to identify the path selection methods and
+ * metrics in use.
+ *
+ * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication
+ * daemon will be authenticating mesh candidates.
+ *
+ * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication
+ * daemon will be securing peer link frames. AMPE is a secured version of
+ * Mesh Peering Management (MPM) and is implemented with the assistance of
+ * a userspace daemon. When this flag is set, the kernel will send peer
+ * management frames to a userspace daemon that will implement AMPE
+ * functionality (security capabilities selection, key confirmation, and
+ * key management). When the flag is unset (default), the kernel can
+ * autonomously complete (unsecured) mesh peering without the need of a
+ * userspace daemon.
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC: Enable this option to use a
+ * vendor specific synchronization method or disable it to use the default
+ * neighbor offset synchronization
+ *
+ * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will
+ * implement an MPM which handles peer allocation and state.
+ *
+ * @NL80211_MESH_SETUP_AUTH_PROTOCOL: Inform the kernel of the authentication
+ * method (u8, as defined in IEEE 8.4.2.100.6, e.g. 0x1 for SAE).
+ * Default is no authentication method required.
+ *
+ * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number
+ *
+ * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use
+ */
+enum nl80211_mesh_setup_params {
+ __NL80211_MESH_SETUP_INVALID,
+ NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL,
+ NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC,
+ NL80211_MESH_SETUP_IE,
+ NL80211_MESH_SETUP_USERSPACE_AUTH,
+ NL80211_MESH_SETUP_USERSPACE_AMPE,
+ NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC,
+ NL80211_MESH_SETUP_USERSPACE_MPM,
+ NL80211_MESH_SETUP_AUTH_PROTOCOL,
+
+ /* keep last */
+ __NL80211_MESH_SETUP_ATTR_AFTER_LAST,
+ NL80211_MESH_SETUP_ATTR_MAX = __NL80211_MESH_SETUP_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_txq_attr - TX queue parameter attributes
+ * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved
+ * @NL80211_TXQ_ATTR_AC: AC identifier (NL80211_AC_*)
+ * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning
+ * disabled
+ * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form
+ * 2^n-1 in the range 1..32767]
+ * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form
+ * 2^n-1 in the range 1..32767]
+ * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255]
+ * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal
+ * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number
+ */
+enum nl80211_txq_attr {
+ __NL80211_TXQ_ATTR_INVALID,
+ NL80211_TXQ_ATTR_AC,
+ NL80211_TXQ_ATTR_TXOP,
+ NL80211_TXQ_ATTR_CWMIN,
+ NL80211_TXQ_ATTR_CWMAX,
+ NL80211_TXQ_ATTR_AIFS,
+
+ /* keep last */
+ __NL80211_TXQ_ATTR_AFTER_LAST,
+ NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1
+};
+
+enum nl80211_ac {
+ NL80211_AC_VO,
+ NL80211_AC_VI,
+ NL80211_AC_BE,
+ NL80211_AC_BK,
+ NL80211_NUM_ACS
+};
+
+/* backward compat */
+#define NL80211_TXQ_ATTR_QUEUE NL80211_TXQ_ATTR_AC
+#define NL80211_TXQ_Q_VO NL80211_AC_VO
+#define NL80211_TXQ_Q_VI NL80211_AC_VI
+#define NL80211_TXQ_Q_BE NL80211_AC_BE
+#define NL80211_TXQ_Q_BK NL80211_AC_BK
+
+/**
+ * enum nl80211_channel_type - channel type
+ * @NL80211_CHAN_NO_HT: 20 MHz, non-HT channel
+ * @NL80211_CHAN_HT20: 20 MHz HT channel
+ * @NL80211_CHAN_HT40MINUS: HT40 channel, secondary channel
+ * below the control channel
+ * @NL80211_CHAN_HT40PLUS: HT40 channel, secondary channel
+ * above the control channel
+ */
+enum nl80211_channel_type {
+ NL80211_CHAN_NO_HT,
+ NL80211_CHAN_HT20,
+ NL80211_CHAN_HT40MINUS,
+ NL80211_CHAN_HT40PLUS
+};
+
+/**
+ * enum nl80211_key_mode - Key mode
+ *
+ * @NL80211_KEY_RX_TX: (Default)
+ * Key can be used for Rx and Tx immediately
+ *
+ * The following modes can only be selected for unicast keys and when the
+ * driver supports @NL80211_EXT_FEATURE_EXT_KEY_ID:
+ *
+ * @NL80211_KEY_NO_TX: Only allowed in combination with @NL80211_CMD_NEW_KEY:
+ * Unicast key can only be used for Rx, Tx not allowed, yet
+ * @NL80211_KEY_SET_TX: Only allowed in combination with @NL80211_CMD_SET_KEY:
+ * The unicast key identified by idx and mac is cleared for Tx and becomes
+ * the preferred Tx key for the station.
+ */
+enum nl80211_key_mode {
+ NL80211_KEY_RX_TX,
+ NL80211_KEY_NO_TX,
+ NL80211_KEY_SET_TX
+};
+
+/**
+ * enum nl80211_chan_width - channel width definitions
+ *
+ * These values are used with the %NL80211_ATTR_CHANNEL_WIDTH
+ * attribute.
+ *
+ * @NL80211_CHAN_WIDTH_20_NOHT: 20 MHz, non-HT channel
+ * @NL80211_CHAN_WIDTH_20: 20 MHz HT channel
+ * @NL80211_CHAN_WIDTH_40: 40 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * attribute must be provided as well
+ * @NL80211_CHAN_WIDTH_80: 80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * attribute must be provided as well
+ * @NL80211_CHAN_WIDTH_80P80: 80+80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * and %NL80211_ATTR_CENTER_FREQ2 attributes must be provided as well
+ * @NL80211_CHAN_WIDTH_160: 160 MHz channel, the %NL80211_ATTR_CENTER_FREQ1
+ * attribute must be provided as well
+ * @NL80211_CHAN_WIDTH_5: 5 MHz OFDM channel
+ * @NL80211_CHAN_WIDTH_10: 10 MHz OFDM channel
+ */
+enum nl80211_chan_width {
+ NL80211_CHAN_WIDTH_20_NOHT,
+ NL80211_CHAN_WIDTH_20,
+ NL80211_CHAN_WIDTH_40,
+ NL80211_CHAN_WIDTH_80,
+ NL80211_CHAN_WIDTH_80P80,
+ NL80211_CHAN_WIDTH_160,
+ NL80211_CHAN_WIDTH_5,
+ NL80211_CHAN_WIDTH_10,
+};
+
+/**
+ * enum nl80211_bss_scan_width - control channel width for a BSS
+ *
+ * These values are used with the %NL80211_BSS_CHAN_WIDTH attribute.
+ *
+ * @NL80211_BSS_CHAN_WIDTH_20: control channel is 20 MHz wide or compatible
+ * @NL80211_BSS_CHAN_WIDTH_10: control channel is 10 MHz wide
+ * @NL80211_BSS_CHAN_WIDTH_5: control channel is 5 MHz wide
+ */
+enum nl80211_bss_scan_width {
+ NL80211_BSS_CHAN_WIDTH_20,
+ NL80211_BSS_CHAN_WIDTH_10,
+ NL80211_BSS_CHAN_WIDTH_5,
+};
+
+/**
+ * enum nl80211_bss - netlink attributes for a BSS
+ *
+ * @__NL80211_BSS_INVALID: invalid
+ * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets)
+ * @NL80211_BSS_FREQUENCY: frequency in MHz (u32)
+ * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64)
+ * (if @NL80211_BSS_PRESP_DATA is present then this is known to be
+ * from a probe response, otherwise it may be from the same beacon
+ * that the NL80211_BSS_BEACON_TSF will be from)
+ * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16)
+ * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16)
+ * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the
+ * raw information elements from the probe response/beacon (bin);
+ * if the %NL80211_BSS_BEACON_IES attribute is present and the data is
+ * different then the IEs here are from a Probe Response frame; otherwise
+ * they are from a Beacon frame.
+ * However, if the driver does not indicate the source of the IEs, these
+ * IEs may be from either frame subtype.
+ * If present, the @NL80211_BSS_PRESP_DATA attribute indicates that the
+ * data here is known to be from a probe response, without any heuristics.
+ * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon
+ * in mBm (100 * dBm) (s32)
+ * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon
+ * in unspecified units, scaled to 0..100 (u8)
+ * @NL80211_BSS_STATUS: status, if this BSS is "used"
+ * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms
+ * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information
+ * elements from a Beacon frame (bin); not present if no Beacon frame has
+ * yet been received
+ * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel
+ * (u32, enum nl80211_bss_scan_width)
+ * @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64)
+ * (not present if no beacon frame has been received yet)
+ * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and
+ * @NL80211_BSS_TSF is known to be from a probe response (flag attribute)
+ * @NL80211_BSS_LAST_SEEN_BOOTTIME: CLOCK_BOOTTIME timestamp when this entry
+ * was last updated by a received frame. The value is expected to be
+ * accurate to about 10ms. (u64, nanoseconds)
+ * @NL80211_BSS_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_BSS_PARENT_TSF: the time at the start of reception of the first
+ * octet of the timestamp field of the last beacon/probe received for
+ * this BSS. The time is the TSF of the BSS specified by
+ * @NL80211_BSS_PARENT_BSSID. (u64).
+ * @NL80211_BSS_PARENT_BSSID: the BSS according to which @NL80211_BSS_PARENT_TSF
+ * is set.
+ * @NL80211_BSS_CHAIN_SIGNAL: per-chain signal strength of last BSS update.
+ * Contains a nested array of signal strength attributes (u8, dBm),
+ * using the nesting index as the antenna number.
+ * @__NL80211_BSS_AFTER_LAST: internal
+ * @NL80211_BSS_MAX: highest BSS attribute
+ */
+enum nl80211_bss {
+ __NL80211_BSS_INVALID,
+ NL80211_BSS_BSSID,
+ NL80211_BSS_FREQUENCY,
+ NL80211_BSS_TSF,
+ NL80211_BSS_BEACON_INTERVAL,
+ NL80211_BSS_CAPABILITY,
+ NL80211_BSS_INFORMATION_ELEMENTS,
+ NL80211_BSS_SIGNAL_MBM,
+ NL80211_BSS_SIGNAL_UNSPEC,
+ NL80211_BSS_STATUS,
+ NL80211_BSS_SEEN_MS_AGO,
+ NL80211_BSS_BEACON_IES,
+ NL80211_BSS_CHAN_WIDTH,
+ NL80211_BSS_BEACON_TSF,
+ NL80211_BSS_PRESP_DATA,
+ NL80211_BSS_LAST_SEEN_BOOTTIME,
+ NL80211_BSS_PAD,
+ NL80211_BSS_PARENT_TSF,
+ NL80211_BSS_PARENT_BSSID,
+ NL80211_BSS_CHAIN_SIGNAL,
+
+ /* keep last */
+ __NL80211_BSS_AFTER_LAST,
+ NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_bss_status - BSS "status"
+ * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS.
+ * Note that this is no longer used since cfg80211 no longer
+ * keeps track of whether or not authentication was done with
+ * a given BSS.
+ * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS.
+ * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS.
+ *
+ * The BSS status is a BSS attribute in scan dumps, which
+ * indicates the status the interface has wrt. this BSS.
+ */
+enum nl80211_bss_status {
+ NL80211_BSS_STATUS_AUTHENTICATED,
+ NL80211_BSS_STATUS_ASSOCIATED,
+ NL80211_BSS_STATUS_IBSS_JOINED,
+};
+
+/**
+ * enum nl80211_auth_type - AuthenticationType
+ *
+ * @NL80211_AUTHTYPE_OPEN_SYSTEM: Open System authentication
+ * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only)
+ * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r)
+ * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP)
+ * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals
+ * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key
+ * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS
+ * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key
+ * @__NL80211_AUTHTYPE_NUM: internal
+ * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm
+ * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by
+ * trying multiple times); this is invalid in netlink -- leave out
+ * the attribute for this on CONNECT commands.
+ */
+enum nl80211_auth_type {
+ NL80211_AUTHTYPE_OPEN_SYSTEM,
+ NL80211_AUTHTYPE_SHARED_KEY,
+ NL80211_AUTHTYPE_FT,
+ NL80211_AUTHTYPE_NETWORK_EAP,
+ NL80211_AUTHTYPE_SAE,
+ NL80211_AUTHTYPE_FILS_SK,
+ NL80211_AUTHTYPE_FILS_SK_PFS,
+ NL80211_AUTHTYPE_FILS_PK,
+
+ /* keep last */
+ __NL80211_AUTHTYPE_NUM,
+ NL80211_AUTHTYPE_MAX = __NL80211_AUTHTYPE_NUM - 1,
+ NL80211_AUTHTYPE_AUTOMATIC
+};
+
+/**
+ * enum nl80211_key_type - Key Type
+ * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key
+ * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key
+ * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS)
+ * @NUM_NL80211_KEYTYPES: number of defined key types
+ */
+enum nl80211_key_type {
+ NL80211_KEYTYPE_GROUP,
+ NL80211_KEYTYPE_PAIRWISE,
+ NL80211_KEYTYPE_PEERKEY,
+
+ NUM_NL80211_KEYTYPES
+};
+
+/**
+ * enum nl80211_mfp - Management frame protection state
+ * @NL80211_MFP_NO: Management frame protection not used
+ * @NL80211_MFP_REQUIRED: Management frame protection required
+ * @NL80211_MFP_OPTIONAL: Management frame protection is optional
+ */
+enum nl80211_mfp {
+ NL80211_MFP_NO,
+ NL80211_MFP_REQUIRED,
+ NL80211_MFP_OPTIONAL,
+};
+
+enum nl80211_wpa_versions {
+ NL80211_WPA_VERSION_1 = 1 << 0,
+ NL80211_WPA_VERSION_2 = 1 << 1,
+ NL80211_WPA_VERSION_3 = 1 << 2,
+};
+
+/**
+ * enum nl80211_key_default_types - key default types
+ * @__NL80211_KEY_DEFAULT_TYPE_INVALID: invalid
+ * @NL80211_KEY_DEFAULT_TYPE_UNICAST: key should be used as default
+ * unicast key
+ * @NL80211_KEY_DEFAULT_TYPE_MULTICAST: key should be used as default
+ * multicast key
+ * @NUM_NL80211_KEY_DEFAULT_TYPES: number of default types
+ */
+enum nl80211_key_default_types {
+ __NL80211_KEY_DEFAULT_TYPE_INVALID,
+ NL80211_KEY_DEFAULT_TYPE_UNICAST,
+ NL80211_KEY_DEFAULT_TYPE_MULTICAST,
+
+ NUM_NL80211_KEY_DEFAULT_TYPES
+};
+
+/**
+ * enum nl80211_key_attributes - key attributes
+ * @__NL80211_KEY_INVALID: invalid
+ * @NL80211_KEY_DATA: (temporal) key data; for TKIP this consists of
+ * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC
+ * keys
+ * @NL80211_KEY_IDX: key ID (u8, 0-3)
+ * @NL80211_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11
+ * section 7.3.2.25.1, e.g. 0x000FAC04)
+ * @NL80211_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and
+ * CCMP keys, each six bytes in little endian
+ * @NL80211_KEY_DEFAULT: flag indicating default key
+ * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key
+ * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not
+ * specified the default depends on whether a MAC address was
+ * given with the command using the key or not (u32)
+ * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ * attributes, specifying what a key should be set as default as.
+ * See &enum nl80211_key_default_types.
+ * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode.
+ * Defaults to @NL80211_KEY_RX_TX.
+ *
+ * @__NL80211_KEY_AFTER_LAST: internal
+ * @NL80211_KEY_MAX: highest key attribute
+ */
+enum nl80211_key_attributes {
+ __NL80211_KEY_INVALID,
+ NL80211_KEY_DATA,
+ NL80211_KEY_IDX,
+ NL80211_KEY_CIPHER,
+ NL80211_KEY_SEQ,
+ NL80211_KEY_DEFAULT,
+ NL80211_KEY_DEFAULT_MGMT,
+ NL80211_KEY_TYPE,
+ NL80211_KEY_DEFAULT_TYPES,
+ NL80211_KEY_MODE,
+
+ /* keep last */
+ __NL80211_KEY_AFTER_LAST,
+ NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_tx_rate_attributes - TX rate set attributes
+ * @__NL80211_TXRATE_INVALID: invalid
+ * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection
+ * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with
+ * 1 = 500 kbps) but without the IE length restriction (at most
+ * %NL80211_MAX_SUPP_RATES in a single array).
+ * @NL80211_TXRATE_HT: HT (MCS) rates allowed for TX rate selection
+ * in an array of MCS numbers.
+ * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection,
+ * see &struct nl80211_txrate_vht
+ * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi
+ * @__NL80211_TXRATE_AFTER_LAST: internal
+ * @NL80211_TXRATE_MAX: highest TX rate attribute
+ */
+enum nl80211_tx_rate_attributes {
+ __NL80211_TXRATE_INVALID,
+ NL80211_TXRATE_LEGACY,
+ NL80211_TXRATE_HT,
+ NL80211_TXRATE_VHT,
+ NL80211_TXRATE_GI,
+
+ /* keep last */
+ __NL80211_TXRATE_AFTER_LAST,
+ NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1
+};
+
+#define NL80211_TXRATE_MCS NL80211_TXRATE_HT
+#define NL80211_VHT_NSS_MAX 8
+
+/**
+ * struct nl80211_txrate_vht - VHT MCS/NSS txrate bitmap
+ * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.)
+ */
+struct nl80211_txrate_vht {
+ __u16 mcs[NL80211_VHT_NSS_MAX];
+};
+
+enum nl80211_txrate_gi {
+ NL80211_TXRATE_DEFAULT_GI,
+ NL80211_TXRATE_FORCE_SGI,
+ NL80211_TXRATE_FORCE_LGI,
+};
+
+/**
+ * enum nl80211_band - Frequency band
+ * @NL80211_BAND_2GHZ: 2.4 GHz ISM band
+ * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz)
+ * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz)
+ * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz)
+ * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace
+ * since newer kernel versions may support more bands
+ */
+enum nl80211_band {
+ NL80211_BAND_2GHZ,
+ NL80211_BAND_5GHZ,
+ NL80211_BAND_60GHZ,
+ NL80211_BAND_6GHZ,
+
+ NUM_NL80211_BANDS,
+};
+
+/**
+ * enum nl80211_ps_state - powersave state
+ * @NL80211_PS_DISABLED: powersave is disabled
+ * @NL80211_PS_ENABLED: powersave is enabled
+ */
+enum nl80211_ps_state {
+ NL80211_PS_DISABLED,
+ NL80211_PS_ENABLED,
+};
+
+/**
+ * enum nl80211_attr_cqm - connection quality monitor attributes
+ * @__NL80211_ATTR_CQM_INVALID: invalid
+ * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm. This value specifies
+ * the threshold for the RSSI level at which an event will be sent. Zero
+ * to disable. Alternatively, if %NL80211_EXT_FEATURE_CQM_RSSI_LIST is
+ * set, multiple values can be supplied as a low-to-high sorted array of
+ * threshold values in dBm. Events will be sent when the RSSI value
+ * crosses any of the thresholds.
+ * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm. This value specifies
+ * the minimum amount the RSSI level must change after an event before a
+ * new event may be issued (to reduce effects of RSSI oscillation).
+ * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event
+ * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many
+ * consecutive packets were not acknowledged by the peer
+ * @NL80211_ATTR_CQM_TXE_RATE: TX error rate in %. Minimum % of TX failures
+ * during the given %NL80211_ATTR_CQM_TXE_INTVL before an
+ * %NL80211_CMD_NOTIFY_CQM with reported %NL80211_ATTR_CQM_TXE_RATE and
+ * %NL80211_ATTR_CQM_TXE_PKTS is generated.
+ * @NL80211_ATTR_CQM_TXE_PKTS: number of attempted packets in a given
+ * %NL80211_ATTR_CQM_TXE_INTVL before %NL80211_ATTR_CQM_TXE_RATE is
+ * checked.
+ * @NL80211_ATTR_CQM_TXE_INTVL: interval in seconds. Specifies the periodic
+ * interval in which %NL80211_ATTR_CQM_TXE_PKTS and
+ * %NL80211_ATTR_CQM_TXE_RATE must be satisfied before generating an
+ * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting.
+ * @NL80211_ATTR_CQM_BEACON_LOSS_EVENT: flag attribute that's set in a beacon
+ * loss event
+ * @NL80211_ATTR_CQM_RSSI_LEVEL: the RSSI value in dBm that triggered the
+ * RSSI threshold event.
+ * @__NL80211_ATTR_CQM_AFTER_LAST: internal
+ * @NL80211_ATTR_CQM_MAX: highest key attribute
+ */
+enum nl80211_attr_cqm {
+ __NL80211_ATTR_CQM_INVALID,
+ NL80211_ATTR_CQM_RSSI_THOLD,
+ NL80211_ATTR_CQM_RSSI_HYST,
+ NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+ NL80211_ATTR_CQM_PKT_LOSS_EVENT,
+ NL80211_ATTR_CQM_TXE_RATE,
+ NL80211_ATTR_CQM_TXE_PKTS,
+ NL80211_ATTR_CQM_TXE_INTVL,
+ NL80211_ATTR_CQM_BEACON_LOSS_EVENT,
+ NL80211_ATTR_CQM_RSSI_LEVEL,
+
+ /* keep last */
+ __NL80211_ATTR_CQM_AFTER_LAST,
+ NL80211_ATTR_CQM_MAX = __NL80211_ATTR_CQM_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW: The RSSI level is lower than the
+ * configured threshold
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH: The RSSI is higher than the
+ * configured threshold
+ * @NL80211_CQM_RSSI_BEACON_LOSS_EVENT: (reserved, never sent)
+ */
+enum nl80211_cqm_rssi_threshold_event {
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+ NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+ NL80211_CQM_RSSI_BEACON_LOSS_EVENT,
+};
+
+
+/**
+ * enum nl80211_tx_power_setting - TX power adjustment
+ * @NL80211_TX_POWER_AUTOMATIC: automatically determine transmit power
+ * @NL80211_TX_POWER_LIMITED: limit TX power by the mBm parameter
+ * @NL80211_TX_POWER_FIXED: fix TX power to the mBm parameter
+ */
+enum nl80211_tx_power_setting {
+ NL80211_TX_POWER_AUTOMATIC,
+ NL80211_TX_POWER_LIMITED,
+ NL80211_TX_POWER_FIXED,
+};
+
+/**
+ * enum nl80211_packet_pattern_attr - packet pattern attribute
+ * @__NL80211_PKTPAT_INVALID: invalid number for nested attribute
+ * @NL80211_PKTPAT_PATTERN: the pattern, values where the mask has
+ * a zero bit are ignored
+ * @NL80211_PKTPAT_MASK: pattern mask, must be long enough to have
+ * a bit for each byte in the pattern. The lowest-order bit corresponds
+ * to the first byte of the pattern, but the bytes of the pattern are
+ * in a little-endian-like format, i.e. the 9th byte of the pattern
+ * corresponds to the lowest-order bit in the second byte of the mask.
+ * For example: The match 00:xx:00:00:xx:00:00:00:00:xx:xx:xx (where
+ * xx indicates "don't care") would be represented by a pattern of
+ * twelve zero bytes, and a mask of "0xed,0x01".
+ * Note that the pattern matching is done as though frames were not
+ * 802.11 frames but 802.3 frames, i.e. the frame is fully unpacked
+ * first (including SNAP header unpacking) and then matched.
+ * @NL80211_PKTPAT_OFFSET: packet offset, pattern is matched after
+ * these fixed number of bytes of received packet
+ * @NUM_NL80211_PKTPAT: number of attributes
+ * @MAX_NL80211_PKTPAT: max attribute number
+ */
+enum nl80211_packet_pattern_attr {
+ __NL80211_PKTPAT_INVALID,
+ NL80211_PKTPAT_MASK,
+ NL80211_PKTPAT_PATTERN,
+ NL80211_PKTPAT_OFFSET,
+
+ NUM_NL80211_PKTPAT,
+ MAX_NL80211_PKTPAT = NUM_NL80211_PKTPAT - 1,
+};
+
+/**
+ * struct nl80211_pattern_support - packet pattern support information
+ * @max_patterns: maximum number of patterns supported
+ * @min_pattern_len: minimum length of each pattern
+ * @max_pattern_len: maximum length of each pattern
+ * @max_pkt_offset: maximum Rx packet offset
+ *
+ * This struct is carried in %NL80211_WOWLAN_TRIG_PKT_PATTERN when
+ * that is part of %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED or in
+ * %NL80211_ATTR_COALESCE_RULE_PKT_PATTERN when that is part of
+ * %NL80211_ATTR_COALESCE_RULE in the capability information given
+ * by the kernel to userspace.
+ */
+struct nl80211_pattern_support {
+ __u32 max_patterns;
+ __u32 min_pattern_len;
+ __u32 max_pattern_len;
+ __u32 max_pkt_offset;
+} __attribute__((packed));
+
+/* only for backward compatibility */
+#define __NL80211_WOWLAN_PKTPAT_INVALID __NL80211_PKTPAT_INVALID
+#define NL80211_WOWLAN_PKTPAT_MASK NL80211_PKTPAT_MASK
+#define NL80211_WOWLAN_PKTPAT_PATTERN NL80211_PKTPAT_PATTERN
+#define NL80211_WOWLAN_PKTPAT_OFFSET NL80211_PKTPAT_OFFSET
+#define NUM_NL80211_WOWLAN_PKTPAT NUM_NL80211_PKTPAT
+#define MAX_NL80211_WOWLAN_PKTPAT MAX_NL80211_PKTPAT
+#define nl80211_wowlan_pattern_support nl80211_pattern_support
+
+/**
+ * enum nl80211_wowlan_triggers - WoWLAN trigger definitions
+ * @__NL80211_WOWLAN_TRIG_INVALID: invalid number for nested attributes
+ * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put
+ * the chip into a special state -- works best with chips that have
+ * support for low-power operation already (flag)
+ * Note that this mode is incompatible with all of the others, if
+ * any others are even supported by the device.
+ * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect
+ * is detected is implementation-specific (flag)
+ * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed
+ * by 16 repetitions of MAC addr, anywhere in payload) (flag)
+ * @NL80211_WOWLAN_TRIG_PKT_PATTERN: wake up on the specified packet patterns
+ * which are passed in an array of nested attributes, each nested attribute
+ * defining a with attributes from &struct nl80211_wowlan_trig_pkt_pattern.
+ * Each pattern defines a wakeup packet. Packet offset is associated with
+ * each pattern which is used while matching the pattern. The matching is
+ * done on the MSDU, i.e. as though the packet was an 802.3 packet, so the
+ * pattern matching is done after the packet is converted to the MSDU.
+ *
+ * In %NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED, it is a binary attribute
+ * carrying a &struct nl80211_pattern_support.
+ *
+ * When reporting wakeup. it is a u32 attribute containing the 0-based
+ * index of the pattern that caused the wakeup, in the patterns passed
+ * to the kernel when configuring.
+ * @NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED: Not a real trigger, and cannot be
+ * used when setting, used only to indicate that GTK rekeying is supported
+ * by the device (flag)
+ * @NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE: wake up on GTK rekey failure (if
+ * done by the device) (flag)
+ * @NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST: wake up on EAP Identity Request
+ * packet (flag)
+ * @NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE: wake up on 4-way handshake (flag)
+ * @NL80211_WOWLAN_TRIG_RFKILL_RELEASE: wake up when rfkill is released
+ * (on devices that have rfkill in the device) (flag)
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211: For wakeup reporting only, contains
+ * the 802.11 packet that caused the wakeup, e.g. a deauth frame. The frame
+ * may be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN
+ * attribute contains the original length.
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN: Original length of the 802.11
+ * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211
+ * attribute if the packet was truncated somewhere.
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023: For wakeup reporting only, contains the
+ * 802.11 packet that caused the wakeup, e.g. a magic packet. The frame may
+ * be truncated, the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN attribute
+ * contains the original length.
+ * @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN: Original length of the 802.3
+ * packet, may be bigger than the @NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023
+ * attribute if the packet was truncated somewhere.
+ * @NL80211_WOWLAN_TRIG_TCP_CONNECTION: TCP connection wake, see DOC section
+ * "TCP connection wakeup" for more details. This is a nested attribute
+ * containing the exact information for establishing and keeping alive
+ * the TCP connection.
+ * @NL80211_WOWLAN_TRIG_TCP_WAKEUP_MATCH: For wakeup reporting only, the
+ * wakeup packet was received on the TCP connection
+ * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST: For wakeup reporting only, the
+ * TCP connection was lost or failed to be established
+ * @NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS: For wakeup reporting only,
+ * the TCP connection ran out of tokens to use for data to send to the
+ * service
+ * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network
+ * is detected. This is a nested attribute that contains the
+ * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It
+ * specifies how the scan is performed (e.g. the interval, the
+ * channels to scan and the initial delay) as well as the scan
+ * results that will trigger a wake (i.e. the matchsets). This
+ * attribute is also sent in a response to
+ * @NL80211_CMD_GET_WIPHY, indicating the number of match sets
+ * supported by the driver (u32).
+ * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute
+ * containing an array with information about what triggered the
+ * wake up. If no elements are present in the array, it means
+ * that the information is not available. If more than one
+ * element is present, it means that more than one match
+ * occurred.
+ * Each element in the array is a nested attribute that contains
+ * one optional %NL80211_ATTR_SSID attribute and one optional
+ * %NL80211_ATTR_SCAN_FREQUENCIES attribute. At least one of
+ * these attributes must be present. If
+ * %NL80211_ATTR_SCAN_FREQUENCIES contains more than one
+ * frequency, it means that the match occurred in more than one
+ * channel.
+ * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers
+ * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number
+ *
+ * These nested attributes are used to configure the wakeup triggers and
+ * to report the wakeup reason(s).
+ */
+enum nl80211_wowlan_triggers {
+ __NL80211_WOWLAN_TRIG_INVALID,
+ NL80211_WOWLAN_TRIG_ANY,
+ NL80211_WOWLAN_TRIG_DISCONNECT,
+ NL80211_WOWLAN_TRIG_MAGIC_PKT,
+ NL80211_WOWLAN_TRIG_PKT_PATTERN,
+ NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED,
+ NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE,
+ NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST,
+ NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE,
+ NL80211_WOWLAN_TRIG_RFKILL_RELEASE,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023,
+ NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN,
+ NL80211_WOWLAN_TRIG_TCP_CONNECTION,
+ NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH,
+ NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST,
+ NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS,
+ NL80211_WOWLAN_TRIG_NET_DETECT,
+ NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS,
+
+ /* keep last */
+ NUM_NL80211_WOWLAN_TRIG,
+ MAX_NL80211_WOWLAN_TRIG = NUM_NL80211_WOWLAN_TRIG - 1
+};
+
+/**
+ * DOC: TCP connection wakeup
+ *
+ * Some devices can establish a TCP connection in order to be woken up by a
+ * packet coming in from outside their network segment, or behind NAT. If
+ * configured, the device will establish a TCP connection to the given
+ * service, and periodically send data to that service. The first data
+ * packet is usually transmitted after SYN/ACK, also ACKing the SYN/ACK.
+ * The data packets can optionally include a (little endian) sequence
+ * number (in the TCP payload!) that is generated by the device, and, also
+ * optionally, a token from a list of tokens. This serves as a keep-alive
+ * with the service, and for NATed connections, etc.
+ *
+ * During this keep-alive period, the server doesn't send any data to the
+ * client. When receiving data, it is compared against the wakeup pattern
+ * (and mask) and if it matches, the host is woken up. Similarly, if the
+ * connection breaks or cannot be established to start with, the host is
+ * also woken up.
+ *
+ * Developer's note: ARP offload is required for this, otherwise TCP
+ * response packets might not go through correctly.
+ */
+
+/**
+ * struct nl80211_wowlan_tcp_data_seq - WoWLAN TCP data sequence
+ * @start: starting value
+ * @offset: offset of sequence number in packet
+ * @len: length of the sequence value to write, 1 through 4
+ *
+ * Note: don't confuse with the TCP sequence number(s), this is for the
+ * keepalive packet payload. The actual value is written into the packet
+ * in little endian.
+ */
+struct nl80211_wowlan_tcp_data_seq {
+ __u32 start, offset, len;
+};
+
+/**
+ * struct nl80211_wowlan_tcp_data_token - WoWLAN TCP data token config
+ * @offset: offset of token in packet
+ * @len: length of each token
+ * @token_stream: stream of data to be used for the tokens, the length must
+ * be a multiple of @len for this to make sense
+ */
+struct nl80211_wowlan_tcp_data_token {
+ __u32 offset, len;
+ __u8 token_stream[];
+};
+
+/**
+ * struct nl80211_wowlan_tcp_data_token_feature - data token features
+ * @min_len: minimum token length
+ * @max_len: maximum token length
+ * @bufsize: total available token buffer size (max size of @token_stream)
+ */
+struct nl80211_wowlan_tcp_data_token_feature {
+ __u32 min_len, max_len, bufsize;
+};
+
+/**
+ * enum nl80211_wowlan_tcp_attrs - WoWLAN TCP connection parameters
+ * @__NL80211_WOWLAN_TCP_INVALID: invalid number for nested attributes
+ * @NL80211_WOWLAN_TCP_SRC_IPV4: source IPv4 address (in network byte order)
+ * @NL80211_WOWLAN_TCP_DST_IPV4: destination IPv4 address
+ * (in network byte order)
+ * @NL80211_WOWLAN_TCP_DST_MAC: destination MAC address, this is given because
+ * route lookup when configured might be invalid by the time we suspend,
+ * and doing a route lookup when suspending is no longer possible as it
+ * might require ARP querying.
+ * @NL80211_WOWLAN_TCP_SRC_PORT: source port (u16); optional, if not given a
+ * socket and port will be allocated
+ * @NL80211_WOWLAN_TCP_DST_PORT: destination port (u16)
+ * @NL80211_WOWLAN_TCP_DATA_PAYLOAD: data packet payload, at least one byte.
+ * For feature advertising, a u32 attribute holding the maximum length
+ * of the data payload.
+ * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ: data packet sequence configuration
+ * (if desired), a &struct nl80211_wowlan_tcp_data_seq. For feature
+ * advertising it is just a flag
+ * @NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN: data packet token configuration,
+ * see &struct nl80211_wowlan_tcp_data_token and for advertising see
+ * &struct nl80211_wowlan_tcp_data_token_feature.
+ * @NL80211_WOWLAN_TCP_DATA_INTERVAL: data interval in seconds, maximum
+ * interval in feature advertising (u32)
+ * @NL80211_WOWLAN_TCP_WAKE_PAYLOAD: wake packet payload, for advertising a
+ * u32 attribute holding the maximum length
+ * @NL80211_WOWLAN_TCP_WAKE_MASK: Wake packet payload mask, not used for
+ * feature advertising. The mask works like @NL80211_PKTPAT_MASK
+ * but on the TCP payload only.
+ * @NUM_NL80211_WOWLAN_TCP: number of TCP attributes
+ * @MAX_NL80211_WOWLAN_TCP: highest attribute number
+ */
+enum nl80211_wowlan_tcp_attrs {
+ __NL80211_WOWLAN_TCP_INVALID,
+ NL80211_WOWLAN_TCP_SRC_IPV4,
+ NL80211_WOWLAN_TCP_DST_IPV4,
+ NL80211_WOWLAN_TCP_DST_MAC,
+ NL80211_WOWLAN_TCP_SRC_PORT,
+ NL80211_WOWLAN_TCP_DST_PORT,
+ NL80211_WOWLAN_TCP_DATA_PAYLOAD,
+ NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ,
+ NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN,
+ NL80211_WOWLAN_TCP_DATA_INTERVAL,
+ NL80211_WOWLAN_TCP_WAKE_PAYLOAD,
+ NL80211_WOWLAN_TCP_WAKE_MASK,
+
+ /* keep last */
+ NUM_NL80211_WOWLAN_TCP,
+ MAX_NL80211_WOWLAN_TCP = NUM_NL80211_WOWLAN_TCP - 1
+};
+
+/**
+ * struct nl80211_coalesce_rule_support - coalesce rule support information
+ * @max_rules: maximum number of rules supported
+ * @pat: packet pattern support information
+ * @max_delay: maximum supported coalescing delay in msecs
+ *
+ * This struct is carried in %NL80211_ATTR_COALESCE_RULE in the
+ * capability information given by the kernel to userspace.
+ */
+struct nl80211_coalesce_rule_support {
+ __u32 max_rules;
+ struct nl80211_pattern_support pat;
+ __u32 max_delay;
+} __attribute__((packed));
+
+/**
+ * enum nl80211_attr_coalesce_rule - coalesce rule attribute
+ * @__NL80211_COALESCE_RULE_INVALID: invalid number for nested attribute
+ * @NL80211_ATTR_COALESCE_RULE_DELAY: delay in msecs used for packet coalescing
+ * @NL80211_ATTR_COALESCE_RULE_CONDITION: condition for packet coalescence,
+ * see &enum nl80211_coalesce_condition.
+ * @NL80211_ATTR_COALESCE_RULE_PKT_PATTERN: packet offset, pattern is matched
+ * after these fixed number of bytes of received packet
+ * @NUM_NL80211_ATTR_COALESCE_RULE: number of attributes
+ * @NL80211_ATTR_COALESCE_RULE_MAX: max attribute number
+ */
+enum nl80211_attr_coalesce_rule {
+ __NL80211_COALESCE_RULE_INVALID,
+ NL80211_ATTR_COALESCE_RULE_DELAY,
+ NL80211_ATTR_COALESCE_RULE_CONDITION,
+ NL80211_ATTR_COALESCE_RULE_PKT_PATTERN,
+
+ /* keep last */
+ NUM_NL80211_ATTR_COALESCE_RULE,
+ NL80211_ATTR_COALESCE_RULE_MAX = NUM_NL80211_ATTR_COALESCE_RULE - 1
+};
+
+/**
+ * enum nl80211_coalesce_condition - coalesce rule conditions
+ * @NL80211_COALESCE_CONDITION_MATCH: coalaesce Rx packets when patterns
+ * in a rule are matched.
+ * @NL80211_COALESCE_CONDITION_NO_MATCH: coalesce Rx packets when patterns
+ * in a rule are not matched.
+ */
+enum nl80211_coalesce_condition {
+ NL80211_COALESCE_CONDITION_MATCH,
+ NL80211_COALESCE_CONDITION_NO_MATCH
+};
+
+/**
+ * enum nl80211_iface_limit_attrs - limit attributes
+ * @NL80211_IFACE_LIMIT_UNSPEC: (reserved)
+ * @NL80211_IFACE_LIMIT_MAX: maximum number of interfaces that
+ * can be chosen from this set of interface types (u32)
+ * @NL80211_IFACE_LIMIT_TYPES: nested attribute containing a
+ * flag attribute for each interface type in this set
+ * @NUM_NL80211_IFACE_LIMIT: number of attributes
+ * @MAX_NL80211_IFACE_LIMIT: highest attribute number
+ */
+enum nl80211_iface_limit_attrs {
+ NL80211_IFACE_LIMIT_UNSPEC,
+ NL80211_IFACE_LIMIT_MAX,
+ NL80211_IFACE_LIMIT_TYPES,
+
+ /* keep last */
+ NUM_NL80211_IFACE_LIMIT,
+ MAX_NL80211_IFACE_LIMIT = NUM_NL80211_IFACE_LIMIT - 1
+};
+
+/**
+ * enum nl80211_if_combination_attrs -- interface combination attributes
+ *
+ * @NL80211_IFACE_COMB_UNSPEC: (reserved)
+ * @NL80211_IFACE_COMB_LIMITS: Nested attributes containing the limits
+ * for given interface types, see &enum nl80211_iface_limit_attrs.
+ * @NL80211_IFACE_COMB_MAXNUM: u32 attribute giving the total number of
+ * interfaces that can be created in this group. This number doesn't
+ * apply to interfaces purely managed in software, which are listed
+ * in a separate attribute %NL80211_ATTR_INTERFACES_SOFTWARE.
+ * @NL80211_IFACE_COMB_STA_AP_BI_MATCH: flag attribute specifying that
+ * beacon intervals within this group must be all the same even for
+ * infrastructure and AP/GO combinations, i.e. the GO(s) must adopt
+ * the infrastructure network's beacon interval.
+ * @NL80211_IFACE_COMB_NUM_CHANNELS: u32 attribute specifying how many
+ * different channels may be used within this group.
+ * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap
+ * of supported channel widths for radar detection.
+ * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap
+ * of supported regulatory regions for radar detection.
+ * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of
+ * different beacon intervals supported by all the interface combinations
+ * in this group (if not present, all beacon intervals be identical).
+ * @NUM_NL80211_IFACE_COMB: number of attributes
+ * @MAX_NL80211_IFACE_COMB: highest attribute number
+ *
+ * Examples:
+ * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2
+ * => allows an AP and a STA that must match BIs
+ *
+ * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8,
+ * => allows 8 of AP/GO that can have BI gcd >= min gcd
+ *
+ * numbers = [ #{STA} <= 2 ], channels = 2, max = 2
+ * => allows two STAs on different channels
+ *
+ * numbers = [ #{STA} <= 1, #{P2P-client,P2P-GO} <= 3 ], max = 4
+ * => allows a STA plus three P2P interfaces
+ *
+ * The list of these four possibilities could completely be contained
+ * within the %NL80211_ATTR_INTERFACE_COMBINATIONS attribute to indicate
+ * that any of these groups must match.
+ *
+ * "Combinations" of just a single interface will not be listed here,
+ * a single interface of any valid interface type is assumed to always
+ * be possible by itself. This means that implicitly, for each valid
+ * interface type, the following group always exists:
+ * numbers = [ #{<type>} <= 1 ], channels = 1, max = 1
+ */
+enum nl80211_if_combination_attrs {
+ NL80211_IFACE_COMB_UNSPEC,
+ NL80211_IFACE_COMB_LIMITS,
+ NL80211_IFACE_COMB_MAXNUM,
+ NL80211_IFACE_COMB_STA_AP_BI_MATCH,
+ NL80211_IFACE_COMB_NUM_CHANNELS,
+ NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+ NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+ NL80211_IFACE_COMB_BI_MIN_GCD,
+
+ /* keep last */
+ NUM_NL80211_IFACE_COMB,
+ MAX_NL80211_IFACE_COMB = NUM_NL80211_IFACE_COMB - 1
+};
+
+
+/**
+ * enum nl80211_plink_state - state of a mesh peer link finite state machine
+ *
+ * @NL80211_PLINK_LISTEN: initial state, considered the implicit
+ * state of non existent mesh peer links
+ * @NL80211_PLINK_OPN_SNT: mesh plink open frame has been sent to
+ * this mesh peer
+ * @NL80211_PLINK_OPN_RCVD: mesh plink open frame has been received
+ * from this mesh peer
+ * @NL80211_PLINK_CNF_RCVD: mesh plink confirm frame has been
+ * received from this mesh peer
+ * @NL80211_PLINK_ESTAB: mesh peer link is established
+ * @NL80211_PLINK_HOLDING: mesh peer link is being closed or cancelled
+ * @NL80211_PLINK_BLOCKED: all frames transmitted from this mesh
+ * plink are discarded
+ * @NUM_NL80211_PLINK_STATES: number of peer link states
+ * @MAX_NL80211_PLINK_STATES: highest numerical value of plink states
+ */
+enum nl80211_plink_state {
+ NL80211_PLINK_LISTEN,
+ NL80211_PLINK_OPN_SNT,
+ NL80211_PLINK_OPN_RCVD,
+ NL80211_PLINK_CNF_RCVD,
+ NL80211_PLINK_ESTAB,
+ NL80211_PLINK_HOLDING,
+ NL80211_PLINK_BLOCKED,
+
+ /* keep last */
+ NUM_NL80211_PLINK_STATES,
+ MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1
+};
+
+/**
+ * enum nl80211_plink_action - actions to perform in mesh peers
+ *
+ * @NL80211_PLINK_ACTION_NO_ACTION: perform no action
+ * @NL80211_PLINK_ACTION_OPEN: start mesh peer link establishment
+ * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer
+ * @NUM_NL80211_PLINK_ACTIONS: number of possible actions
+ */
+enum plink_actions {
+ NL80211_PLINK_ACTION_NO_ACTION,
+ NL80211_PLINK_ACTION_OPEN,
+ NL80211_PLINK_ACTION_BLOCK,
+
+ NUM_NL80211_PLINK_ACTIONS,
+};
+
+
+#define NL80211_KCK_LEN 16
+#define NL80211_KEK_LEN 16
+#define NL80211_REPLAY_CTR_LEN 8
+
+/**
+ * enum nl80211_rekey_data - attributes for GTK rekey offload
+ * @__NL80211_REKEY_DATA_INVALID: invalid number for nested attributes
+ * @NL80211_REKEY_DATA_KEK: key encryption key (binary)
+ * @NL80211_REKEY_DATA_KCK: key confirmation key (binary)
+ * @NL80211_REKEY_DATA_REPLAY_CTR: replay counter (binary)
+ * @NUM_NL80211_REKEY_DATA: number of rekey attributes (internal)
+ * @MAX_NL80211_REKEY_DATA: highest rekey attribute (internal)
+ */
+enum nl80211_rekey_data {
+ __NL80211_REKEY_DATA_INVALID,
+ NL80211_REKEY_DATA_KEK,
+ NL80211_REKEY_DATA_KCK,
+ NL80211_REKEY_DATA_REPLAY_CTR,
+
+ /* keep last */
+ NUM_NL80211_REKEY_DATA,
+ MAX_NL80211_REKEY_DATA = NUM_NL80211_REKEY_DATA - 1
+};
+
+/**
+ * enum nl80211_hidden_ssid - values for %NL80211_ATTR_HIDDEN_SSID
+ * @NL80211_HIDDEN_SSID_NOT_IN_USE: do not hide SSID (i.e., broadcast it in
+ * Beacon frames)
+ * @NL80211_HIDDEN_SSID_ZERO_LEN: hide SSID by using zero-length SSID element
+ * in Beacon frames
+ * @NL80211_HIDDEN_SSID_ZERO_CONTENTS: hide SSID by using correct length of SSID
+ * element in Beacon frames but zero out each byte in the SSID
+ */
+enum nl80211_hidden_ssid {
+ NL80211_HIDDEN_SSID_NOT_IN_USE,
+ NL80211_HIDDEN_SSID_ZERO_LEN,
+ NL80211_HIDDEN_SSID_ZERO_CONTENTS
+};
+
+/**
+ * enum nl80211_sta_wme_attr - station WME attributes
+ * @__NL80211_STA_WME_INVALID: invalid number for nested attribute
+ * @NL80211_STA_WME_UAPSD_QUEUES: bitmap of uapsd queues. the format
+ * is the same as the AC bitmap in the QoS info field.
+ * @NL80211_STA_WME_MAX_SP: max service period. the format is the same
+ * as the MAX_SP field in the QoS info field (but already shifted down).
+ * @__NL80211_STA_WME_AFTER_LAST: internal
+ * @NL80211_STA_WME_MAX: highest station WME attribute
+ */
+enum nl80211_sta_wme_attr {
+ __NL80211_STA_WME_INVALID,
+ NL80211_STA_WME_UAPSD_QUEUES,
+ NL80211_STA_WME_MAX_SP,
+
+ /* keep last */
+ __NL80211_STA_WME_AFTER_LAST,
+ NL80211_STA_WME_MAX = __NL80211_STA_WME_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_pmksa_candidate_attr - attributes for PMKSA caching candidates
+ * @__NL80211_PMKSA_CANDIDATE_INVALID: invalid number for nested attributes
+ * @NL80211_PMKSA_CANDIDATE_INDEX: candidate index (u32; the smaller, the higher
+ * priority)
+ * @NL80211_PMKSA_CANDIDATE_BSSID: candidate BSSID (6 octets)
+ * @NL80211_PMKSA_CANDIDATE_PREAUTH: RSN pre-authentication supported (flag)
+ * @NUM_NL80211_PMKSA_CANDIDATE: number of PMKSA caching candidate attributes
+ * (internal)
+ * @MAX_NL80211_PMKSA_CANDIDATE: highest PMKSA caching candidate attribute
+ * (internal)
+ */
+enum nl80211_pmksa_candidate_attr {
+ __NL80211_PMKSA_CANDIDATE_INVALID,
+ NL80211_PMKSA_CANDIDATE_INDEX,
+ NL80211_PMKSA_CANDIDATE_BSSID,
+ NL80211_PMKSA_CANDIDATE_PREAUTH,
+
+ /* keep last */
+ NUM_NL80211_PMKSA_CANDIDATE,
+ MAX_NL80211_PMKSA_CANDIDATE = NUM_NL80211_PMKSA_CANDIDATE - 1
+};
+
+/**
+ * enum nl80211_tdls_operation - values for %NL80211_ATTR_TDLS_OPERATION
+ * @NL80211_TDLS_DISCOVERY_REQ: Send a TDLS discovery request
+ * @NL80211_TDLS_SETUP: Setup TDLS link
+ * @NL80211_TDLS_TEARDOWN: Teardown a TDLS link which is already established
+ * @NL80211_TDLS_ENABLE_LINK: Enable TDLS link
+ * @NL80211_TDLS_DISABLE_LINK: Disable TDLS link
+ */
+enum nl80211_tdls_operation {
+ NL80211_TDLS_DISCOVERY_REQ,
+ NL80211_TDLS_SETUP,
+ NL80211_TDLS_TEARDOWN,
+ NL80211_TDLS_ENABLE_LINK,
+ NL80211_TDLS_DISABLE_LINK,
+};
+
+/*
+ * enum nl80211_ap_sme_features - device-integrated AP features
+ * Reserved for future use, no bits are defined in
+ * NL80211_ATTR_DEVICE_AP_SME yet.
+enum nl80211_ap_sme_features {
+};
+ */
+
+/**
+ * enum nl80211_feature_flags - device/driver features
+ * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back
+ * TX status to the socket error queue when requested with the
+ * socket option.
+ * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates.
+ * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up
+ * the connected inactive stations in AP mode.
+ * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested
+ * to work properly to suppport receiving regulatory hints from
+ * cellular base stations.
+ * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only
+ * here to reserve the value for API/ABI compatibility)
+ * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of
+ * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station
+ * mode
+ * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan
+ * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported
+ * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif
+ * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting
+ * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform
+ * OBSS scans and generate 20/40 BSS coex reports. This flag is used only
+ * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied.
+ * @NL80211_FEATURE_P2P_GO_CTWIN: P2P GO implementation supports CT Window
+ * setting
+ * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic
+ * powersave
+ * @NL80211_FEATURE_FULL_AP_CLIENT_STATE: The driver supports full state
+ * transitions for AP clients. Without this flag (and if the driver
+ * doesn't have the AP SME in the device) the driver supports adding
+ * stations only when they're associated and adds them in associated
+ * state (to later be transitioned into authorized), with this flag
+ * they should be added before even sending the authentication reply
+ * and then transitioned into authenticated, associated and authorized
+ * states using station flags.
+ * Note that even for drivers that support this, the default is to add
+ * stations in authenticated/associated state, so to add unauthenticated
+ * stations the authenticated/associated bits have to be set in the mask.
+ * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits
+ * (HT40, VHT 80/160 MHz) if this flag is set
+ * @NL80211_FEATURE_USERSPACE_MPM: This driver supports a userspace Mesh
+ * Peering Management entity which may be implemented by registering for
+ * beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is
+ * still generated by the driver.
+ * @NL80211_FEATURE_ACTIVE_MONITOR: This driver supports an active monitor
+ * interface. An active monitor interface behaves like a normal monitor
+ * interface, but gets added to the driver. It ensures that incoming
+ * unicast packets directed at the configured interface address get ACKed.
+ * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic
+ * channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the
+ * lifetime of a BSS.
+ * @NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES: This device adds a DS Parameter
+ * Set IE to probe requests.
+ * @NL80211_FEATURE_WFA_TPC_IE_IN_PROBES: This device adds a WFA TPC Report IE
+ * to probe requests.
+ * @NL80211_FEATURE_QUIET: This device, in client mode, supports Quiet Period
+ * requests sent to it by an AP.
+ * @NL80211_FEATURE_TX_POWER_INSERTION: This device is capable of inserting the
+ * current tx power value into the TPC Report IE in the spectrum
+ * management TPC Report action frame, and in the Radio Measurement Link
+ * Measurement Report action frame.
+ * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout
+ * estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used
+ * to enable dynack.
+ * @NL80211_FEATURE_STATIC_SMPS: Device supports static spatial
+ * multiplexing powersave, ie. can turn off all but one chain
+ * even on HT connections that should be using more chains.
+ * @NL80211_FEATURE_DYNAMIC_SMPS: Device supports dynamic spatial
+ * multiplexing powersave, ie. can turn off all but one chain
+ * and then wake the rest up as required after, for example,
+ * rts/cts handshake.
+ * @NL80211_FEATURE_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM
+ * TSPEC sessions (TID aka TSID 0-7) with the %NL80211_CMD_ADD_TX_TS
+ * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it
+ * needs to be able to handle Block-Ack agreements and other things.
+ * @NL80211_FEATURE_MAC_ON_CREATE: Device supports configuring
+ * the vif's MAC address upon creation.
+ * See 'macaddr' field in the vif_params (cfg80211.h).
+ * @NL80211_FEATURE_TDLS_CHANNEL_SWITCH: Driver supports channel switching when
+ * operating as a TDLS peer.
+ * @NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR: This device/driver supports using a
+ * random MAC address during scan (if the device is unassociated); the
+ * %NL80211_SCAN_FLAG_RANDOM_ADDR flag may be set for scans and the MAC
+ * address mask/value will be used.
+ * @NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR: This device/driver supports
+ * using a random MAC address for every scan iteration during scheduled
+ * scan (while not associated), the %NL80211_SCAN_FLAG_RANDOM_ADDR may
+ * be set for scheduled scan and the MAC address mask/value will be used.
+ * @NL80211_FEATURE_ND_RANDOM_MAC_ADDR: This device/driver supports using a
+ * random MAC address for every scan iteration during "net detect", i.e.
+ * scan in unassociated WoWLAN, the %NL80211_SCAN_FLAG_RANDOM_ADDR may
+ * be set for scheduled scan and the MAC address mask/value will be used.
+ */
+enum nl80211_feature_flags {
+ NL80211_FEATURE_SK_TX_STATUS = 1 << 0,
+ NL80211_FEATURE_HT_IBSS = 1 << 1,
+ NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2,
+ NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3,
+ NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4,
+ NL80211_FEATURE_SAE = 1 << 5,
+ NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6,
+ NL80211_FEATURE_SCAN_FLUSH = 1 << 7,
+ NL80211_FEATURE_AP_SCAN = 1 << 8,
+ NL80211_FEATURE_VIF_TXPOWER = 1 << 9,
+ NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10,
+ NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11,
+ NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12,
+ /* bit 13 is reserved */
+ NL80211_FEATURE_ADVERTISE_CHAN_LIMITS = 1 << 14,
+ NL80211_FEATURE_FULL_AP_CLIENT_STATE = 1 << 15,
+ NL80211_FEATURE_USERSPACE_MPM = 1 << 16,
+ NL80211_FEATURE_ACTIVE_MONITOR = 1 << 17,
+ NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE = 1 << 18,
+ NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES = 1 << 19,
+ NL80211_FEATURE_WFA_TPC_IE_IN_PROBES = 1 << 20,
+ NL80211_FEATURE_QUIET = 1 << 21,
+ NL80211_FEATURE_TX_POWER_INSERTION = 1 << 22,
+ NL80211_FEATURE_ACKTO_ESTIMATION = 1 << 23,
+ NL80211_FEATURE_STATIC_SMPS = 1 << 24,
+ NL80211_FEATURE_DYNAMIC_SMPS = 1 << 25,
+ NL80211_FEATURE_SUPPORTS_WMM_ADMISSION = 1 << 26,
+ NL80211_FEATURE_MAC_ON_CREATE = 1 << 27,
+ NL80211_FEATURE_TDLS_CHANNEL_SWITCH = 1 << 28,
+ NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR = 1 << 29,
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR = 1 << 30,
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1U << 31,
+};
+
+/**
+ * enum nl80211_ext_feature_index - bit index of extended features.
+ * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates.
+ * @NL80211_EXT_FEATURE_RRM: This driver supports RRM. When featured, user can
+ * can request to use RRM (see %NL80211_ATTR_USE_RRM) with
+ * %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests, which will set
+ * the ASSOC_REQ_USE_RRM flag in the association request even if
+ * NL80211_FEATURE_QUIET is not advertized.
+ * @NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER: This device supports MU-MIMO air
+ * sniffer which means that it can be configured to hear packets from
+ * certain groups which can be configured by the
+ * %NL80211_ATTR_MU_MIMO_GROUP_DATA attribute,
+ * or can be configured to follow a station by configuring the
+ * %NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR attribute.
+ * @NL80211_EXT_FEATURE_SCAN_START_TIME: This driver includes the actual
+ * time the scan started in scan results event. The time is the TSF of
+ * the BSS that the interface that requested the scan is connected to
+ * (if available).
+ * @NL80211_EXT_FEATURE_BSS_PARENT_TSF: Per BSS, this driver reports the
+ * time the last beacon/probe was received. The time is the TSF of the
+ * BSS that the interface that requested the scan is connected to
+ * (if available).
+ * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of
+ * channel dwell time.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate
+ * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate
+ * configuration (AP/mesh) with HT rates.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate
+ * configuration (AP/mesh) with VHT rates.
+ * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup
+ * with user space SME (NL80211_CMD_AUTHENTICATE) in station mode.
+ * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA: This driver supports randomized TA
+ * in @NL80211_CMD_FRAME while not associated.
+ * @NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED: This driver supports
+ * randomized TA in @NL80211_CMD_FRAME while associated.
+ * @NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI: The driver supports sched_scan
+ * for reporting BSSs with better RSSI than the current connected BSS
+ * (%NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI).
+ * @NL80211_EXT_FEATURE_CQM_RSSI_LIST: With this driver the
+ * %NL80211_ATTR_CQM_RSSI_THOLD attribute accepts a list of zero or more
+ * RSSI threshold values to monitor rather than exactly one threshold.
+ * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key
+ * authentication with %NL80211_CMD_CONNECT.
+ * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK: Device wants to do 4-way
+ * handshake with PSK in station mode (PSK is passed as part of the connect
+ * and associate commands), doing it in the host might not be supported.
+ * @NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X: Device wants to do doing 4-way
+ * handshake with 802.1X in station mode (will pass EAP frames to the host
+ * and accept the set_pmk/del_pmk commands), doing it in the host might not
+ * be supported.
+ * @NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME: Driver is capable of overriding
+ * the max channel attribute in the FILS request params IE with the
+ * actual dwell time.
+ * @NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP: Driver accepts broadcast probe
+ * response
+ * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE: Driver supports sending
+ * the first probe request in each channel at rate of at least 5.5Mbps.
+ * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports
+ * probe request tx deferral and suppression
+ * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
+ * value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
+ * @NL80211_EXT_FEATURE_DFS_OFFLOAD: HW/driver will offload DFS actions.
+ * Device or driver will do all DFS-related actions by itself,
+ * informing user-space about CAC progress, radar detection event,
+ * channel change triggered by radar detection event.
+ * No need to start CAC from user-space, no need to react to
+ * "radar detected" event.
+ * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and
+ * receiving control port frames over nl80211 instead of the netdevice.
+ * @NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT: This driver/device supports
+ * (average) ACK signal strength reporting.
+ * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate
+ * TXQs.
+ * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the
+ * SN in probe request frames if requested by %NL80211_SCAN_FLAG_RANDOM_SN.
+ * @NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT: Driver/device can omit all data
+ * except for supported rates from the probe request content if requested
+ * by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag.
+ * @NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER: Driver supports enabling fine
+ * timing measurement responder role.
+ *
+ * @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0: Driver/device confirm that they are
+ * able to rekey an in-use key correctly. Userspace must not rekey PTK keys
+ * if this flag is not set. Ignoring this can leak clear text packets and/or
+ * freeze the connection.
+ * @NL80211_EXT_FEATURE_EXT_KEY_ID: Driver supports "Extended Key ID for
+ * Individually Addressed Frames" from IEEE802.11-2016.
+ *
+ * @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime
+ * fairness for transmitted packets and has enabled airtime fairness
+ * scheduling.
+ *
+ * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching
+ * (set/del PMKSA operations) in AP mode.
+ *
+ * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports
+ * filtering of sched scan results using band specific RSSI thresholds.
+ *
+ * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power
+ * to a station.
+ *
+ * @NL80211_EXT_FEATURE_SAE_OFFLOAD: Device wants to do SAE authentication in
+ * station mode (SAE password is passed as part of the connect command).
+ *
+ * @NUM_NL80211_EXT_FEATURES: number of extended features.
+ * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
+ */
+enum nl80211_ext_feature_index {
+ NL80211_EXT_FEATURE_VHT_IBSS,
+ NL80211_EXT_FEATURE_RRM,
+ NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER,
+ NL80211_EXT_FEATURE_SCAN_START_TIME,
+ NL80211_EXT_FEATURE_BSS_PARENT_TSF,
+ NL80211_EXT_FEATURE_SET_SCAN_DWELL,
+ NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+ NL80211_EXT_FEATURE_BEACON_RATE_HT,
+ NL80211_EXT_FEATURE_BEACON_RATE_VHT,
+ NL80211_EXT_FEATURE_FILS_STA,
+ NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA,
+ NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED,
+ NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI,
+ NL80211_EXT_FEATURE_CQM_RSSI_LIST,
+ NL80211_EXT_FEATURE_FILS_SK_OFFLOAD,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK,
+ NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X,
+ NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME,
+ NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
+ NL80211_EXT_FEATURE_MFP_OPTIONAL,
+ NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+ NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+ NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
+ NL80211_EXT_FEATURE_DFS_OFFLOAD,
+ NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211,
+ NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT,
+ /* we renamed this - stay compatible */
+ NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT = NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT,
+ NL80211_EXT_FEATURE_TXQS,
+ NL80211_EXT_FEATURE_SCAN_RANDOM_SN,
+ NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT,
+ NL80211_EXT_FEATURE_CAN_REPLACE_PTK0,
+ NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS,
+ NL80211_EXT_FEATURE_AP_PMKSA_CACHING,
+ NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD,
+ NL80211_EXT_FEATURE_EXT_KEY_ID,
+ NL80211_EXT_FEATURE_STA_TX_PWR,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD,
+
+ /* add new features before the definition below */
+ NUM_NL80211_EXT_FEATURES,
+ MAX_NL80211_EXT_FEATURES = NUM_NL80211_EXT_FEATURES - 1
+};
+
+/**
+ * enum nl80211_probe_resp_offload_support_attr - optional supported
+ * protocols for probe-response offloading by the driver/FW.
+ * To be used with the %NL80211_ATTR_PROBE_RESP_OFFLOAD attribute.
+ * Each enum value represents a bit in the bitmap of supported
+ * protocols. Typically a subset of probe-requests belonging to a
+ * supported protocol will be excluded from offload and uploaded
+ * to the host.
+ *
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS: Support for WPS ver. 1
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2: Support for WPS ver. 2
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P: Support for P2P
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U: Support for 802.11u
+ */
+enum nl80211_probe_resp_offload_support_attr {
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS = 1<<0,
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 = 1<<1,
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P = 1<<2,
+ NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U = 1<<3,
+};
+
+/**
+ * enum nl80211_connect_failed_reason - connection request failed reasons
+ * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be
+ * handled by the AP is reached.
+ * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Connection request is rejected due to ACL.
+ */
+enum nl80211_connect_failed_reason {
+ NL80211_CONN_FAIL_MAX_CLIENTS,
+ NL80211_CONN_FAIL_BLOCKED_CLIENT,
+};
+
+/**
+ * enum nl80211_timeout_reason - timeout reasons
+ *
+ * @NL80211_TIMEOUT_UNSPECIFIED: Timeout reason unspecified.
+ * @NL80211_TIMEOUT_SCAN: Scan (AP discovery) timed out.
+ * @NL80211_TIMEOUT_AUTH: Authentication timed out.
+ * @NL80211_TIMEOUT_ASSOC: Association timed out.
+ */
+enum nl80211_timeout_reason {
+ NL80211_TIMEOUT_UNSPECIFIED,
+ NL80211_TIMEOUT_SCAN,
+ NL80211_TIMEOUT_AUTH,
+ NL80211_TIMEOUT_ASSOC,
+};
+
+/**
+ * enum nl80211_scan_flags - scan request control flags
+ *
+ * Scan request control flags are used to control the handling
+ * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
+ * requests.
+ *
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only
+ * one of them can be used in the request.
+ *
+ * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
+ * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
+ * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
+ * as AP and the beaconing has already been configured. This attribute is
+ * dangerous because will destroy stations performance as a lot of frames
+ * will be lost while scanning off-channel, therefore it must be used only
+ * when really needed
+ * @NL80211_SCAN_FLAG_RANDOM_ADDR: use a random MAC address for this scan (or
+ * for scheduled scan: a different one for every scan iteration). When the
+ * flag is set, depending on device capabilities the @NL80211_ATTR_MAC and
+ * @NL80211_ATTR_MAC_MASK attributes may also be given in which case only
+ * the masked bits will be preserved from the MAC address and the remainder
+ * randomised. If the attributes are not given full randomisation (46 bits,
+ * locally administered 1, multicast 0) is assumed.
+ * This flag must not be requested when the feature isn't supported, check
+ * the nl80211 feature flags for the device.
+ * @NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME: fill the dwell time in the FILS
+ * request parameters IE in the probe request
+ * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses
+ * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at
+ * rate of at least 5.5M. In case non OCE AP is discovered in the channel,
+ * only the first probe req in the channel will be sent in high rate.
+ * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request
+ * tx deferral (dot11FILSProbeDelay shall be set to 15ms)
+ * and suppression (if it has received a broadcast Probe Response frame,
+ * Beacon frame or FILS Discovery frame from an AP that the STA considers
+ * a suitable candidate for (re-)association - suitable in terms of
+ * SSID and/or RSSI.
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to
+ * accomplish the scan. Thus, this flag intends the driver to perform the
+ * scan request with lesser span/duration. It is specific to the driver
+ * implementations on how this is accomplished. Scan accuracy may get
+ * impacted with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume
+ * optimal possible power. Drivers can resort to their specific means to
+ * optimize the power. Scan accuracy may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan
+ * results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum
+ * possible scan results. This flag hints the driver to use the best
+ * possible scan configuration to improve the accuracy in scanning.
+ * Latency and power use may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_RANDOM_SN: randomize the sequence number in probe
+ * request frames from this scan to avoid correlation/tracking being
+ * possible.
+ * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to
+ * only have supported rates and no additional capabilities (unless
+ * added by userspace explicitly.)
+ */
+enum nl80211_scan_flags {
+ NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
+ NL80211_SCAN_FLAG_FLUSH = 1<<1,
+ NL80211_SCAN_FLAG_AP = 1<<2,
+ NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3,
+ NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME = 1<<4,
+ NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5,
+ NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6,
+ NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7,
+ NL80211_SCAN_FLAG_LOW_SPAN = 1<<8,
+ NL80211_SCAN_FLAG_LOW_POWER = 1<<9,
+ NL80211_SCAN_FLAG_HIGH_ACCURACY = 1<<10,
+ NL80211_SCAN_FLAG_RANDOM_SN = 1<<11,
+ NL80211_SCAN_FLAG_MIN_PREQ_CONTENT = 1<<12,
+};
+
+/**
+ * enum nl80211_acl_policy - access control policy
+ *
+ * Access control policy is applied on a MAC list set by
+ * %NL80211_CMD_START_AP and %NL80211_CMD_SET_MAC_ACL, to
+ * be used with %NL80211_ATTR_ACL_POLICY.
+ *
+ * @NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED: Deny stations which are
+ * listed in ACL, i.e. allow all the stations which are not listed
+ * in ACL to authenticate.
+ * @NL80211_ACL_POLICY_DENY_UNLESS_LISTED: Allow the stations which are listed
+ * in ACL, i.e. deny all the stations which are not listed in ACL.
+ */
+enum nl80211_acl_policy {
+ NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED,
+ NL80211_ACL_POLICY_DENY_UNLESS_LISTED,
+};
+
+/**
+ * enum nl80211_smps_mode - SMPS mode
+ *
+ * Requested SMPS mode (for AP mode)
+ *
+ * @NL80211_SMPS_OFF: SMPS off (use all antennas).
+ * @NL80211_SMPS_STATIC: static SMPS (use a single antenna)
+ * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and
+ * turn on other antennas after CTS/RTS).
+ */
+enum nl80211_smps_mode {
+ NL80211_SMPS_OFF,
+ NL80211_SMPS_STATIC,
+ NL80211_SMPS_DYNAMIC,
+
+ __NL80211_SMPS_AFTER_LAST,
+ NL80211_SMPS_MAX = __NL80211_SMPS_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_radar_event - type of radar event for DFS operation
+ *
+ * Type of event to be used with NL80211_ATTR_RADAR_EVENT to inform userspace
+ * about detected radars or success of the channel available check (CAC)
+ *
+ * @NL80211_RADAR_DETECTED: A radar pattern has been detected. The channel is
+ * now unusable.
+ * @NL80211_RADAR_CAC_FINISHED: Channel Availability Check has been finished,
+ * the channel is now available.
+ * @NL80211_RADAR_CAC_ABORTED: Channel Availability Check has been aborted, no
+ * change to the channel status.
+ * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is
+ * over, channel becomes usable.
+ * @NL80211_RADAR_PRE_CAC_EXPIRED: Channel Availability Check done on this
+ * non-operating channel is expired and no longer valid. New CAC must
+ * be done on this channel before starting the operation. This is not
+ * applicable for ETSI dfs domain where pre-CAC is valid for ever.
+ * @NL80211_RADAR_CAC_STARTED: Channel Availability Check has been started,
+ * should be generated by HW if NL80211_EXT_FEATURE_DFS_OFFLOAD is enabled.
+ */
+enum nl80211_radar_event {
+ NL80211_RADAR_DETECTED,
+ NL80211_RADAR_CAC_FINISHED,
+ NL80211_RADAR_CAC_ABORTED,
+ NL80211_RADAR_NOP_FINISHED,
+ NL80211_RADAR_PRE_CAC_EXPIRED,
+ NL80211_RADAR_CAC_STARTED,
+};
+
+/**
+ * enum nl80211_dfs_state - DFS states for channels
+ *
+ * Channel states used by the DFS code.
+ *
+ * @NL80211_DFS_USABLE: The channel can be used, but channel availability
+ * check (CAC) must be performed before using it for AP or IBSS.
+ * @NL80211_DFS_UNAVAILABLE: A radar has been detected on this channel, it
+ * is therefore marked as not available.
+ * @NL80211_DFS_AVAILABLE: The channel has been CAC checked and is available.
+ */
+enum nl80211_dfs_state {
+ NL80211_DFS_USABLE,
+ NL80211_DFS_UNAVAILABLE,
+ NL80211_DFS_AVAILABLE,
+};
+
+/**
+ * enum enum nl80211_protocol_features - nl80211 protocol features
+ * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting
+ * wiphy dumps (if requested by the application with the attribute
+ * %NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the
+ * wiphy dump by %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFINDEX or
+ * %NL80211_ATTR_WDEV.
+ */
+enum nl80211_protocol_features {
+ NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP = 1 << 0,
+};
+
+/**
+ * enum nl80211_crit_proto_id - nl80211 critical protocol identifiers
+ *
+ * @NL80211_CRIT_PROTO_UNSPEC: protocol unspecified.
+ * @NL80211_CRIT_PROTO_DHCP: BOOTP or DHCPv6 protocol.
+ * @NL80211_CRIT_PROTO_EAPOL: EAPOL protocol.
+ * @NL80211_CRIT_PROTO_APIPA: APIPA protocol.
+ * @NUM_NL80211_CRIT_PROTO: must be kept last.
+ */
+enum nl80211_crit_proto_id {
+ NL80211_CRIT_PROTO_UNSPEC,
+ NL80211_CRIT_PROTO_DHCP,
+ NL80211_CRIT_PROTO_EAPOL,
+ NL80211_CRIT_PROTO_APIPA,
+ /* add other protocols before this one */
+ NUM_NL80211_CRIT_PROTO
+};
+
+/* maximum duration for critical protocol measures */
+#define NL80211_CRIT_PROTO_MAX_DURATION 5000 /* msec */
+
+/**
+ * enum nl80211_rxmgmt_flags - flags for received management frame.
+ *
+ * Used by cfg80211_rx_mgmt()
+ *
+ * @NL80211_RXMGMT_FLAG_ANSWERED: frame was answered by device/driver.
+ * @NL80211_RXMGMT_FLAG_EXTERNAL_AUTH: Host driver intends to offload
+ * the authentication. Exclusively defined for host drivers that
+ * advertises the SME functionality but would like the userspace
+ * to handle certain authentication algorithms (e.g. SAE).
+ */
+enum nl80211_rxmgmt_flags {
+ NL80211_RXMGMT_FLAG_ANSWERED = 1 << 0,
+ NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = 1 << 1,
+};
+
+/*
+ * If this flag is unset, the lower 24 bits are an OUI, if set
+ * a Linux nl80211 vendor ID is used (no such IDs are allocated
+ * yet, so that's not valid so far)
+ */
+#define NL80211_VENDOR_ID_IS_LINUX 0x80000000
+
+/**
+ * struct nl80211_vendor_cmd_info - vendor command data
+ * @vendor_id: If the %NL80211_VENDOR_ID_IS_LINUX flag is clear, then the
+ * value is a 24-bit OUI; if it is set then a separately allocated ID
+ * may be used, but no such IDs are allocated yet. New IDs should be
+ * added to this file when needed.
+ * @subcmd: sub-command ID for the command
+ */
+struct nl80211_vendor_cmd_info {
+ __u32 vendor_id;
+ __u32 subcmd;
+};
+
+/**
+ * enum nl80211_tdls_peer_capability - TDLS peer flags.
+ *
+ * Used by tdls_mgmt() to determine which conditional elements need
+ * to be added to TDLS Setup frames.
+ *
+ * @NL80211_TDLS_PEER_HT: TDLS peer is HT capable.
+ * @NL80211_TDLS_PEER_VHT: TDLS peer is VHT capable.
+ * @NL80211_TDLS_PEER_WMM: TDLS peer is WMM capable.
+ */
+enum nl80211_tdls_peer_capability {
+ NL80211_TDLS_PEER_HT = 1<<0,
+ NL80211_TDLS_PEER_VHT = 1<<1,
+ NL80211_TDLS_PEER_WMM = 1<<2,
+};
+
+/**
+ * enum nl80211_sched_scan_plan - scanning plan for scheduled scan
+ * @__NL80211_SCHED_SCAN_PLAN_INVALID: attribute number 0 is reserved
+ * @NL80211_SCHED_SCAN_PLAN_INTERVAL: interval between scan iterations. In
+ * seconds (u32).
+ * @NL80211_SCHED_SCAN_PLAN_ITERATIONS: number of scan iterations in this
+ * scan plan (u32). The last scan plan must not specify this attribute
+ * because it will run infinitely. A value of zero is invalid as it will
+ * make the scan plan meaningless.
+ * @NL80211_SCHED_SCAN_PLAN_MAX: highest scheduled scan plan attribute number
+ * currently defined
+ * @__NL80211_SCHED_SCAN_PLAN_AFTER_LAST: internal use
+ */
+enum nl80211_sched_scan_plan {
+ __NL80211_SCHED_SCAN_PLAN_INVALID,
+ NL80211_SCHED_SCAN_PLAN_INTERVAL,
+ NL80211_SCHED_SCAN_PLAN_ITERATIONS,
+
+ /* keep last */
+ __NL80211_SCHED_SCAN_PLAN_AFTER_LAST,
+ NL80211_SCHED_SCAN_PLAN_MAX =
+ __NL80211_SCHED_SCAN_PLAN_AFTER_LAST - 1
+};
+
+/**
+ * struct nl80211_bss_select_rssi_adjust - RSSI adjustment parameters.
+ *
+ * @band: band of BSS that must match for RSSI value adjustment. The value
+ * of this field is according to &enum nl80211_band.
+ * @delta: value used to adjust the RSSI value of matching BSS in dB.
+ */
+struct nl80211_bss_select_rssi_adjust {
+ __u8 band;
+ __s8 delta;
+} __attribute__((packed));
+
+/**
+ * enum nl80211_bss_select_attr - attributes for bss selection.
+ *
+ * @__NL80211_BSS_SELECT_ATTR_INVALID: reserved.
+ * @NL80211_BSS_SELECT_ATTR_RSSI: Flag indicating only RSSI-based BSS selection
+ * is requested.
+ * @NL80211_BSS_SELECT_ATTR_BAND_PREF: attribute indicating BSS
+ * selection should be done such that the specified band is preferred.
+ * When there are multiple BSS-es in the preferred band, the driver
+ * shall use RSSI-based BSS selection as a second step. The value of
+ * this attribute is according to &enum nl80211_band (u32).
+ * @NL80211_BSS_SELECT_ATTR_RSSI_ADJUST: When present the RSSI level for
+ * BSS-es in the specified band is to be adjusted before doing
+ * RSSI-based BSS selection. The attribute value is a packed structure
+ * value as specified by &struct nl80211_bss_select_rssi_adjust.
+ * @NL80211_BSS_SELECT_ATTR_MAX: highest bss select attribute number.
+ * @__NL80211_BSS_SELECT_ATTR_AFTER_LAST: internal use.
+ *
+ * One and only one of these attributes are found within %NL80211_ATTR_BSS_SELECT
+ * for %NL80211_CMD_CONNECT. It specifies the required BSS selection behaviour
+ * which the driver shall use.
+ */
+enum nl80211_bss_select_attr {
+ __NL80211_BSS_SELECT_ATTR_INVALID,
+ NL80211_BSS_SELECT_ATTR_RSSI,
+ NL80211_BSS_SELECT_ATTR_BAND_PREF,
+ NL80211_BSS_SELECT_ATTR_RSSI_ADJUST,
+
+ /* keep last */
+ __NL80211_BSS_SELECT_ATTR_AFTER_LAST,
+ NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_nan_function_type - NAN function type
+ *
+ * Defines the function type of a NAN function
+ *
+ * @NL80211_NAN_FUNC_PUBLISH: function is publish
+ * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe
+ * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up
+ */
+enum nl80211_nan_function_type {
+ NL80211_NAN_FUNC_PUBLISH,
+ NL80211_NAN_FUNC_SUBSCRIBE,
+ NL80211_NAN_FUNC_FOLLOW_UP,
+
+ /* keep last */
+ __NL80211_NAN_FUNC_TYPE_AFTER_LAST,
+ NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1,
+};
+
+/**
+ * enum nl80211_nan_publish_type - NAN publish tx type
+ *
+ * Defines how to send publish Service Discovery Frames
+ *
+ * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited
+ * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited
+ */
+enum nl80211_nan_publish_type {
+ NL80211_NAN_SOLICITED_PUBLISH = 1 << 0,
+ NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1,
+};
+
+/**
+ * enum nl80211_nan_func_term_reason - NAN functions termination reason
+ *
+ * Defines termination reasons of a NAN function
+ *
+ * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user
+ * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout
+ * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored
+ */
+enum nl80211_nan_func_term_reason {
+ NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST,
+ NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED,
+ NL80211_NAN_FUNC_TERM_REASON_ERROR,
+};
+
+#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6
+#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff
+#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff
+
+/**
+ * enum nl80211_nan_func_attributes - NAN function attributes
+ * @__NL80211_NAN_FUNC_INVALID: invalid
+ * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8).
+ * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as
+ * specified in NAN spec. This is a binary attribute.
+ * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is
+ * publish. Defines the transmission type for the publish Service Discovery
+ * Frame, see &enum nl80211_nan_publish_type. Its type is u8.
+ * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited
+ * publish. Should the solicited publish Service Discovery Frame be sent to
+ * the NAN Broadcast address. This is a flag.
+ * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is
+ * subscribe. Is the subscribe active. This is a flag.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up.
+ * The instance ID for the follow up Service Discovery Frame. This is u8.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type
+ * is follow up. This is a u8.
+ * The requestor instance ID for the follow up Service Discovery Frame.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the
+ * follow up Service Discovery Frame. This is a binary attribute.
+ * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a
+ * close range. The range itself (RSSI) is defined by the device.
+ * This is a flag.
+ * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should
+ * stay active. If not present infinite TTL is assumed. This is a u32.
+ * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service
+ * specific info. This is a binary attribute.
+ * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute.
+ * See &enum nl80211_nan_srf_attributes.
+ * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested
+ * attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a
+ * nested attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function.
+ * Its type is u8 and it cannot be 0.
+ * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason.
+ * See &enum nl80211_nan_func_term_reason.
+ *
+ * @NUM_NL80211_NAN_FUNC_ATTR: internal
+ * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute
+ */
+enum nl80211_nan_func_attributes {
+ __NL80211_NAN_FUNC_INVALID,
+ NL80211_NAN_FUNC_TYPE,
+ NL80211_NAN_FUNC_SERVICE_ID,
+ NL80211_NAN_FUNC_PUBLISH_TYPE,
+ NL80211_NAN_FUNC_PUBLISH_BCAST,
+ NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE,
+ NL80211_NAN_FUNC_FOLLOW_UP_ID,
+ NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID,
+ NL80211_NAN_FUNC_FOLLOW_UP_DEST,
+ NL80211_NAN_FUNC_CLOSE_RANGE,
+ NL80211_NAN_FUNC_TTL,
+ NL80211_NAN_FUNC_SERVICE_INFO,
+ NL80211_NAN_FUNC_SRF,
+ NL80211_NAN_FUNC_RX_MATCH_FILTER,
+ NL80211_NAN_FUNC_TX_MATCH_FILTER,
+ NL80211_NAN_FUNC_INSTANCE_ID,
+ NL80211_NAN_FUNC_TERM_REASON,
+
+ /* keep last */
+ NUM_NL80211_NAN_FUNC_ATTR,
+ NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1
+};
+
+/**
+ * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes
+ * @__NL80211_NAN_SRF_INVALID: invalid
+ * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set.
+ * This is a flag.
+ * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if
+ * %NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary.
+ * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if
+ * %NL80211_NAN_SRF_BF is present. This is a u8.
+ * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if
+ * and only if %NL80211_NAN_SRF_BF isn't present. This is a nested
+ * attribute. Each nested attribute is a MAC address.
+ * @NUM_NL80211_NAN_SRF_ATTR: internal
+ * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute
+ */
+enum nl80211_nan_srf_attributes {
+ __NL80211_NAN_SRF_INVALID,
+ NL80211_NAN_SRF_INCLUDE,
+ NL80211_NAN_SRF_BF,
+ NL80211_NAN_SRF_BF_IDX,
+ NL80211_NAN_SRF_MAC_ADDRS,
+
+ /* keep last */
+ NUM_NL80211_NAN_SRF_ATTR,
+ NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1,
+};
+
+/**
+ * enum nl80211_nan_match_attributes - NAN match attributes
+ * @__NL80211_NAN_MATCH_INVALID: invalid
+ * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the
+ * match. This is a nested attribute.
+ * See &enum nl80211_nan_func_attributes.
+ * @NL80211_NAN_MATCH_FUNC_PEER: the peer function
+ * that caused the match. This is a nested attribute.
+ * See &enum nl80211_nan_func_attributes.
+ *
+ * @NUM_NL80211_NAN_MATCH_ATTR: internal
+ * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute
+ */
+enum nl80211_nan_match_attributes {
+ __NL80211_NAN_MATCH_INVALID,
+ NL80211_NAN_MATCH_FUNC_LOCAL,
+ NL80211_NAN_MATCH_FUNC_PEER,
+
+ /* keep last */
+ NUM_NL80211_NAN_MATCH_ATTR,
+ NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
+};
+
+/**
+ * nl80211_external_auth_action - Action to perform with external
+ * authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+ NL80211_EXTERNAL_AUTH_START,
+ NL80211_EXTERNAL_AUTH_ABORT,
+};
+
+/**
+ * enum nl80211_ftm_responder_attributes - fine timing measurement
+ * responder attributes
+ * @__NL80211_FTM_RESP_ATTR_INVALID: Invalid
+ * @NL80211_FTM_RESP_ATTR_ENABLED: FTM responder is enabled
+ * @NL80211_FTM_RESP_ATTR_LCI: The content of Measurement Report Element
+ * (9.4.2.22 in 802.11-2016) with type 8 - LCI (9.4.2.22.10),
+ * i.e. starting with the measurement token
+ * @NL80211_FTM_RESP_ATTR_CIVIC: The content of Measurement Report Element
+ * (9.4.2.22 in 802.11-2016) with type 11 - Civic (Section 9.4.2.22.13),
+ * i.e. starting with the measurement token
+ * @__NL80211_FTM_RESP_ATTR_LAST: Internal
+ * @NL80211_FTM_RESP_ATTR_MAX: highest FTM responder attribute.
+ */
+enum nl80211_ftm_responder_attributes {
+ __NL80211_FTM_RESP_ATTR_INVALID,
+
+ NL80211_FTM_RESP_ATTR_ENABLED,
+ NL80211_FTM_RESP_ATTR_LCI,
+ NL80211_FTM_RESP_ATTR_CIVICLOC,
+
+ /* keep last */
+ __NL80211_FTM_RESP_ATTR_LAST,
+ NL80211_FTM_RESP_ATTR_MAX = __NL80211_FTM_RESP_ATTR_LAST - 1,
+};
+
+/*
+ * enum nl80211_ftm_responder_stats - FTM responder statistics
+ *
+ * These attribute types are used with %NL80211_ATTR_FTM_RESPONDER_STATS
+ * when getting FTM responder statistics.
+ *
+ * @__NL80211_FTM_STATS_INVALID: attribute number 0 is reserved
+ * @NL80211_FTM_STATS_SUCCESS_NUM: number of FTM sessions in which all frames
+ * were ssfully answered (u32)
+ * @NL80211_FTM_STATS_PARTIAL_NUM: number of FTM sessions in which part of the
+ * frames were successfully answered (u32)
+ * @NL80211_FTM_STATS_FAILED_NUM: number of failed FTM sessions (u32)
+ * @NL80211_FTM_STATS_ASAP_NUM: number of ASAP sessions (u32)
+ * @NL80211_FTM_STATS_NON_ASAP_NUM: number of non-ASAP sessions (u32)
+ * @NL80211_FTM_STATS_TOTAL_DURATION_MSEC: total sessions durations - gives an
+ * indication of how much time the responder was busy (u64, msec)
+ * @NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM: number of unknown FTM triggers -
+ * triggers from initiators that didn't finish successfully the negotiation
+ * phase with the responder (u32)
+ * @NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM: number of FTM reschedule requests
+ * - initiator asks for a new scheduling although it already has scheduled
+ * FTM slot (u32)
+ * @NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM: number of FTM triggers out of
+ * scheduled window (u32)
+ * @NL80211_FTM_STATS_PAD: used for padding, ignore
+ * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal
+ * @NL80211_FTM_STATS_MAX: highest possible FTM responder stats attribute
+ */
+enum nl80211_ftm_responder_stats {
+ __NL80211_FTM_STATS_INVALID,
+ NL80211_FTM_STATS_SUCCESS_NUM,
+ NL80211_FTM_STATS_PARTIAL_NUM,
+ NL80211_FTM_STATS_FAILED_NUM,
+ NL80211_FTM_STATS_ASAP_NUM,
+ NL80211_FTM_STATS_NON_ASAP_NUM,
+ NL80211_FTM_STATS_TOTAL_DURATION_MSEC,
+ NL80211_FTM_STATS_UNKNOWN_TRIGGERS_NUM,
+ NL80211_FTM_STATS_RESCHEDULE_REQUESTS_NUM,
+ NL80211_FTM_STATS_OUT_OF_WINDOW_TRIGGERS_NUM,
+ NL80211_FTM_STATS_PAD,
+
+ /* keep last */
+ __NL80211_FTM_STATS_AFTER_LAST,
+ NL80211_FTM_STATS_MAX = __NL80211_FTM_STATS_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_preamble - frame preamble types
+ * @NL80211_PREAMBLE_LEGACY: legacy (HR/DSSS, OFDM, ERP PHY) preamble
+ * @NL80211_PREAMBLE_HT: HT preamble
+ * @NL80211_PREAMBLE_VHT: VHT preamble
+ * @NL80211_PREAMBLE_DMG: DMG preamble
+ */
+enum nl80211_preamble {
+ NL80211_PREAMBLE_LEGACY,
+ NL80211_PREAMBLE_HT,
+ NL80211_PREAMBLE_VHT,
+ NL80211_PREAMBLE_DMG,
+};
+
+/**
+ * enum nl80211_peer_measurement_type - peer measurement types
+ * @NL80211_PMSR_TYPE_INVALID: invalid/unused, needed as we use
+ * these numbers also for attributes
+ *
+ * @NL80211_PMSR_TYPE_FTM: flight time measurement
+ *
+ * @NUM_NL80211_PMSR_TYPES: internal
+ * @NL80211_PMSR_TYPE_MAX: highest type number
+ */
+enum nl80211_peer_measurement_type {
+ NL80211_PMSR_TYPE_INVALID,
+
+ NL80211_PMSR_TYPE_FTM,
+
+ NUM_NL80211_PMSR_TYPES,
+ NL80211_PMSR_TYPE_MAX = NUM_NL80211_PMSR_TYPES - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_status - peer measurement status
+ * @NL80211_PMSR_STATUS_SUCCESS: measurement completed successfully
+ * @NL80211_PMSR_STATUS_REFUSED: measurement was locally refused
+ * @NL80211_PMSR_STATUS_TIMEOUT: measurement timed out
+ * @NL80211_PMSR_STATUS_FAILURE: measurement failed, a type-dependent
+ * reason may be available in the response data
+ */
+enum nl80211_peer_measurement_status {
+ NL80211_PMSR_STATUS_SUCCESS,
+ NL80211_PMSR_STATUS_REFUSED,
+ NL80211_PMSR_STATUS_TIMEOUT,
+ NL80211_PMSR_STATUS_FAILURE,
+};
+
+/**
+ * enum nl80211_peer_measurement_req - peer measurement request attributes
+ * @__NL80211_PMSR_REQ_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_REQ_ATTR_DATA: This is a nested attribute with measurement
+ * type-specific request data inside. The attributes used are from the
+ * enums named nl80211_peer_measurement_<type>_req.
+ * @NL80211_PMSR_REQ_ATTR_GET_AP_TSF: include AP TSF timestamp, if supported
+ * (flag attribute)
+ *
+ * @NUM_NL80211_PMSR_REQ_ATTRS: internal
+ * @NL80211_PMSR_REQ_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_req {
+ __NL80211_PMSR_REQ_ATTR_INVALID,
+
+ NL80211_PMSR_REQ_ATTR_DATA,
+ NL80211_PMSR_REQ_ATTR_GET_AP_TSF,
+
+ /* keep last */
+ NUM_NL80211_PMSR_REQ_ATTRS,
+ NL80211_PMSR_REQ_ATTR_MAX = NUM_NL80211_PMSR_REQ_ATTRS - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_resp - peer measurement response attributes
+ * @__NL80211_PMSR_RESP_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_RESP_ATTR_DATA: This is a nested attribute with measurement
+ * type-specific results inside. The attributes used are from the enums
+ * named nl80211_peer_measurement_<type>_resp.
+ * @NL80211_PMSR_RESP_ATTR_STATUS: u32 value with the measurement status
+ * (using values from &enum nl80211_peer_measurement_status.)
+ * @NL80211_PMSR_RESP_ATTR_HOST_TIME: host time (%CLOCK_BOOTTIME) when the
+ * result was measured; this value is not expected to be accurate to
+ * more than 20ms. (u64, nanoseconds)
+ * @NL80211_PMSR_RESP_ATTR_AP_TSF: TSF of the AP that the interface
+ * doing the measurement is connected to when the result was measured.
+ * This shall be accurately reported if supported and requested
+ * (u64, usec)
+ * @NL80211_PMSR_RESP_ATTR_FINAL: If results are sent to the host partially
+ * (*e.g. with FTM per-burst data) this flag will be cleared on all but
+ * the last result; if all results are combined it's set on the single
+ * result.
+ * @NL80211_PMSR_RESP_ATTR_PAD: padding for 64-bit attributes, ignore
+ *
+ * @NUM_NL80211_PMSR_RESP_ATTRS: internal
+ * @NL80211_PMSR_RESP_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_resp {
+ __NL80211_PMSR_RESP_ATTR_INVALID,
+
+ NL80211_PMSR_RESP_ATTR_DATA,
+ NL80211_PMSR_RESP_ATTR_STATUS,
+ NL80211_PMSR_RESP_ATTR_HOST_TIME,
+ NL80211_PMSR_RESP_ATTR_AP_TSF,
+ NL80211_PMSR_RESP_ATTR_FINAL,
+ NL80211_PMSR_RESP_ATTR_PAD,
+
+ /* keep last */
+ NUM_NL80211_PMSR_RESP_ATTRS,
+ NL80211_PMSR_RESP_ATTR_MAX = NUM_NL80211_PMSR_RESP_ATTRS - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_peer_attrs - peer attributes for measurement
+ * @__NL80211_PMSR_PEER_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_PEER_ATTR_ADDR: peer's MAC address
+ * @NL80211_PMSR_PEER_ATTR_CHAN: channel definition, nested, using top-level
+ * attributes like %NL80211_ATTR_WIPHY_FREQ etc.
+ * @NL80211_PMSR_PEER_ATTR_REQ: This is a nested attribute indexed by
+ * measurement type, with attributes from the
+ * &enum nl80211_peer_measurement_req inside.
+ * @NL80211_PMSR_PEER_ATTR_RESP: This is a nested attribute indexed by
+ * measurement type, with attributes from the
+ * &enum nl80211_peer_measurement_resp inside.
+ *
+ * @NUM_NL80211_PMSR_PEER_ATTRS: internal
+ * @NL80211_PMSR_PEER_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_peer_attrs {
+ __NL80211_PMSR_PEER_ATTR_INVALID,
+
+ NL80211_PMSR_PEER_ATTR_ADDR,
+ NL80211_PMSR_PEER_ATTR_CHAN,
+ NL80211_PMSR_PEER_ATTR_REQ,
+ NL80211_PMSR_PEER_ATTR_RESP,
+
+ /* keep last */
+ NUM_NL80211_PMSR_PEER_ATTRS,
+ NL80211_PMSR_PEER_ATTR_MAX = NUM_NL80211_PMSR_PEER_ATTRS - 1,
+};
+
+/**
+ * enum nl80211_peer_measurement_attrs - peer measurement attributes
+ * @__NL80211_PMSR_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_ATTR_MAX_PEERS: u32 attribute used for capability
+ * advertisement only, indicates the maximum number of peers
+ * measurements can be done with in a single request
+ * @NL80211_PMSR_ATTR_REPORT_AP_TSF: flag attribute in capability
+ * indicating that the connected AP's TSF can be reported in
+ * measurement results
+ * @NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR: flag attribute in capability
+ * indicating that MAC address randomization is supported.
+ * @NL80211_PMSR_ATTR_TYPE_CAPA: capabilities reported by the device,
+ * this contains a nesting indexed by measurement type, and
+ * type-specific capabilities inside, which are from the enums
+ * named nl80211_peer_measurement_<type>_capa.
+ * @NL80211_PMSR_ATTR_PEERS: nested attribute, the nesting index is
+ * meaningless, just a list of peers to measure with, with the
+ * sub-attributes taken from
+ * &enum nl80211_peer_measurement_peer_attrs.
+ *
+ * @NUM_NL80211_PMSR_ATTR: internal
+ * @NL80211_PMSR_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_attrs {
+ __NL80211_PMSR_ATTR_INVALID,
+
+ NL80211_PMSR_ATTR_MAX_PEERS,
+ NL80211_PMSR_ATTR_REPORT_AP_TSF,
+ NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR,
+ NL80211_PMSR_ATTR_TYPE_CAPA,
+ NL80211_PMSR_ATTR_PEERS,
+
+ /* keep last */
+ NUM_NL80211_PMSR_ATTR,
+ NL80211_PMSR_ATTR_MAX = NUM_NL80211_PMSR_ATTR - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_capa - FTM capabilities
+ * @__NL80211_PMSR_FTM_CAPA_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_FTM_CAPA_ATTR_ASAP: flag attribute indicating ASAP mode
+ * is supported
+ * @NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP: flag attribute indicating non-ASAP
+ * mode is supported
+ * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI: flag attribute indicating if LCI
+ * data can be requested during the measurement
+ * @NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC: flag attribute indicating if civic
+ * location data can be requested during the measurement
+ * @NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES: u32 bitmap attribute of bits
+ * from &enum nl80211_preamble.
+ * @NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS: bitmap of values from
+ * &enum nl80211_chan_width indicating the supported channel
+ * bandwidths for FTM. Note that a higher channel bandwidth may be
+ * configured to allow for other measurements types with different
+ * bandwidth requirement in the same measurement.
+ * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT: u32 attribute indicating
+ * the maximum bursts exponent that can be used (if not present anything
+ * is valid)
+ * @NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST: u32 attribute indicating
+ * the maximum FTMs per burst (if not present anything is valid)
+ *
+ * @NUM_NL80211_PMSR_FTM_CAPA_ATTR: internal
+ * @NL80211_PMSR_FTM_CAPA_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_ftm_capa {
+ __NL80211_PMSR_FTM_CAPA_ATTR_INVALID,
+
+ NL80211_PMSR_FTM_CAPA_ATTR_ASAP,
+ NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP,
+ NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI,
+ NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC,
+ NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES,
+ NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS,
+ NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT,
+ NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST,
+
+ /* keep last */
+ NUM_NL80211_PMSR_FTM_CAPA_ATTR,
+ NL80211_PMSR_FTM_CAPA_ATTR_MAX = NUM_NL80211_PMSR_FTM_CAPA_ATTR - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_req - FTM request attributes
+ * @__NL80211_PMSR_FTM_REQ_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_FTM_REQ_ATTR_ASAP: ASAP mode requested (flag)
+ * @NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE: preamble type (see
+ * &enum nl80211_preamble), optional for DMG (u32)
+ * @NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP: number of bursts exponent as in
+ * 802.11-2016 9.4.2.168 "Fine Timing Measurement Parameters element"
+ * (u8, 0-15, optional with default 15 i.e. "no preference")
+ * @NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD: interval between bursts in units
+ * of 100ms (u16, optional with default 0)
+ * @NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION: burst duration, as in 802.11-2016
+ * Table 9-257 "Burst Duration field encoding" (u8, 0-15, optional with
+ * default 15 i.e. "no preference")
+ * @NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST: number of successful FTM frames
+ * requested per burst
+ * (u8, 0-31, optional with default 0 i.e. "no preference")
+ * @NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES: number of FTMR frame retries
+ * (u8, default 3)
+ * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI: request LCI data (flag)
+ * @NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC: request civic location data
+ * (flag)
+ *
+ * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal
+ * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_ftm_req {
+ __NL80211_PMSR_FTM_REQ_ATTR_INVALID,
+
+ NL80211_PMSR_FTM_REQ_ATTR_ASAP,
+ NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE,
+ NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP,
+ NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD,
+ NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION,
+ NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST,
+ NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES,
+ NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI,
+ NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC,
+
+ /* keep last */
+ NUM_NL80211_PMSR_FTM_REQ_ATTR,
+ NL80211_PMSR_FTM_REQ_ATTR_MAX = NUM_NL80211_PMSR_FTM_REQ_ATTR - 1
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_failure_reasons - FTM failure reasons
+ * @NL80211_PMSR_FTM_FAILURE_UNSPECIFIED: unspecified failure, not used
+ * @NL80211_PMSR_FTM_FAILURE_NO_RESPONSE: no response from the FTM responder
+ * @NL80211_PMSR_FTM_FAILURE_REJECTED: FTM responder rejected measurement
+ * @NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL: we already know the peer is
+ * on a different channel, so can't measure (if we didn't know, we'd
+ * try and get no response)
+ * @NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE: peer can't actually do FTM
+ * @NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP: invalid T1/T4 timestamps
+ * received
+ * @NL80211_PMSR_FTM_FAILURE_PEER_BUSY: peer reports busy, you may retry
+ * later (see %NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME)
+ * @NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS: parameters were changed
+ * by the peer and are no longer supported
+ */
+enum nl80211_peer_measurement_ftm_failure_reasons {
+ NL80211_PMSR_FTM_FAILURE_UNSPECIFIED,
+ NL80211_PMSR_FTM_FAILURE_NO_RESPONSE,
+ NL80211_PMSR_FTM_FAILURE_REJECTED,
+ NL80211_PMSR_FTM_FAILURE_WRONG_CHANNEL,
+ NL80211_PMSR_FTM_FAILURE_PEER_NOT_CAPABLE,
+ NL80211_PMSR_FTM_FAILURE_INVALID_TIMESTAMP,
+ NL80211_PMSR_FTM_FAILURE_PEER_BUSY,
+ NL80211_PMSR_FTM_FAILURE_BAD_CHANGED_PARAMS,
+};
+
+/**
+ * enum nl80211_peer_measurement_ftm_resp - FTM response attributes
+ * @__NL80211_PMSR_FTM_RESP_ATTR_INVALID: invalid
+ *
+ * @NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON: FTM-specific failure reason
+ * (u32, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX: optional, if bursts are reported
+ * as separate results then it will be the burst index 0...(N-1) and
+ * the top level will indicate partial results (u32)
+ * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS: number of FTM Request frames
+ * transmitted (u32, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES: number of FTM Request frames
+ * that were acknowleged (u32, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME: retry time received from the
+ * busy peer (u32, seconds)
+ * @NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP: actual number of bursts exponent
+ * used by the responder (similar to request, u8)
+ * @NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION: actual burst duration used by
+ * the responder (similar to request, u8)
+ * @NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST: actual FTMs per burst used
+ * by the responder (similar to request, u8)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG: average RSSI across all FTM action
+ * frames (optional, s32, 1/2 dBm)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD: RSSI spread across all FTM action
+ * frames (optional, s32, 1/2 dBm)
+ * @NL80211_PMSR_FTM_RESP_ATTR_TX_RATE: bitrate we used for the response to the
+ * FTM action frame (optional, nested, using &enum nl80211_rate_info
+ * attributes)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RX_RATE: bitrate the responder used for the FTM
+ * action frame (optional, nested, using &enum nl80211_rate_info attrs)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG: average RTT (s64, picoseconds, optional
+ * but one of RTT/DIST must be present)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE: RTT variance (u64, ps^2, note that
+ * standard deviation is the square root of variance, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD: RTT spread (u64, picoseconds,
+ * optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG: average distance (s64, mm, optional
+ * but one of RTT/DIST must be present)
+ * @NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE: distance variance (u64, mm^2, note
+ * that standard deviation is the square root of variance, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD: distance spread (u64, mm, optional)
+ * @NL80211_PMSR_FTM_RESP_ATTR_LCI: LCI data from peer (binary, optional);
+ * this is the contents of the Measurement Report Element (802.11-2016
+ * 9.4.2.22.1) starting with the Measurement Token, with Measurement
+ * Type 8.
+ * @NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC: civic location data from peer
+ * (binary, optional);
+ * this is the contents of the Measurement Report Element (802.11-2016
+ * 9.4.2.22.1) starting with the Measurement Token, with Measurement
+ * Type 11.
+ * @NL80211_PMSR_FTM_RESP_ATTR_PAD: ignore, for u64/s64 padding only
+ *
+ * @NUM_NL80211_PMSR_FTM_RESP_ATTR: internal
+ * @NL80211_PMSR_FTM_RESP_ATTR_MAX: highest attribute number
+ */
+enum nl80211_peer_measurement_ftm_resp {
+ __NL80211_PMSR_FTM_RESP_ATTR_INVALID,
+
+ NL80211_PMSR_FTM_RESP_ATTR_FAIL_REASON,
+ NL80211_PMSR_FTM_RESP_ATTR_BURST_INDEX,
+ NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_ATTEMPTS,
+ NL80211_PMSR_FTM_RESP_ATTR_NUM_FTMR_SUCCESSES,
+ NL80211_PMSR_FTM_RESP_ATTR_BUSY_RETRY_TIME,
+ NL80211_PMSR_FTM_RESP_ATTR_NUM_BURSTS_EXP,
+ NL80211_PMSR_FTM_RESP_ATTR_BURST_DURATION,
+ NL80211_PMSR_FTM_RESP_ATTR_FTMS_PER_BURST,
+ NL80211_PMSR_FTM_RESP_ATTR_RSSI_AVG,
+ NL80211_PMSR_FTM_RESP_ATTR_RSSI_SPREAD,
+ NL80211_PMSR_FTM_RESP_ATTR_TX_RATE,
+ NL80211_PMSR_FTM_RESP_ATTR_RX_RATE,
+ NL80211_PMSR_FTM_RESP_ATTR_RTT_AVG,
+ NL80211_PMSR_FTM_RESP_ATTR_RTT_VARIANCE,
+ NL80211_PMSR_FTM_RESP_ATTR_RTT_SPREAD,
+ NL80211_PMSR_FTM_RESP_ATTR_DIST_AVG,
+ NL80211_PMSR_FTM_RESP_ATTR_DIST_VARIANCE,
+ NL80211_PMSR_FTM_RESP_ATTR_DIST_SPREAD,
+ NL80211_PMSR_FTM_RESP_ATTR_LCI,
+ NL80211_PMSR_FTM_RESP_ATTR_CIVICLOC,
+ NL80211_PMSR_FTM_RESP_ATTR_PAD,
+
+ /* keep last */
+ NUM_NL80211_PMSR_FTM_RESP_ATTR,
+ NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1
+};
+
+/**
+ * enum nl80211_obss_pd_attributes - OBSS packet detection attributes
+ * @__NL80211_HE_OBSS_PD_ATTR_INVALID: Invalid
+ *
+ * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset.
+ * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset.
+ *
+ * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal
+ * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute.
+ */
+enum nl80211_obss_pd_attributes {
+ __NL80211_HE_OBSS_PD_ATTR_INVALID,
+
+ NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET,
+ NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET,
+
+ /* keep last */
+ __NL80211_HE_OBSS_PD_ATTR_LAST,
+ NL80211_HE_OBSS_PD_ATTR_MAX = __NL80211_HE_OBSS_PD_ATTR_LAST - 1,
+};
+
+
+#endif /* __LINUX_NL80211_H */
diff --git a/src/shared/local-addresses.c b/src/shared/local-addresses.c
new file mode 100644
index 0000000..2c860f7
--- /dev/null
+++ b/src/shared/local-addresses.c
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-netlink.h"
+
+#include "alloc-util.h"
+#include "local-addresses.h"
+#include "macro.h"
+#include "netlink-util.h"
+#include "sort-util.h"
+
+static int address_compare(const struct local_address *a, const struct local_address *b) {
+ int r;
+
+ /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */
+
+ if (a->family == AF_INET && b->family == AF_INET6)
+ return -1;
+ if (a->family == AF_INET6 && b->family == AF_INET)
+ return 1;
+
+ r = CMP(a->scope, b->scope);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->metric, b->metric);
+ if (r != 0)
+ return r;
+
+ r = CMP(a->ifindex, b->ifindex);
+ if (r != 0)
+ return r;
+
+ return memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
+}
+
+int local_addresses(sd_netlink *context, int ifindex, int af, struct local_address **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ struct local_address *list = NULL;
+ size_t n_list = 0, n_allocated = 0;
+ sd_netlink_message *m;
+ int r;
+
+ if (context)
+ rtnl = sd_netlink_ref(context);
+ else {
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, af);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (m = reply; m; m = sd_netlink_message_next(m)) {
+ struct local_address *a;
+ unsigned char flags;
+ uint16_t type;
+ int ifi, family;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+ if (type != RTM_NEWADDR)
+ continue;
+
+ r = sd_rtnl_message_addr_get_ifindex(m, &ifi);
+ if (r < 0)
+ return r;
+ if (ifindex > 0 && ifi != ifindex)
+ continue;
+
+ r = sd_rtnl_message_addr_get_family(m, &family);
+ if (r < 0)
+ return r;
+ if (af != AF_UNSPEC && af != family)
+ continue;
+
+ r = sd_rtnl_message_addr_get_flags(m, &flags);
+ if (r < 0)
+ return r;
+ if (flags & IFA_F_DEPRECATED)
+ continue;
+
+ if (!GREEDY_REALLOC0(list, n_allocated, n_list+1))
+ return -ENOMEM;
+
+ a = list + n_list;
+
+ r = sd_rtnl_message_addr_get_scope(m, &a->scope);
+ if (r < 0)
+ return r;
+
+ if (ifindex == 0 && IN_SET(a->scope, RT_SCOPE_HOST, RT_SCOPE_NOWHERE))
+ continue;
+
+ switch (family) {
+
+ case AF_INET:
+ r = sd_netlink_message_read_in_addr(m, IFA_LOCAL, &a->address.in);
+ if (r < 0) {
+ r = sd_netlink_message_read_in_addr(m, IFA_ADDRESS, &a->address.in);
+ if (r < 0)
+ continue;
+ }
+ break;
+
+ case AF_INET6:
+ r = sd_netlink_message_read_in6_addr(m, IFA_LOCAL, &a->address.in6);
+ if (r < 0) {
+ r = sd_netlink_message_read_in6_addr(m, IFA_ADDRESS, &a->address.in6);
+ if (r < 0)
+ continue;
+ }
+ break;
+
+ default:
+ continue;
+ }
+
+ a->ifindex = ifi;
+ a->family = family;
+
+ n_list++;
+ };
+
+ if (ret) {
+ typesafe_qsort(list, n_list, address_compare);
+ *ret = TAKE_PTR(list);
+ }
+
+ return (int) n_list;
+}
+
+static int add_local_gateway(
+ struct local_address **list,
+ size_t *n_list,
+ size_t *n_allocated,
+ int af,
+ int ifindex,
+ uint32_t metric,
+ const RouteVia *via) {
+
+ assert(list);
+ assert(n_list);
+ assert(n_allocated);
+ assert(via);
+
+ if (af != AF_UNSPEC && af != via->family)
+ return 0;
+
+ if (!GREEDY_REALLOC(*list, *n_allocated, *n_list + 1))
+ return -ENOMEM;
+
+ (*list)[(*n_list)++] = (struct local_address) {
+ .ifindex = ifindex,
+ .metric = metric,
+ .family = via->family,
+ .address = via->address,
+ };
+
+ return 0;
+}
+
+int local_gateways(sd_netlink *context, int ifindex, int af, struct local_address **ret) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
+ _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+ _cleanup_free_ struct local_address *list = NULL;
+ size_t n_list = 0, n_allocated = 0;
+ int r;
+
+ if (context)
+ rtnl = sd_netlink_ref(context);
+ else {
+ r = sd_netlink_open(&rtnl);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_rtnl_message_new_route(rtnl, &req, RTM_GETROUTE, af, RTPROT_UNSPEC);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_request_dump(req, true);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_call(rtnl, req, 0, &reply);
+ if (r < 0)
+ return r;
+
+ for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
+ _cleanup_ordered_set_free_free_ OrderedSet *multipath_routes = NULL;
+ _cleanup_free_ void *rta_multipath = NULL;
+ union in_addr_union gateway;
+ uint16_t type;
+ unsigned char dst_len, src_len, table;
+ uint32_t ifi, metric = 0;
+ size_t rta_len;
+ int family;
+ RouteVia via;
+
+ r = sd_netlink_message_get_errno(m);
+ if (r < 0)
+ return r;
+
+ r = sd_netlink_message_get_type(m, &type);
+ if (r < 0)
+ return r;
+ if (type != RTM_NEWROUTE)
+ continue;
+
+ /* We only care for default routes */
+ r = sd_rtnl_message_route_get_dst_prefixlen(m, &dst_len);
+ if (r < 0)
+ return r;
+ if (dst_len != 0)
+ continue;
+
+ r = sd_rtnl_message_route_get_src_prefixlen(m, &src_len);
+ if (r < 0)
+ return r;
+ if (src_len != 0)
+ continue;
+
+ r = sd_rtnl_message_route_get_table(m, &table);
+ if (r < 0)
+ return r;
+ if (table != RT_TABLE_MAIN)
+ continue;
+
+ r = sd_netlink_message_read_u32(m, RTA_PRIORITY, &metric);
+ if (r < 0 && r != -ENODATA)
+ return r;
+
+ r = sd_rtnl_message_route_get_family(m, &family);
+ if (r < 0)
+ return r;
+ if (!IN_SET(family, AF_INET, AF_INET6))
+ continue;
+
+ r = sd_netlink_message_read_u32(m, RTA_OIF, &ifi);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ if (ifi <= 0)
+ return -EINVAL;
+ if (ifindex > 0 && (int) ifi != ifindex)
+ continue;
+
+ r = netlink_message_read_in_addr_union(m, RTA_GATEWAY, family, &gateway);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ via.family = family;
+ via.address = gateway;
+ r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+
+ if (family != AF_INET)
+ continue;
+
+ r = sd_netlink_message_read(m, RTA_VIA, sizeof(via), &via);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via);
+ if (r < 0)
+ return r;
+
+ continue;
+ }
+ }
+
+ r = sd_netlink_message_read_data(m, RTA_MULTIPATH, &rta_len, &rta_multipath);
+ if (r < 0 && r != -ENODATA)
+ return r;
+ if (r >= 0) {
+ MultipathRoute *mr;
+
+ r = rtattr_read_nexthop(rta_multipath, rta_len, family, &multipath_routes);
+ if (r < 0)
+ return r;
+
+ ORDERED_SET_FOREACH(mr, multipath_routes) {
+ if (ifindex > 0 && mr->ifindex != ifindex)
+ continue;
+
+ r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &mr->gateway);
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ if (ret) {
+ typesafe_qsort(list, n_list, address_compare);
+ *ret = TAKE_PTR(list);
+ }
+
+ return (int) n_list;
+}
diff --git a/src/shared/local-addresses.h b/src/shared/local-addresses.h
new file mode 100644
index 0000000..c633995
--- /dev/null
+++ b/src/shared/local-addresses.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+
+struct local_address {
+ int family, ifindex;
+ unsigned char scope;
+ uint32_t metric;
+ union in_addr_union address;
+};
+
+int local_addresses(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret);
+
+int local_gateways(sd_netlink *rtnl, int ifindex, int af, struct local_address **ret);
diff --git a/src/shared/lockfile-util.c b/src/shared/lockfile-util.c
new file mode 100644
index 0000000..6f059ab
--- /dev/null
+++ b/src/shared/lockfile-util.c
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "missing_fcntl.h"
+#include "path-util.h"
+
+int make_lock_file(const char *p, int operation, LockFile *ret) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ char *t = NULL;
+ int r;
+
+ /*
+ * We use UNPOSIX locks if they are available. They have nice
+ * semantics, and are mostly compatible with NFS. However,
+ * they are only available on new kernels. When we detect we
+ * are running on an older kernel, then we fall back to good
+ * old BSD locks. They also have nice semantics, but are
+ * slightly problematic on NFS, where they are upgraded to
+ * POSIX locks, even though locally they are orthogonal to
+ * POSIX locks.
+ */
+
+ t = strdup(p);
+ if (!t)
+ return -ENOMEM;
+
+ for (;;) {
+ struct flock fl = {
+ .l_type = (operation & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK,
+ .l_whence = SEEK_SET,
+ };
+ struct stat st;
+
+ fd = open(p, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+ if (fd < 0)
+ return -errno;
+
+ r = fcntl(fd, (operation & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl);
+ if (r < 0) {
+
+ /* If the kernel is too old, use good old BSD locks */
+ if (errno == EINVAL)
+ r = flock(fd, operation);
+
+ if (r < 0)
+ return errno == EAGAIN ? -EBUSY : -errno;
+ }
+
+ /* If we acquired the lock, let's check if the file
+ * still exists in the file system. If not, then the
+ * previous exclusive owner removed it and then closed
+ * it. In such a case our acquired lock is worthless,
+ * hence try again. */
+
+ r = fstat(fd, &st);
+ if (r < 0)
+ return -errno;
+ if (st.st_nlink > 0)
+ break;
+
+ fd = safe_close(fd);
+ }
+
+ ret->path = t;
+ ret->fd = fd;
+ ret->operation = operation;
+
+ fd = -1;
+ t = NULL;
+
+ return r;
+}
+
+int make_lock_file_for(const char *p, int operation, LockFile *ret) {
+ const char *fn;
+ char *t;
+
+ assert(p);
+ assert(ret);
+
+ fn = basename(p);
+ if (!filename_is_valid(fn))
+ return -EINVAL;
+
+ t = newa(char, strlen(p) + 2 + 4 + 1);
+ stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), fn), ".lck");
+
+ return make_lock_file(t, operation, ret);
+}
+
+void release_lock_file(LockFile *f) {
+ int r;
+
+ if (!f)
+ return;
+
+ if (f->path) {
+
+ /* If we are the exclusive owner we can safely delete
+ * the lock file itself. If we are not the exclusive
+ * owner, we can try becoming it. */
+
+ if (f->fd >= 0 &&
+ (f->operation & ~LOCK_NB) == LOCK_SH) {
+ static const struct flock fl = {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ };
+
+ r = fcntl(f->fd, F_OFD_SETLK, &fl);
+ if (r < 0 && errno == EINVAL)
+ r = flock(f->fd, LOCK_EX|LOCK_NB);
+
+ if (r >= 0)
+ f->operation = LOCK_EX|LOCK_NB;
+ }
+
+ if ((f->operation & ~LOCK_NB) == LOCK_EX)
+ unlink_noerrno(f->path);
+
+ f->path = mfree(f->path);
+ }
+
+ f->fd = safe_close(f->fd);
+ f->operation = 0;
+}
diff --git a/src/shared/lockfile-util.h b/src/shared/lockfile-util.h
new file mode 100644
index 0000000..3606327
--- /dev/null
+++ b/src/shared/lockfile-util.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef struct LockFile {
+ char *path;
+ int fd;
+ int operation;
+} LockFile;
+
+int make_lock_file(const char *p, int operation, LockFile *ret);
+int make_lock_file_for(const char *p, int operation, LockFile *ret);
+void release_lock_file(LockFile *f);
+
+#define LOCK_FILE_INIT { .fd = -1, .path = NULL }
diff --git a/src/shared/log-link.h b/src/shared/log-link.h
new file mode 100644
index 0000000..bb692e0
--- /dev/null
+++ b/src/shared/log-link.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "log.h"
+
+/*
+ * The following macros append INTERFACE= to the message.
+ * The macros require a struct named 'Link' which contains 'char *ifname':
+ *
+ * typedef struct Link {
+ * char *ifname;
+ * } Link;
+ *
+ * See, network/networkd-link.h for example.
+ */
+
+#define log_link_full_errno(link, level, error, ...) \
+ ({ \
+ const Link *_l = (link); \
+ (_l && _l->ifname) ? log_object_internal(level, error, PROJECT_FILE, __LINE__, __func__, "INTERFACE=", _l->ifname, NULL, NULL, ##__VA_ARGS__) : \
+ log_internal(level, error, PROJECT_FILE, __LINE__, __func__, ##__VA_ARGS__); \
+ }) \
+
+#define log_link_full(link, level, ...) (void) log_link_full_errno(link, level, 0, __VA_ARGS__)
+
+#define log_link_debug(link, ...) log_link_full_errno(link, LOG_DEBUG, 0, __VA_ARGS__)
+#define log_link_info(link, ...) log_link_full(link, LOG_INFO, __VA_ARGS__)
+#define log_link_notice(link, ...) log_link_full(link, LOG_NOTICE, __VA_ARGS__)
+#define log_link_warning(link, ...) log_link_full(link, LOG_WARNING, __VA_ARGS__)
+#define log_link_error(link, ...) log_link_full(link, LOG_ERR, __VA_ARGS__)
+
+#define log_link_debug_errno(link, error, ...) log_link_full_errno(link, LOG_DEBUG, error, __VA_ARGS__)
+#define log_link_info_errno(link, error, ...) log_link_full_errno(link, LOG_INFO, error, __VA_ARGS__)
+#define log_link_notice_errno(link, error, ...) log_link_full_errno(link, LOG_NOTICE, error, __VA_ARGS__)
+#define log_link_warning_errno(link, error, ...) log_link_full_errno(link, LOG_WARNING, error, __VA_ARGS__)
+#define log_link_error_errno(link, error, ...) log_link_full_errno(link, LOG_ERR, error, __VA_ARGS__)
+
+#define LOG_LINK_MESSAGE(link, fmt, ...) "MESSAGE=%s: " fmt, (link)->ifname, ##__VA_ARGS__
+#define LOG_LINK_INTERFACE(link) "INTERFACE=%s", (link)->ifname
diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c
new file mode 100644
index 0000000..840f221
--- /dev/null
+++ b/src/shared/logs-show.c
@@ -0,0 +1,1672 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "sd-id128.h"
+#include "sd-journal.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "format-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "io-util.h"
+#include "journal-internal.h"
+#include "journal-util.h"
+#include "json.h"
+#include "locale-util.h"
+#include "log.h"
+#include "logs-show.h"
+#include "macro.h"
+#include "namespace-util.h"
+#include "output-mode.h"
+#include "parse-util.h"
+#include "pretty-print.h"
+#include "process-util.h"
+#include "sparse-endian.h"
+#include "stdio-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "util.h"
+#include "web-util.h"
+
+/* up to three lines (each up to 100 characters) or 300 characters, whichever is less */
+#define PRINT_LINE_THRESHOLD 3
+#define PRINT_CHAR_THRESHOLD 300
+
+#define JSON_THRESHOLD 4096U
+
+static int print_catalog(FILE *f, sd_journal *j) {
+ _cleanup_free_ char *t = NULL, *z = NULL;
+ const char *newline, *prefix;
+ int r;
+
+ assert(j);
+
+ r = sd_journal_get_catalog(j, &t);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to find catalog entry: %m");
+
+ if (is_locale_utf8())
+ prefix = strjoina(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), special_glyph(SPECIAL_GLYPH_LIGHT_SHADE));
+ else
+ prefix = "--";
+
+ if (colors_enabled())
+ newline = strjoina(ANSI_NORMAL "\n" ANSI_GREY, prefix, ANSI_NORMAL " " ANSI_GREEN);
+ else
+ newline = strjoina("\n", prefix, " ");
+
+ z = strreplace(strstrip(t), "\n", newline);
+ if (!z)
+ return log_oom();
+
+ if (colors_enabled())
+ fprintf(f, ANSI_GREY "%s" ANSI_NORMAL " " ANSI_GREEN, prefix);
+ else
+ fprintf(f, "%s ", prefix);
+
+ fputs(z, f);
+
+ if (colors_enabled())
+ fputs(ANSI_NORMAL "\n", f);
+ else
+ fputc('\n', f);
+
+ return 1;
+}
+
+static int url_from_catalog(sd_journal *j, char **ret) {
+ _cleanup_free_ char *t = NULL, *url = NULL;
+ const char *weblink;
+ int r;
+
+ assert(j);
+ assert(ret);
+
+ r = sd_journal_get_catalog(j, &t);
+ if (r == -ENOENT)
+ goto notfound;
+ if (r < 0)
+ return log_error_errno(r, "Failed to find catalog entry: %m");
+
+ weblink = startswith(t, "Documentation:");
+ if (!weblink) {
+ weblink = strstr(t + 1, "\nDocumentation:");
+ if (!weblink)
+ goto notfound;
+
+ weblink += 15;
+ }
+
+ /* Skip whitespace to value */
+ weblink += strspn(weblink, " \t");
+
+ /* Cut out till next whitespace/newline */
+ url = strndup(weblink, strcspn(weblink, WHITESPACE));
+ if (!url)
+ return log_oom();
+
+ if (!documentation_url_is_valid(url))
+ goto notfound;
+
+ *ret = TAKE_PTR(url);
+ return 1;
+
+notfound:
+ *ret = NULL;
+ return 0;
+}
+
+static int parse_field(const void *data, size_t length, const char *field, size_t field_len, char **target, size_t *target_len) {
+ size_t nl;
+ char *buf;
+
+ assert(data);
+ assert(field);
+ assert(target);
+
+ if (length < field_len)
+ return 0;
+
+ if (memcmp(data, field, field_len))
+ return 0;
+
+ nl = length - field_len;
+
+ buf = newdup_suffix0(char, (const char*) data + field_len, nl);
+ if (!buf)
+ return log_oom();
+
+ free(*target);
+ *target = buf;
+
+ if (target_len)
+ *target_len = nl;
+
+ return 1;
+}
+
+typedef struct ParseFieldVec {
+ const char *field;
+ size_t field_len;
+ char **target;
+ size_t *target_len;
+} ParseFieldVec;
+
+#define PARSE_FIELD_VEC_ENTRY(_field, _target, _target_len) { \
+ .field = _field, \
+ .field_len = strlen(_field), \
+ .target = _target, \
+ .target_len = _target_len \
+ }
+
+static int parse_fieldv(const void *data, size_t length, const ParseFieldVec *fields, unsigned n_fields) {
+ unsigned i;
+
+ for (i = 0; i < n_fields; i++) {
+ const ParseFieldVec *f = &fields[i];
+ int r;
+
+ r = parse_field(data, length, f->field, f->field_len, f->target, f->target_len);
+ if (r < 0)
+ return r;
+ else if (r > 0)
+ break;
+ }
+
+ return 0;
+}
+
+static int field_set_test(const Set *fields, const char *name, size_t n) {
+ char *s;
+
+ if (!fields)
+ return 1;
+
+ s = strndupa(name, n);
+ return set_contains(fields, s);
+}
+
+static bool shall_print(const char *p, size_t l, OutputFlags flags) {
+ assert(p);
+
+ if (flags & OUTPUT_SHOW_ALL)
+ return true;
+
+ if (l >= PRINT_CHAR_THRESHOLD)
+ return false;
+
+ if (!utf8_is_printable(p, l))
+ return false;
+
+ return true;
+}
+
+static bool print_multiline(
+ FILE *f,
+ unsigned prefix,
+ unsigned n_columns,
+ OutputFlags flags,
+ int priority,
+ bool audit,
+ const char* message,
+ size_t message_len,
+ size_t highlight[2]) {
+
+ const char *color_on = "", *color_off = "", *highlight_on = "";
+ const char *pos, *end;
+ bool ellipsized = false;
+ int line = 0;
+
+ if (flags & OUTPUT_COLOR) {
+ get_log_colors(priority, &color_on, &color_off, &highlight_on);
+
+ if (audit && strempty(color_on)) {
+ color_on = ANSI_BLUE;
+ color_off = ANSI_NORMAL;
+ }
+ }
+
+ /* A special case: make sure that we print a newline when
+ the message is empty. */
+ if (message_len == 0)
+ fputs("\n", f);
+
+ for (pos = message;
+ pos < message + message_len;
+ pos = end + 1, line++) {
+ bool continuation = line > 0;
+ bool tail_line;
+ int len;
+ for (end = pos; end < message + message_len && *end != '\n'; end++)
+ ;
+ len = end - pos;
+ assert(len >= 0);
+
+ /* We need to figure out when we are showing not-last line, *and*
+ * will skip subsequent lines. In that case, we will put the dots
+ * at the end of the line, instead of putting dots in the middle
+ * or not at all.
+ */
+ tail_line =
+ line + 1 == PRINT_LINE_THRESHOLD ||
+ end + 1 >= message + PRINT_CHAR_THRESHOLD;
+
+ if (flags & (OUTPUT_FULL_WIDTH | OUTPUT_SHOW_ALL) ||
+ (prefix + len + 1 < n_columns && !tail_line)) {
+ if (highlight &&
+ (size_t) (pos - message) <= highlight[0] &&
+ highlight[0] < (size_t) len) {
+
+ fprintf(f, "%*s%s%.*s",
+ continuation * prefix, "",
+ color_on, (int) highlight[0], pos);
+ fprintf(f, "%s%.*s",
+ highlight_on,
+ (int) (MIN((size_t) len, highlight[1]) - highlight[0]),
+ pos + highlight[0]);
+ if ((size_t) len > highlight[1])
+ fprintf(f, "%s%.*s",
+ color_on,
+ (int) (len - highlight[1]),
+ pos + highlight[1]);
+ fprintf(f, "%s\n", color_off);
+
+ } else
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ continue;
+ }
+
+ /* Beyond this point, ellipsization will happen. */
+ ellipsized = true;
+
+ if (prefix < n_columns && n_columns - prefix >= 3) {
+ if (n_columns - prefix > (unsigned) len + 3)
+ fprintf(f, "%*s%s%.*s...%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else {
+ _cleanup_free_ char *e;
+
+ e = ellipsize_mem(pos, len, n_columns - prefix,
+ tail_line ? 100 : 90);
+ if (!e)
+ fprintf(f, "%*s%s%.*s%s\n",
+ continuation * prefix, "",
+ color_on, len, pos, color_off);
+ else
+ fprintf(f, "%*s%s%s%s\n",
+ continuation * prefix, "",
+ color_on, e, color_off);
+ }
+ } else
+ fputs("...\n", f);
+
+ if (tail_line)
+ break;
+ }
+
+ return ellipsized;
+}
+
+static int output_timestamp_monotonic(FILE *f, sd_journal *j, const char *monotonic) {
+ sd_id128_t boot_id;
+ uint64_t t;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ r = -ENXIO;
+ if (monotonic)
+ r = safe_atou64(monotonic, &t);
+ if (r < 0)
+ r = sd_journal_get_monotonic_usec(j, &t, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ fprintf(f, "[%5"PRI_USEC".%06"PRI_USEC"]", t / USEC_PER_SEC, t % USEC_PER_SEC);
+ return 1 + 5 + 1 + 6 + 1;
+}
+
+static int output_timestamp_realtime(FILE *f, sd_journal *j, OutputMode mode, OutputFlags flags, const char *realtime) {
+ char buf[MAX(FORMAT_TIMESTAMP_MAX, 64U)];
+ uint64_t x;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ if (realtime)
+ r = safe_atou64(realtime, &x);
+ if (!realtime || r < 0 || !VALID_REALTIME(x))
+ r = sd_journal_get_realtime_usec(j, &x);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ if (IN_SET(mode, OUTPUT_SHORT_FULL, OUTPUT_WITH_UNIT)) {
+ const char *k;
+
+ if (flags & OUTPUT_UTC)
+ k = format_timestamp_style(buf, sizeof(buf), x, TIMESTAMP_UTC);
+ else
+ k = format_timestamp(buf, sizeof(buf), x);
+ if (!k)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format timestamp: %" PRIu64, x);
+
+ } else {
+ struct tm tm;
+ time_t t;
+
+ t = (time_t) (x / USEC_PER_SEC);
+
+ switch (mode) {
+
+ case OUTPUT_SHORT_UNIX:
+ xsprintf(buf, "%10"PRI_TIME".%06"PRIu64, t, x % USEC_PER_SEC);
+ break;
+
+ case OUTPUT_SHORT_ISO:
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S%z",
+ localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO time");
+ break;
+
+ case OUTPUT_SHORT_ISO_PRECISE: {
+ char usec[7];
+
+ /* No usec in strftime, so we leave space and copy over */
+ if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S.xxxxxx%z",
+ localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format ISO-precise time");
+ xsprintf(usec, "%06"PRI_USEC, x % USEC_PER_SEC);
+ memcpy(buf + 20, usec, 6);
+ break;
+ }
+ case OUTPUT_SHORT:
+ case OUTPUT_SHORT_PRECISE:
+
+ if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S",
+ localtime_or_gmtime_r(&t, &tm, flags & OUTPUT_UTC)) <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format syslog time");
+
+ if (mode == OUTPUT_SHORT_PRECISE) {
+ size_t k;
+
+ assert(sizeof(buf) > strlen(buf));
+ k = sizeof(buf) - strlen(buf);
+
+ r = snprintf(buf + strlen(buf), k, ".%06"PRIu64, x % USEC_PER_SEC);
+ if (r <= 0 || (size_t) r >= k) /* too long? */
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to format precise time");
+ }
+ break;
+
+ default:
+ assert_not_reached("Unknown time format");
+ }
+ }
+
+ fputs(buf, f);
+ return (int) strlen(buf);
+}
+
+static int output_short(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ int r;
+ const void *data;
+ size_t length, n = 0;
+ _cleanup_free_ char *hostname = NULL, *identifier = NULL, *comm = NULL, *pid = NULL, *fake_pid = NULL,
+ *message = NULL, *realtime = NULL, *monotonic = NULL, *priority = NULL, *transport = NULL,
+ *config_file = NULL, *unit = NULL, *user_unit = NULL, *documentation_url = NULL;
+ size_t hostname_len = 0, identifier_len = 0, comm_len = 0, pid_len = 0, fake_pid_len = 0, message_len = 0,
+ realtime_len = 0, monotonic_len = 0, priority_len = 0, transport_len = 0, config_file_len = 0,
+ unit_len = 0, user_unit_len = 0, documentation_url_len = 0;
+ int p = LOG_INFO;
+ bool ellipsized = false, audit;
+ const ParseFieldVec fields[] = {
+ PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len),
+ PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len),
+ PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len),
+ PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len),
+ PARSE_FIELD_VEC_ENTRY("_TRANSPORT=", &transport, &transport_len),
+ PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len),
+ PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len),
+ PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len),
+ PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, &realtime_len),
+ PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, &monotonic_len),
+ PARSE_FIELD_VEC_ENTRY("CONFIG_FILE=", &config_file, &config_file_len),
+ PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len),
+ PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len),
+ PARSE_FIELD_VEC_ENTRY("DOCUMENTATION=", &documentation_url, &documentation_url_len),
+ };
+ size_t highlight_shifted[] = {highlight ? highlight[0] : 0, highlight ? highlight[1] : 0};
+
+ assert(f);
+ assert(j);
+
+ /* Set the threshold to one bigger than the actual print
+ * threshold, so that if the line is actually longer than what
+ * we're willing to print, ellipsization will occur. This way
+ * we won't output a misleading line without any indication of
+ * truncation.
+ */
+ sd_journal_set_data_threshold(j, flags & (OUTPUT_SHOW_ALL|OUTPUT_FULL_WIDTH) ? 0 : PRINT_CHAR_THRESHOLD + 1);
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ r = parse_fieldv(data, length, fields, ELEMENTSOF(fields));
+ if (r < 0)
+ return r;
+ }
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r < 0)
+ return log_error_errno(r, "Failed to get journal fields: %m");
+
+ if (!message) {
+ log_debug("Skipping message without MESSAGE= field.");
+ return 0;
+ }
+
+ if (!(flags & OUTPUT_SHOW_ALL))
+ strip_tab_ansi(&message, &message_len, highlight_shifted);
+
+ if (priority_len == 1 && *priority >= '0' && *priority <= '7')
+ p = *priority - '0';
+
+ audit = streq_ptr(transport, "audit");
+
+ if (mode == OUTPUT_SHORT_MONOTONIC)
+ r = output_timestamp_monotonic(f, j, monotonic);
+ else
+ r = output_timestamp_realtime(f, j, mode, flags, realtime);
+ if (r < 0)
+ return r;
+ n += r;
+
+ if (flags & OUTPUT_NO_HOSTNAME) {
+ /* Suppress display of the hostname if this is requested. */
+ hostname = mfree(hostname);
+ hostname_len = 0;
+ }
+
+ if (hostname && shall_print(hostname, hostname_len, flags)) {
+ fprintf(f, " %.*s", (int) hostname_len, hostname);
+ n += hostname_len + 1;
+ }
+
+ if (mode == OUTPUT_WITH_UNIT && ((unit && shall_print(unit, unit_len, flags)) ||
+ (user_unit && shall_print(user_unit, user_unit_len, flags)))) {
+ if (unit) {
+ fprintf(f, " %.*s", (int) unit_len, unit);
+ n += unit_len + 1;
+ }
+ if (user_unit) {
+ if (unit)
+ fprintf(f, "/%.*s", (int) user_unit_len, user_unit);
+ else
+ fprintf(f, " %.*s", (int) user_unit_len, user_unit);
+ n += unit_len + 1;
+ }
+ } else if (identifier && shall_print(identifier, identifier_len, flags)) {
+ fprintf(f, " %.*s", (int) identifier_len, identifier);
+ n += identifier_len + 1;
+ } else if (comm && shall_print(comm, comm_len, flags)) {
+ fprintf(f, " %.*s", (int) comm_len, comm);
+ n += comm_len + 1;
+ } else
+ fputs(" unknown", f);
+
+ if (pid && shall_print(pid, pid_len, flags)) {
+ fprintf(f, "[%.*s]", (int) pid_len, pid);
+ n += pid_len + 2;
+ } else if (fake_pid && shall_print(fake_pid, fake_pid_len, flags)) {
+ fprintf(f, "[%.*s]", (int) fake_pid_len, fake_pid);
+ n += fake_pid_len + 2;
+ }
+
+ fputs(": ", f);
+
+ if (urlify_enabled()) {
+ _cleanup_free_ char *c = NULL;
+
+ /* Insert a hyperlink to a documentation URL before the message. Note that we don't make the
+ * whole message a hyperlink, since otherwise the whole screen might end up being just
+ * hyperlinks. Moreover, we want to be able to highlight parts of the message (such as the
+ * config file, see below) hence let's keep the documentation URL link separate. */
+
+ if (documentation_url && shall_print(documentation_url, documentation_url_len, flags)) {
+ c = strndup(documentation_url, documentation_url_len);
+ if (!c)
+ return log_oom();
+
+ if (!documentation_url_is_valid(c)) /* Eat up invalid links */
+ c = mfree(c);
+ }
+
+ if (!c)
+ (void) url_from_catalog(j, &c); /* Acquire from catalog if not embedded in log message itself */
+
+ if (c) {
+ _cleanup_free_ char *urlified = NULL;
+
+ if (terminal_urlify(c, special_glyph(SPECIAL_GLYPH_EXTERNAL_LINK), &urlified) >= 0) {
+ fputs(urlified, f);
+ fputc(' ', f);
+ }
+ }
+ }
+
+ if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(message, message_len)) {
+ char bytes[FORMAT_BYTES_MAX];
+ fprintf(f, "[%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len));
+ } else {
+
+ /* URLify config_file string in message, if the message starts with it.
+ * Skip URLification if the highlighted pattern overlaps. */
+ if (config_file &&
+ message_len >= config_file_len &&
+ memcmp(message, config_file, config_file_len) == 0 &&
+ (message_len == config_file_len || IN_SET(message[config_file_len], ':', ' ')) &&
+ (!highlight || highlight_shifted[0] == 0 || highlight_shifted[0] > config_file_len)) {
+
+ _cleanup_free_ char *t = NULL, *urlified = NULL;
+
+ t = strndup(config_file, config_file_len);
+ if (t && terminal_urlify_path(t, NULL, &urlified) >= 0) {
+ size_t urlified_len = strlen(urlified);
+ size_t shift = urlified_len - config_file_len;
+ char *joined;
+
+ joined = realloc(urlified, message_len + shift);
+ if (joined) {
+ memcpy(joined + urlified_len, message + config_file_len, message_len - config_file_len);
+ free_and_replace(message, joined);
+ TAKE_PTR(urlified);
+ message_len += shift;
+ if (highlight) {
+ highlight_shifted[0] += shift;
+ highlight_shifted[1] += shift;
+ }
+ }
+ }
+ }
+
+ ellipsized |=
+ print_multiline(f, n + 2, n_columns, flags, p, audit,
+ message, message_len,
+ highlight_shifted);
+ }
+
+ if (flags & OUTPUT_CATALOG)
+ (void) print_catalog(f, j);
+
+ return ellipsized;
+}
+
+static int output_verbose(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ const void *data;
+ size_t length;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime = 0;
+ char ts[FORMAT_TIMESTAMP_MAX + 7];
+ const char *timestamp;
+ int r;
+
+ assert(f);
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_data(j, "_SOURCE_REALTIME_TIMESTAMP", &data, &length);
+ if (r == -ENOENT)
+ log_debug("Source realtime timestamp not found");
+ else if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get source realtime timestamp: %m");
+ else {
+ _cleanup_free_ char *value = NULL;
+
+ r = parse_field(data, length, "_SOURCE_REALTIME_TIMESTAMP=",
+ STRLEN("_SOURCE_REALTIME_TIMESTAMP="), &value,
+ NULL);
+ if (r < 0)
+ return r;
+ assert(r > 0);
+
+ r = safe_atou64(value, &realtime);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse realtime timestamp: %m");
+ }
+
+ if (r < 0) {
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get realtime timestamp: %m");
+ }
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ timestamp = format_timestamp_style(ts, sizeof ts, realtime,
+ flags & OUTPUT_UTC ? TIMESTAMP_US_UTC : TIMESTAMP_US);
+ fprintf(f, "%s [%s]\n",
+ timestamp ?: "(no timestamp)",
+ cursor);
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ const char *c, *p;
+ int fieldlen;
+ const char *on = "", *off = "";
+ _cleanup_free_ char *urlified = NULL;
+ size_t valuelen;
+
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ fieldlen = c - (const char*) data;
+ if (!journal_field_valid(data, fieldlen, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ r = field_set_test(output_fields, data, fieldlen);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ continue;
+
+ valuelen = length - 1 - fieldlen;
+
+ if ((flags & OUTPUT_COLOR) && (p = startswith(data, "MESSAGE="))) {
+ on = ANSI_HIGHLIGHT;
+ off = ANSI_NORMAL;
+ } else if ((p = startswith(data, "CONFIG_FILE="))) {
+ if (terminal_urlify_path(p, NULL, &urlified) >= 0) {
+ p = urlified;
+ valuelen = strlen(urlified);
+ }
+ } else
+ p = c + 1;
+
+ if ((flags & OUTPUT_SHOW_ALL) ||
+ (((length < PRINT_CHAR_THRESHOLD) || flags & OUTPUT_FULL_WIDTH)
+ && utf8_is_printable(data, length))) {
+ fprintf(f, " %s%.*s=", on, fieldlen, (const char*)data);
+ print_multiline(f, 4 + fieldlen + 1, 0, OUTPUT_FULL_WIDTH, 0, false,
+ p, valuelen,
+ NULL);
+ fputs(off, f);
+ } else {
+ char bytes[FORMAT_BYTES_MAX];
+
+ fprintf(f, " %s%.*s=[%s blob data]%s\n",
+ on,
+ (int) (c - (const char*) data),
+ (const char*) data,
+ format_bytes(bytes, sizeof(bytes), length - (c - (const char *) data) - 1),
+ off);
+ }
+ }
+
+ if (r < 0)
+ return r;
+
+ if (flags & OUTPUT_CATALOG)
+ (void) print_catalog(f, j);
+
+ return 0;
+}
+
+static int output_export(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ sd_id128_t boot_id;
+ char sid[SD_ID128_STRING_MAX];
+ int r;
+ usec_t realtime, monotonic;
+ _cleanup_free_ char *cursor = NULL;
+ const void *data;
+ size_t length;
+
+ assert(j);
+
+ sd_journal_set_data_threshold(j, 0);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ fprintf(f,
+ "__CURSOR=%s\n"
+ "__REALTIME_TIMESTAMP="USEC_FMT"\n"
+ "__MONOTONIC_TIMESTAMP="USEC_FMT"\n"
+ "_BOOT_ID=%s\n",
+ cursor,
+ realtime,
+ monotonic,
+ sd_id128_to_string(boot_id, sid));
+
+ JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) {
+ size_t fieldlen;
+ const char *c;
+
+ /* We already printed the boot id from the data in the header, hence let's suppress it here */
+ if (memory_startswith(data, length, "_BOOT_ID="))
+ continue;
+
+ c = memchr(data, '=', length);
+ if (!c)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ fieldlen = c - (const char*) data;
+ if (!journal_field_valid(data, fieldlen, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ r = field_set_test(output_fields, data, fieldlen);
+ if (r < 0)
+ return r;
+ if (!r)
+ continue;
+
+ if (utf8_is_printable_newline(data, length, false))
+ fwrite(data, length, 1, f);
+ else {
+ uint64_t le64;
+
+ fwrite(data, fieldlen, 1, f);
+ fputc('\n', f);
+ le64 = htole64(length - fieldlen - 1);
+ fwrite(&le64, sizeof(le64), 1, f);
+ fwrite(c + 1, length - fieldlen - 1, 1, f);
+ }
+
+ fputc('\n', f);
+ }
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+
+ if (r < 0)
+ return r;
+
+ fputc('\n', f);
+
+ return 0;
+}
+
+void json_escape(
+ FILE *f,
+ const char* p,
+ size_t l,
+ OutputFlags flags) {
+
+ assert(f);
+ assert(p);
+
+ if (!(flags & OUTPUT_SHOW_ALL) && l >= JSON_THRESHOLD)
+ fputs("null", f);
+
+ else if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(p, l)) {
+ bool not_first = false;
+
+ fputs("[ ", f);
+
+ while (l > 0) {
+ if (not_first)
+ fprintf(f, ", %u", (uint8_t) *p);
+ else {
+ not_first = true;
+ fprintf(f, "%u", (uint8_t) *p);
+ }
+
+ p++;
+ l--;
+ }
+
+ fputs(" ]", f);
+ } else {
+ fputc('"', f);
+
+ while (l > 0) {
+ if (IN_SET(*p, '"', '\\')) {
+ fputc('\\', f);
+ fputc(*p, f);
+ } else if (*p == '\n')
+ fputs("\\n", f);
+ else if ((uint8_t) *p < ' ')
+ fprintf(f, "\\u%04x", (uint8_t) *p);
+ else
+ fputc(*p, f);
+
+ p++;
+ l--;
+ }
+
+ fputc('"', f);
+ }
+}
+
+struct json_data {
+ JsonVariant* name;
+ size_t n_values;
+ JsonVariant* values[];
+};
+
+static int update_json_data(
+ Hashmap *h,
+ OutputFlags flags,
+ const char *name,
+ const void *value,
+ size_t size) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ struct json_data *d;
+ int r;
+
+ if (!(flags & OUTPUT_SHOW_ALL) && strlen(name) + 1 + size >= JSON_THRESHOLD)
+ r = json_variant_new_null(&v);
+ else if (utf8_is_printable(value, size))
+ r = json_variant_new_stringn(&v, value, size);
+ else
+ r = json_variant_new_array_bytes(&v, value, size);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON data: %m");
+
+ d = hashmap_get(h, name);
+ if (d) {
+ struct json_data *w;
+
+ w = realloc(d, offsetof(struct json_data, values) + sizeof(JsonVariant*) * (d->n_values + 1));
+ if (!w)
+ return log_oom();
+
+ d = w;
+ assert_se(hashmap_update(h, json_variant_string(d->name), d) >= 0);
+ } else {
+ _cleanup_(json_variant_unrefp) JsonVariant *n = NULL;
+
+ r = json_variant_new_string(&n, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate JSON name variant: %m");
+
+ d = malloc0(offsetof(struct json_data, values) + sizeof(JsonVariant*));
+ if (!d)
+ return log_oom();
+
+ r = hashmap_put(h, json_variant_string(n), d);
+ if (r < 0) {
+ free(d);
+ return log_error_errno(r, "Failed to insert JSON name into hashmap: %m");
+ }
+
+ d->name = TAKE_PTR(n);
+ }
+
+ d->values[d->n_values++] = TAKE_PTR(v);
+ return 0;
+}
+
+static int update_json_data_split(
+ Hashmap *h,
+ OutputFlags flags,
+ const Set *output_fields,
+ const void *data,
+ size_t size) {
+
+ size_t fieldlen;
+ const char *eq;
+ char *name;
+
+ assert(h);
+ assert(data || size == 0);
+
+ if (memory_startswith(data, size, "_BOOT_ID="))
+ return 0;
+
+ eq = memchr(data, '=', MIN(size, JSON_THRESHOLD));
+ if (!eq)
+ return 0;
+
+ fieldlen = eq - (const char*) data;
+ if (!journal_field_valid(data, fieldlen, true))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid field.");
+
+ name = strndupa(data, fieldlen);
+ if (output_fields && !set_contains(output_fields, name))
+ return 0;
+
+ return update_json_data(h, flags, name, eq + 1, size - fieldlen - 1);
+}
+
+static int output_json(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ char sid[SD_ID128_STRING_MAX], usecbuf[DECIMAL_STR_MAX(usec_t)];
+ _cleanup_(json_variant_unrefp) JsonVariant *object = NULL;
+ _cleanup_free_ char *cursor = NULL;
+ uint64_t realtime, monotonic;
+ JsonVariant **array = NULL;
+ struct json_data *d;
+ sd_id128_t boot_id;
+ Hashmap *h = NULL;
+ size_t n = 0;
+ int r;
+
+ assert(j);
+
+ (void) sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD);
+
+ r = sd_journal_get_realtime_usec(j, &realtime);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get realtime timestamp: %m");
+
+ r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get monotonic timestamp: %m");
+
+ r = sd_journal_get_cursor(j, &cursor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get cursor: %m");
+
+ h = hashmap_new(&string_hash_ops);
+ if (!h)
+ return log_oom();
+
+ r = update_json_data(h, flags, "__CURSOR", cursor, strlen(cursor));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, realtime);
+ r = update_json_data(h, flags, "__REALTIME_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ xsprintf(usecbuf, USEC_FMT, monotonic);
+ r = update_json_data(h, flags, "__MONOTONIC_TIMESTAMP", usecbuf, strlen(usecbuf));
+ if (r < 0)
+ goto finish;
+
+ sd_id128_to_string(boot_id, sid);
+ r = update_json_data(h, flags, "_BOOT_ID", sid, strlen(sid));
+ if (r < 0)
+ goto finish;
+
+ for (;;) {
+ const void *data;
+ size_t size;
+
+ r = sd_journal_enumerate_data(j, &data, &size);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ r = 0;
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to read journal: %m");
+ goto finish;
+ }
+ if (r == 0)
+ break;
+
+ r = update_json_data_split(h, flags, output_fields, data, size);
+ if (r < 0)
+ goto finish;
+ }
+
+ array = new(JsonVariant*, hashmap_size(h)*2);
+ if (!array) {
+ r = log_oom();
+ goto finish;
+ }
+
+ HASHMAP_FOREACH(d, h) {
+ assert(d->n_values > 0);
+
+ array[n++] = json_variant_ref(d->name);
+
+ if (d->n_values == 1)
+ array[n++] = json_variant_ref(d->values[0]);
+ else {
+ _cleanup_(json_variant_unrefp) JsonVariant *q = NULL;
+
+ r = json_variant_new_array(&q, d->values, d->n_values);
+ if (r < 0) {
+ log_error_errno(r, "Failed to create JSON array: %m");
+ goto finish;
+ }
+
+ array[n++] = TAKE_PTR(q);
+ }
+ }
+
+ r = json_variant_new_object(&object, array, n);
+ if (r < 0) {
+ log_error_errno(r, "Failed to allocate JSON object: %m");
+ goto finish;
+ }
+
+ json_variant_dump(object,
+ output_mode_to_json_format_flags(mode) |
+ (FLAGS_SET(flags, OUTPUT_COLOR) ? JSON_FORMAT_COLOR : 0),
+ f, NULL);
+
+ r = 0;
+
+finish:
+ while ((d = hashmap_steal_first(h))) {
+ size_t k;
+
+ json_variant_unref(d->name);
+ for (k = 0; k < d->n_values; k++)
+ json_variant_unref(d->values[k]);
+
+ free(d);
+ }
+
+ hashmap_free(h);
+
+ json_variant_unref_many(array, n);
+ free(array);
+
+ return r;
+}
+
+static int output_cat_field(
+ FILE *f,
+ sd_journal *j,
+ OutputFlags flags,
+ int prio,
+ const char *field,
+ const size_t highlight[2]) {
+
+ const char *color_on = "", *color_off = "", *highlight_on = "";
+ const void *data;
+ size_t l, fl;
+ int r;
+
+ if (FLAGS_SET(flags, OUTPUT_COLOR))
+ get_log_colors(prio, &color_on, &color_off, &highlight_on);
+
+ r = sd_journal_get_data(j, field, &data, &l);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r == -ENOENT) /* An entry without the requested field */
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to get data: %m");
+
+ fl = strlen(field);
+ assert(l >= fl + 1);
+ assert(((char*) data)[fl] == '=');
+
+ data = (const uint8_t*) data + fl + 1;
+ l -= fl + 1;
+
+ if (FLAGS_SET(flags, OUTPUT_COLOR)) {
+ if (highlight) {
+ assert(highlight[0] <= highlight[1]);
+ assert(highlight[1] <= l);
+
+ fputs(color_on, f);
+ fwrite((const char*) data, 1, highlight[0], f);
+ fputs(highlight_on, f);
+ fwrite((const char*) data + highlight[0], 1, highlight[1] - highlight[0], f);
+ fputs(color_on, f);
+ fwrite((const char*) data + highlight[1], 1, l - highlight[1], f);
+ fputs(color_off, f);
+ } else {
+ fputs(color_on, f);
+ fwrite((const char*) data, 1, l, f);
+ fputs(color_off, f);
+ }
+ } else
+ fwrite((const char*) data, 1, l, f);
+
+ fputc('\n', f);
+ return 0;
+}
+
+static int output_cat(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) {
+
+ int r, prio = LOG_INFO;
+ const char *field;
+
+ assert(j);
+ assert(f);
+
+ (void) sd_journal_set_data_threshold(j, 0);
+
+ if (FLAGS_SET(flags, OUTPUT_COLOR)) {
+ const void *data;
+ size_t l;
+
+ /* Determine priority of this entry, so that we can color it nicely */
+
+ r = sd_journal_get_data(j, "PRIORITY", &data, &l);
+ if (r == -EBADMSG) {
+ log_debug_errno(r, "Skipping message we can't read: %m");
+ return 0;
+ }
+ if (r < 0) {
+ if (r != -ENOENT)
+ return log_error_errno(r, "Failed to get data: %m");
+
+ /* An entry without PRIORITY */
+ } else if (l == 10 && memcmp(data, "PRIORITY=", 9) == 0) {
+ char c = ((char*) data)[9];
+
+ if (c >= '0' && c <= '7')
+ prio = c - '0';
+ }
+ }
+
+ if (set_isempty(output_fields))
+ return output_cat_field(f, j, flags, prio, "MESSAGE", highlight);
+
+ SET_FOREACH(field, output_fields) {
+ r = output_cat_field(f, j, flags, prio, field, streq(field, "MESSAGE") ? highlight : NULL);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int (*output_funcs[_OUTPUT_MODE_MAX])(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ const Set *output_fields,
+ const size_t highlight[2]) = {
+
+ [OUTPUT_SHORT] = output_short,
+ [OUTPUT_SHORT_ISO] = output_short,
+ [OUTPUT_SHORT_ISO_PRECISE] = output_short,
+ [OUTPUT_SHORT_PRECISE] = output_short,
+ [OUTPUT_SHORT_MONOTONIC] = output_short,
+ [OUTPUT_SHORT_UNIX] = output_short,
+ [OUTPUT_SHORT_FULL] = output_short,
+ [OUTPUT_VERBOSE] = output_verbose,
+ [OUTPUT_EXPORT] = output_export,
+ [OUTPUT_JSON] = output_json,
+ [OUTPUT_JSON_PRETTY] = output_json,
+ [OUTPUT_JSON_SSE] = output_json,
+ [OUTPUT_JSON_SEQ] = output_json,
+ [OUTPUT_CAT] = output_cat,
+ [OUTPUT_WITH_UNIT] = output_short,
+};
+
+int show_journal_entry(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ char **output_fields,
+ const size_t highlight[2],
+ bool *ellipsized) {
+
+ _cleanup_set_free_ Set *fields = NULL;
+ int r;
+
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+
+ if (n_columns <= 0)
+ n_columns = columns();
+
+ r = set_put_strdupv(&fields, output_fields);
+ if (r < 0)
+ return r;
+
+ r = output_funcs[mode](f, j, mode, n_columns, flags, fields, highlight);
+
+ if (ellipsized && r > 0)
+ *ellipsized = true;
+
+ return r;
+}
+
+static int maybe_print_begin_newline(FILE *f, OutputFlags *flags) {
+ assert(f);
+ assert(flags);
+
+ if (!(*flags & OUTPUT_BEGIN_NEWLINE))
+ return 0;
+
+ /* Print a beginning new line if that's request, but only once
+ * on the first line we print. */
+
+ fputc('\n', f);
+ *flags &= ~OUTPUT_BEGIN_NEWLINE;
+ return 0;
+}
+
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized) {
+
+ int r;
+ unsigned line = 0;
+ bool need_seek = false;
+ int warn_cutoff = flags & OUTPUT_WARN_CUTOFF;
+
+ assert(j);
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+
+ if (how_many == (unsigned) -1)
+ need_seek = true;
+ else {
+ /* Seek to end */
+ r = sd_journal_seek_tail(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to seek to tail: %m");
+
+ r = sd_journal_previous_skip(j, how_many);
+ if (r < 0)
+ return log_error_errno(r, "Failed to skip previous: %m");
+ }
+
+ for (;;) {
+ usec_t usec;
+
+ if (need_seek) {
+ r = sd_journal_next(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to iterate through journal: %m");
+ }
+
+ if (r == 0)
+ break;
+
+ need_seek = true;
+
+ if (not_before > 0) {
+ r = sd_journal_get_monotonic_usec(j, &usec, NULL);
+
+ /* -ESTALE is returned if the timestamp is not from this boot */
+ if (r == -ESTALE)
+ continue;
+ else if (r < 0)
+ return log_error_errno(r, "Failed to get journal time: %m");
+
+ if (usec < not_before)
+ continue;
+ }
+
+ line++;
+ maybe_print_begin_newline(f, &flags);
+
+ r = show_journal_entry(f, j, mode, n_columns, flags, NULL, NULL, ellipsized);
+ if (r < 0)
+ return r;
+ }
+
+ if (warn_cutoff && line < how_many && not_before > 0) {
+ sd_id128_t boot_id;
+ usec_t cutoff = 0;
+
+ /* Check whether the cutoff line is too early */
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id: %m");
+
+ r = sd_journal_get_cutoff_monotonic_usec(j, boot_id, &cutoff, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get journal cutoff time: %m");
+
+ if (r > 0 && not_before < cutoff) {
+ maybe_print_begin_newline(f, &flags);
+
+ /* If we logged *something* and no permission error happened, than we can reliably
+ * emit the warning about rotation. If we didn't log anything and access errors
+ * happened, emit hint about permissions. Otherwise, give a generic message, since we
+ * can't diagnose the issue. */
+
+ bool noaccess = journal_access_blocked(j);
+
+ if (line == 0 && noaccess)
+ fprintf(f, "Warning: some journal files were not opened due to insufficient permissions.");
+ else if (!noaccess)
+ fprintf(f, "Warning: journal has been rotated since unit was started, output may be incomplete.\n");
+ else
+ fprintf(f, "Warning: journal has been rotated since unit was started and some journal "
+ "files were not opened due to insufficient permissions, output may be incomplete.\n");
+ }
+
+ warn_cutoff = false;
+ }
+
+ return 0;
+}
+
+int add_matches_for_unit(sd_journal *j, const char *unit) {
+ const char *m1, *m2, *m3, *m4;
+ int r;
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_UNIT=", unit);
+ m2 = strjoina("COREDUMP_UNIT=", unit);
+ m3 = strjoina("UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_UNIT=", unit);
+
+ (void)(
+ /* Look for messages from the service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1", 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+
+ /* Look for messages from PID 1 about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_PID=1", 0)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+ (r = sd_journal_add_match(j, m4, 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0))
+ );
+ }
+
+ return r;
+}
+
+int add_matches_for_user_unit(sd_journal *j, const char *unit, uid_t uid) {
+ int r;
+ char *m1, *m2, *m3, *m4;
+ char muid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)];
+
+ assert(j);
+ assert(unit);
+
+ m1 = strjoina("_SYSTEMD_USER_UNIT=", unit);
+ m2 = strjoina("USER_UNIT=", unit);
+ m3 = strjoina("COREDUMP_USER_UNIT=", unit);
+ m4 = strjoina("OBJECT_SYSTEMD_USER_UNIT=", unit);
+ sprintf(muid, "_UID="UID_FMT, uid);
+
+ (void) (
+ /* Look for messages from the user service itself */
+ (r = sd_journal_add_match(j, m1, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for messages from systemd about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m2, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+
+ /* Look for coredumps of the service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m3, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0)) ||
+
+ /* Look for messages from authorized daemons about this service */
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m4, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0)) ||
+ (r = sd_journal_add_match(j, "_UID=0", 0))
+ );
+
+ if (r == 0 && endswith(unit, ".slice")) {
+ const char *m5;
+
+ m5 = strjoina("_SYSTEMD_SLICE=", unit);
+
+ /* Show all messages belonging to a slice */
+ (void)(
+ (r = sd_journal_add_disjunction(j)) ||
+ (r = sd_journal_add_match(j, m5, 0)) ||
+ (r = sd_journal_add_match(j, muid, 0))
+ );
+ }
+
+ return r;
+}
+
+static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) {
+ _cleanup_close_pair_ int pair[2] = { -1, -1 };
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1;
+ char buf[ID128_UUID_STRING_MAX];
+ pid_t pid, child;
+ ssize_t k;
+ int r;
+
+ assert(machine);
+ assert(boot_id);
+
+ if (!machine_name_is_valid(machine))
+ return -EINVAL;
+
+ r = container_get_leader(machine, &pid);
+ if (r < 0)
+ return r;
+
+ r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, NULL, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ r = namespace_fork("(sd-bootidns)", "(sd-bootid)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG,
+ pidnsfd, mntnsfd, -1, -1, rootfd, &child);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ int fd;
+
+ pair[0] = safe_close(pair[0]);
+
+ fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ _exit(EXIT_FAILURE);
+
+ r = loop_read_exact(fd, buf, 36, false);
+ safe_close(fd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ k = send(pair[1], buf, 36, MSG_NOSIGNAL);
+ if (k != 36)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ pair[1] = safe_close(pair[1]);
+
+ r = wait_for_terminate_and_check("(sd-bootidns)", child, 0);
+ if (r < 0)
+ return r;
+ if (r != EXIT_SUCCESS)
+ return -EIO;
+
+ k = recv(pair[0], buf, 36, 0);
+ if (k != 36)
+ return -EIO;
+
+ buf[36] = 0;
+ r = sd_id128_from_string(buf, boot_id);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int add_match_this_boot(sd_journal *j, const char *machine) {
+ char match[9+32+1] = "_BOOT_ID=";
+ sd_id128_t boot_id;
+ int r;
+
+ assert(j);
+
+ if (machine) {
+ r = get_boot_id_for_machine(machine, &boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id of container %s: %m", machine);
+ } else {
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get boot id: %m");
+ }
+
+ sd_id128_to_string(boot_id, match + 9);
+ r = sd_journal_add_match(j, match, strlen(match));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add match: %m");
+
+ r = sd_journal_add_conjunction(j);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add conjunction: %m");
+
+ return 0;
+}
+
+int show_journal_by_unit(
+ FILE *f,
+ const char *unit,
+ const char *log_namespace,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ uid_t uid,
+ OutputFlags flags,
+ int journal_open_flags,
+ bool system_unit,
+ bool *ellipsized) {
+
+ _cleanup_(sd_journal_closep) sd_journal *j = NULL;
+ int r;
+
+ assert(mode >= 0);
+ assert(mode < _OUTPUT_MODE_MAX);
+ assert(unit);
+
+ if (how_many <= 0)
+ return 0;
+
+ r = sd_journal_open_namespace(&j, log_namespace, journal_open_flags | SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open journal: %m");
+
+ r = add_match_this_boot(j, NULL);
+ if (r < 0)
+ return r;
+
+ if (system_unit)
+ r = add_matches_for_unit(j, unit);
+ else
+ r = add_matches_for_user_unit(j, unit, uid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add unit matches: %m");
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *filter;
+
+ filter = journal_make_match_string(j);
+ if (!filter)
+ return log_oom();
+
+ log_debug("Journal filter: %s", filter);
+ }
+
+ return show_journal(f, j, mode, n_columns, not_before, how_many, flags, ellipsized);
+}
diff --git a/src/shared/logs-show.h b/src/shared/logs-show.h
new file mode 100644
index 0000000..71ebe13
--- /dev/null
+++ b/src/shared/logs-show.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "sd-journal.h"
+
+#include "macro.h"
+#include "output-mode.h"
+#include "time-util.h"
+#include "util.h"
+
+int show_journal_entry(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ OutputFlags flags,
+ char **output_fields,
+ const size_t highlight[2],
+ bool *ellipsized);
+int show_journal(
+ FILE *f,
+ sd_journal *j,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ OutputFlags flags,
+ bool *ellipsized);
+
+int add_match_this_boot(sd_journal *j, const char *machine);
+
+int add_matches_for_unit(
+ sd_journal *j,
+ const char *unit);
+
+int add_matches_for_user_unit(
+ sd_journal *j,
+ const char *unit,
+ uid_t uid);
+
+int show_journal_by_unit(
+ FILE *f,
+ const char *unit,
+ const char *namespace,
+ OutputMode mode,
+ unsigned n_columns,
+ usec_t not_before,
+ unsigned how_many,
+ uid_t uid,
+ OutputFlags flags,
+ int journal_open_flags,
+ bool system_unit,
+ bool *ellipsized);
+
+void json_escape(
+ FILE *f,
+ const char* p,
+ size_t l,
+ OutputFlags flags);
diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c
new file mode 100644
index 0000000..84f415a
--- /dev/null
+++ b/src/shared/loop-util.c
@@ -0,0 +1,722 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+#include <valgrind/memcheck.h>
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/blkpg.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "sd-device.h"
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "device-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "loop-util.h"
+#include "missing_loop.h"
+#include "parse-util.h"
+#include "random-util.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "tmpfile-util.h"
+
+static void cleanup_clear_loop_close(int *fd) {
+ if (*fd < 0)
+ return;
+
+ (void) ioctl(*fd, LOOP_CLR_FD);
+ (void) safe_close(*fd);
+}
+
+static int loop_is_bound(int fd) {
+ struct loop_info64 info;
+
+ assert(fd >= 0);
+
+ if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) {
+ if (errno == ENXIO)
+ return false; /* not bound! */
+
+ return -errno;
+ }
+
+ return true; /* bound! */
+}
+
+static int device_has_block_children(sd_device *d) {
+ _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
+ const char *main_sn, *main_ss;
+ sd_device *q;
+ int r;
+
+ assert(d);
+
+ /* Checks if the specified device currently has block device children (i.e. partition block
+ * devices). */
+
+ r = sd_device_get_sysname(d, &main_sn);
+ if (r < 0)
+ return r;
+
+ r = sd_device_get_subsystem(d, &main_ss);
+ if (r < 0)
+ return r;
+
+ if (!streq(main_ss, "block"))
+ return -EINVAL;
+
+ r = sd_device_enumerator_new(&e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_allow_uninitialized(e);
+ if (r < 0)
+ return r;
+
+ r = sd_device_enumerator_add_match_parent(e, d);
+ if (r < 0)
+ return r;
+
+ FOREACH_DEVICE(e, q) {
+ const char *ss, *sn;
+
+ r = sd_device_get_subsystem(q, &ss);
+ if (r < 0)
+ continue;
+
+ if (!streq(ss, "block"))
+ continue;
+
+ r = sd_device_get_sysname(q, &sn);
+ if (r < 0)
+ continue;
+
+ if (streq(sn, main_sn))
+ continue;
+
+ return 1; /* we have block device children */
+ }
+
+ return 0;
+}
+
+static int loop_configure(
+ int fd,
+ int nr,
+ const struct loop_config *c,
+ bool *try_loop_configure) {
+
+ _cleanup_(sd_device_unrefp) sd_device *d = NULL;
+ _cleanup_free_ char *sysname = NULL;
+ _cleanup_close_ int lock_fd = -1;
+ int r;
+
+ assert(fd >= 0);
+ assert(nr >= 0);
+ assert(c);
+ assert(try_loop_configure);
+
+ if (asprintf(&sysname, "loop%i", nr) < 0)
+ return -ENOMEM;
+
+ r = sd_device_new_from_subsystem_sysname(&d, "block", sysname);
+ if (r < 0)
+ return r;
+
+ /* Let's lock the device before we do anything. We take the BSD lock on a second, separately opened
+ * fd for the device. udev after all watches for close() events (specifically IN_CLOSE_WRITE) on
+ * block devices to reprobe them, hence by having a separate fd we will later close() we can ensure
+ * we trigger udev after everything is done. If we'd lock our own fd instead and keep it open for a
+ * long time udev would possibly never run on it again, even though the fd is unlocked, simply
+ * because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
+ * automatically release the lock, after we are done. */
+ lock_fd = fd_reopen(fd, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (lock_fd < 0)
+ return lock_fd;
+ if (flock(lock_fd, LOCK_EX) < 0)
+ return -errno;
+
+ /* Let's see if the device is really detached, i.e. currently has no associated partition block
+ * devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
+ * superficially is detached but still has partition block devices associated for it. They only go
+ * away when the device is reattached. (Yes, LOOP_CLR_FD doesn't work then, because officially
+ * nothing is attached and LOOP_CTL_REMOVE doesn't either, since it doesn't care about partition
+ * block devices. */
+ r = device_has_block_children(d);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ r = loop_is_bound(fd);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ return -EBUSY;
+
+ return -EUCLEAN; /* Bound but children? Tell caller to reattach something so that the
+ * partition block devices are gone too. */
+ }
+
+ if (*try_loop_configure) {
+ if (ioctl(fd, LOOP_CONFIGURE, c) < 0) {
+ /* Do fallback only if LOOP_CONFIGURE is not supported, propagate all other
+ * errors. Note that the kernel is weird: non-existing ioctls currently return EINVAL
+ * rather than ENOTTY on loopback block devices. They should fix that in the kernel,
+ * but in the meantime we accept both here. */
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EINVAL)
+ return -errno;
+
+ *try_loop_configure = false;
+ } else {
+ bool good = true;
+
+ if (c->info.lo_sizelimit != 0) {
+ /* Kernel 5.8 vanilla doesn't properly propagate the size limit into the
+ * block device. If it's used, let's immediately check if it had the desired
+ * effect hence. And if not use classic LOOP_SET_STATUS64. */
+ uint64_t z;
+
+ if (ioctl(fd, BLKGETSIZE64, &z) < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ if (z != c->info.lo_sizelimit) {
+ log_debug("LOOP_CONFIGURE is broken, doesn't honour .lo_sizelimit. Falling back to LOOP_SET_STATUS64.");
+ good = false;
+ }
+ }
+
+ if (FLAGS_SET(c->info.lo_flags, LO_FLAGS_PARTSCAN)) {
+ /* Kernel 5.8 vanilla doesn't properly propagate the partition scanning flag
+ * into the block device. Let's hence verify if things work correctly here
+ * before returning. */
+
+ r = blockdev_partscan_enabled(fd);
+ if (r < 0)
+ goto fail;
+ if (r == 0) {
+ log_debug("LOOP_CONFIGURE is broken, doesn't honour LO_FLAGS_PARTSCAN. Falling back to LOOP_SET_STATUS64.");
+ good = false;
+ }
+ }
+
+ if (!good) {
+ /* LOOP_CONFIGURE doesn't work. Remember that. */
+ *try_loop_configure = false;
+
+ /* We return EBUSY here instead of retrying immediately with LOOP_SET_FD,
+ * because LOOP_CLR_FD is async: if the operation cannot be executed right
+ * away it just sets the autoclear flag on the device. This means there's a
+ * good chance we cannot actually reuse the loopback device right-away. Hence
+ * let's assume it's busy, avoid the trouble and let the calling loop call us
+ * again with a new, likely unused device. */
+ r = -EBUSY;
+ goto fail;
+ }
+
+ return 0;
+ }
+ }
+
+ /* Since kernel commit 5db470e229e22b7eda6e23b5566e532c96fb5bc3 (kernel v5.0) the LOOP_SET_STATUS64
+ * ioctl can return EAGAIN in case we change the lo_offset field, if someone else is accessing the
+ * block device while we try to reconfigure it. This is a pretty common case, since udev might
+ * instantly start probing the device as soon as we attach an fd to it. Hence handle it in two ways:
+ * first, let's take the BSD lock to ensure that udev will not step in between the point in
+ * time where we attach the fd and where we reconfigure the device. Secondly, let's wait 50ms on
+ * EAGAIN and retry. The former should be an efficient mechanism to avoid we have to wait 50ms
+ * needlessly if we are just racing against udev. The latter is protection against all other cases,
+ * i.e. peers that do not take the BSD lock. */
+
+ if (ioctl(fd, LOOP_SET_FD, c->fd) < 0)
+ return -errno;
+
+ for (unsigned n_attempts = 0;;) {
+ if (ioctl(fd, LOOP_SET_STATUS64, &c->info) >= 0)
+ break;
+ if (errno != EAGAIN || ++n_attempts >= 64) {
+ r = log_debug_errno(errno, "Failed to configure loopback device: %m");
+ goto fail;
+ }
+
+ /* Sleep some random time, but at least 10ms, at most 250ms. Increase the delay the more
+ * failed attempts we see */
+ (void) usleep(UINT64_C(10) * USEC_PER_MSEC +
+ random_u64() % (UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
+ }
+
+ return 0;
+
+fail:
+ (void) ioctl(fd, LOOP_CLR_FD);
+ return r;
+}
+
+static int attach_empty_file(int loop, int nr) {
+ _cleanup_close_ int fd = -1;
+
+ /* So here's the thing: on various kernels (5.8 at least) loop block devices might enter a state
+ * where they are detached but nonetheless have partitions, when used heavily. Accessing these
+ * partitions results in immediatey IO errors. There's no pretty way to get rid of them
+ * again. Neither LOOP_CLR_FD nor LOOP_CTL_REMOVE suffice (see above). What does work is to
+ * reassociate them with a new fd however. This is what we do here hence: we associate the devices
+ * with an empty file (i.e. an image that definitely has no partitions). We then immediately clear it
+ * again. This suffices to make the partitions go away. Ugly but appears to work. */
+
+ log_debug("Found unattached loopback block device /dev/loop%i with partitions. Attaching empty file to remove them.", nr);
+
+ fd = open_tmpfile_unlinkable(NULL, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ if (flock(loop, LOCK_EX) < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_SET_FD, fd) < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_SET_STATUS64, &(struct loop_info64) {
+ .lo_flags = LO_FLAGS_READ_ONLY|
+ LO_FLAGS_AUTOCLEAR|
+ LO_FLAGS_PARTSCAN, /* enable partscan, so that the partitions really go away */
+ }) < 0)
+ return -errno;
+
+ if (ioctl(loop, LOOP_CLR_FD) < 0)
+ return -errno;
+
+ /* The caller is expected to immediately close the loopback device after this, so that the BSD lock
+ * is released, and udev sees the changes. */
+ return 0;
+}
+
+int loop_device_make(
+ int fd,
+ int open_flags,
+ uint64_t offset,
+ uint64_t size,
+ uint32_t loop_flags,
+ LoopDevice **ret) {
+
+ _cleanup_free_ char *loopdev = NULL;
+ bool try_loop_configure = true;
+ struct loop_config config;
+ LoopDevice *d = NULL;
+ struct stat st;
+ int nr = -1, r;
+
+ assert(fd >= 0);
+ assert(ret);
+ assert(IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ if (S_ISBLK(st.st_mode)) {
+ if (ioctl(fd, LOOP_GET_STATUS64, &config.info) >= 0) {
+ /* Oh! This is a loopback device? That's interesting! */
+
+#if HAVE_VALGRIND_MEMCHECK_H
+ /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
+ VALGRIND_MAKE_MEM_DEFINED(&config.info, sizeof(config.info));
+#endif
+ nr = config.info.lo_number;
+
+ if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
+ return -ENOMEM;
+ }
+
+ if (offset == 0 && IN_SET(size, 0, UINT64_MAX)) {
+ _cleanup_close_ int copy = -1;
+
+ /* If this is already a block device, store a copy of the fd as it is */
+
+ copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ if (copy < 0)
+ return -errno;
+
+ d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+ *d = (LoopDevice) {
+ .fd = TAKE_FD(copy),
+ .nr = nr,
+ .node = TAKE_PTR(loopdev),
+ .relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
+ };
+
+ *ret = d;
+ return d->fd;
+ }
+ } else {
+ r = stat_verify_regular(&st);
+ if (r < 0)
+ return r;
+ }
+
+ _cleanup_close_ int control = -1;
+ _cleanup_(cleanup_clear_loop_close) int loop_with_fd = -1;
+
+ control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (control < 0)
+ return -errno;
+
+ config = (struct loop_config) {
+ .fd = fd,
+ .info = {
+ /* Use the specified flags, but configure the read-only flag from the open flags, and force autoclear */
+ .lo_flags = (loop_flags & ~LO_FLAGS_READ_ONLY) | ((open_flags & O_ACCMODE) == O_RDONLY ? LO_FLAGS_READ_ONLY : 0) | LO_FLAGS_AUTOCLEAR,
+ .lo_offset = offset,
+ .lo_sizelimit = size == UINT64_MAX ? 0 : size,
+ },
+ };
+
+ /* Loop around LOOP_CTL_GET_FREE, since at the moment we attempt to open the returned device it might
+ * be gone already, taken by somebody else racing against us. */
+ for (unsigned n_attempts = 0;;) {
+ _cleanup_close_ int loop = -1;
+
+ nr = ioctl(control, LOOP_CTL_GET_FREE);
+ if (nr < 0)
+ return -errno;
+
+ if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
+ return -ENOMEM;
+
+ loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (loop < 0) {
+ /* Somebody might've gotten the same number from the kernel, used the device,
+ * and called LOOP_CTL_REMOVE on it. Let's retry with a new number. */
+ if (!IN_SET(errno, ENOENT, ENXIO))
+ return -errno;
+ } else {
+ r = loop_configure(loop, nr, &config, &try_loop_configure);
+ if (r >= 0) {
+ loop_with_fd = TAKE_FD(loop);
+ break;
+ }
+ if (r == -EUCLEAN) {
+ /* Make left-over partition disappear hack (see above) */
+ r = attach_empty_file(loop, nr);
+ if (r < 0 && r != -EBUSY)
+ return r;
+ } else if (r != -EBUSY)
+ return r;
+ }
+
+ if (++n_attempts >= 64) /* Give up eventually */
+ return -EBUSY;
+
+ loopdev = mfree(loopdev);
+
+ /* Wait some random time, to make collision less likely. Let's pick a random time in the
+ * range 0ms…250ms, linearly scaled by the number of failed attempts. */
+ (void) usleep(random_u64() % (UINT64_C(10) * USEC_PER_MSEC +
+ UINT64_C(240) * USEC_PER_MSEC * n_attempts/64));
+ }
+
+ d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+ *d = (LoopDevice) {
+ .fd = TAKE_FD(loop_with_fd),
+ .node = TAKE_PTR(loopdev),
+ .nr = nr,
+ };
+
+ *ret = d;
+ return 0;
+}
+
+int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret) {
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert(path);
+ assert(ret);
+ assert(open_flags < 0 || IN_SET(open_flags, O_RDWR, O_RDONLY));
+
+ /* Passing < 0 as open_flags here means we'll try to open the device writable if we can, retrying
+ * read-only if we cannot. */
+
+ fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|(open_flags >= 0 ? open_flags : O_RDWR));
+ if (fd < 0) {
+ r = -errno;
+
+ /* Retry read-only? */
+ if (open_flags >= 0 || !(ERRNO_IS_PRIVILEGE(r) || r == -EROFS))
+ return r;
+
+ fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|O_RDONLY);
+ if (fd < 0)
+ return r; /* Propagate original error */
+
+ open_flags = O_RDONLY;
+ } else if (open_flags < 0)
+ open_flags = O_RDWR;
+
+ return loop_device_make(fd, open_flags, 0, 0, loop_flags, ret);
+}
+
+LoopDevice* loop_device_unref(LoopDevice *d) {
+ if (!d)
+ return NULL;
+
+ if (d->fd >= 0) {
+ /* Implicitly sync the device, since otherwise in-flight blocks might not get written */
+ if (fsync(d->fd) < 0)
+ log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");
+
+ if (d->nr >= 0 && !d->relinquished) {
+ if (ioctl(d->fd, LOOP_CLR_FD) < 0)
+ log_debug_errno(errno, "Failed to clear loop device: %m");
+
+ }
+
+ safe_close(d->fd);
+ }
+
+ if (d->nr >= 0 && !d->relinquished) {
+ _cleanup_close_ int control = -1;
+
+ control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (control < 0)
+ log_warning_errno(errno,
+ "Failed to open loop control device, cannot remove loop device %s: %m",
+ strna(d->node));
+ else
+ for (unsigned n_attempts = 0;;) {
+ if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
+ break;
+ if (errno != EBUSY || ++n_attempts >= 64) {
+ log_warning_errno(errno, "Failed to remove device %s: %m", strna(d->node));
+ break;
+ }
+ (void) usleep(50 * USEC_PER_MSEC);
+ }
+ }
+
+ free(d->node);
+ return mfree(d);
+}
+
+void loop_device_relinquish(LoopDevice *d) {
+ assert(d);
+
+ /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel
+ * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */
+
+ d->relinquished = true;
+}
+
+int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) {
+ _cleanup_close_ int loop_fd = -1;
+ _cleanup_free_ char *p = NULL;
+ struct loop_info64 info;
+ struct stat st;
+ LoopDevice *d;
+ int nr;
+
+ assert(loop_path);
+ assert(ret);
+
+ loop_fd = open(loop_path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags);
+ if (loop_fd < 0)
+ return -errno;
+
+ if (fstat(loop_fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTBLK;
+
+ if (ioctl(loop_fd, LOOP_GET_STATUS64, &info) >= 0) {
+#if HAVE_VALGRIND_MEMCHECK_H
+ /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
+ VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
+#endif
+ nr = info.lo_number;
+ } else
+ nr = -1;
+
+ p = strdup(loop_path);
+ if (!p)
+ return -ENOMEM;
+
+ d = new(LoopDevice, 1);
+ if (!d)
+ return -ENOMEM;
+
+ *d = (LoopDevice) {
+ .fd = TAKE_FD(loop_fd),
+ .nr = nr,
+ .node = TAKE_PTR(p),
+ .relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
+ };
+
+ *ret = d;
+ return d->fd;
+}
+
+static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
+ char sysfs[STRLEN("/sys/dev/block/:/partition") + 2*DECIMAL_STR_MAX(dev_t) + 1];
+ _cleanup_free_ char *whole = NULL, *buffer = NULL;
+ uint64_t current_offset, current_size, partno;
+ _cleanup_close_ int whole_fd = -1;
+ struct stat st;
+ dev_t devno;
+ int r;
+
+ assert(partition_fd >= 0);
+
+ /* Resizes the partition the loopback device refer to (assuming it refers to one instead of an actual
+ * loopback device), and changes the offset, if needed. This is a fancy wrapper around
+ * BLKPG_RESIZE_PARTITION. */
+
+ if (fstat(partition_fd, &st) < 0)
+ return -errno;
+
+ assert(S_ISBLK(st.st_mode));
+
+ xsprintf(sysfs, "/sys/dev/block/%u:%u/partition", major(st.st_rdev), minor(st.st_rdev));
+ r = read_one_line_file(sysfs, &buffer);
+ if (r == -ENOENT) /* not a partition, cannot resize */
+ return -ENOTTY;
+ if (r < 0)
+ return r;
+ r = safe_atou64(buffer, &partno);
+ if (r < 0)
+ return r;
+
+ xsprintf(sysfs, "/sys/dev/block/%u:%u/start", major(st.st_rdev), minor(st.st_rdev));
+
+ buffer = mfree(buffer);
+ r = read_one_line_file(sysfs, &buffer);
+ if (r < 0)
+ return r;
+ r = safe_atou64(buffer, &current_offset);
+ if (r < 0)
+ return r;
+ if (current_offset > UINT64_MAX/512U)
+ return -EINVAL;
+ current_offset *= 512U;
+
+ if (ioctl(partition_fd, BLKGETSIZE64, &current_size) < 0)
+ return -EINVAL;
+
+ if (size == UINT64_MAX && offset == UINT64_MAX)
+ return 0;
+ if (current_size == size && current_offset == offset)
+ return 0;
+
+ xsprintf(sysfs, "/sys/dev/block/%u:%u/../dev", major(st.st_rdev), minor(st.st_rdev));
+
+ buffer = mfree(buffer);
+ r = read_one_line_file(sysfs, &buffer);
+ if (r < 0)
+ return r;
+ r = parse_dev(buffer, &devno);
+ if (r < 0)
+ return r;
+
+ r = device_path_make_major_minor(S_IFBLK, devno, &whole);
+ if (r < 0)
+ return r;
+
+ whole_fd = open(whole, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+ if (whole_fd < 0)
+ return -errno;
+
+ struct blkpg_partition bp = {
+ .pno = partno,
+ .start = offset == UINT64_MAX ? current_offset : offset,
+ .length = size == UINT64_MAX ? current_size : size,
+ };
+
+ struct blkpg_ioctl_arg ba = {
+ .op = BLKPG_RESIZE_PARTITION,
+ .data = &bp,
+ .datalen = sizeof(bp),
+ };
+
+ if (ioctl(whole_fd, BLKPG, &ba) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
+ struct loop_info64 info;
+ assert(d);
+
+ /* Changes the offset/start of the loop device relative to the beginning of the underlying file or
+ * block device. If this loop device actually refers to a partition and not a loopback device, we'll
+ * try to adjust the partition offsets instead.
+ *
+ * If either offset or size is UINT64_MAX we won't change that parameter. */
+
+ if (d->fd < 0)
+ return -EBADF;
+
+ if (d->nr < 0) /* not a loopback device */
+ return resize_partition(d->fd, offset, size);
+
+ if (ioctl(d->fd, LOOP_GET_STATUS64, &info) < 0)
+ return -errno;
+
+#if HAVE_VALGRIND_MEMCHECK_H
+ /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */
+ VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
+#endif
+
+ if (size == UINT64_MAX && offset == UINT64_MAX)
+ return 0;
+ if (info.lo_sizelimit == size && info.lo_offset == offset)
+ return 0;
+
+ if (size != UINT64_MAX)
+ info.lo_sizelimit = size;
+ if (offset != UINT64_MAX)
+ info.lo_offset = offset;
+
+ if (ioctl(d->fd, LOOP_SET_STATUS64, &info) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int loop_device_flock(LoopDevice *d, int operation) {
+ assert(d);
+
+ if (d->fd < 0)
+ return -EBADF;
+
+ if (flock(d->fd, operation) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int loop_device_sync(LoopDevice *d) {
+ assert(d);
+
+ /* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
+ * we can check the return value though. */
+
+ if (d->fd < 0)
+ return -EBADF;
+
+ if (fsync(d->fd) < 0)
+ return -errno;
+
+ return 0;
+}
diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h
new file mode 100644
index 0000000..9538dae
--- /dev/null
+++ b/src/shared/loop-util.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+typedef struct LoopDevice LoopDevice;
+
+/* Some helpers for setting up loopback block devices */
+
+struct LoopDevice {
+ int fd;
+ int nr;
+ char *node;
+ bool relinquished;
+};
+
+int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t loop_flags, LoopDevice **ret);
+int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret);
+int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret);
+
+LoopDevice* loop_device_unref(LoopDevice *d);
+DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref);
+
+void loop_device_relinquish(LoopDevice *d);
+
+int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size);
+
+int loop_device_flock(LoopDevice *d, int operation);
+int loop_device_sync(LoopDevice *d);
diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c
new file mode 100644
index 0000000..671a56b
--- /dev/null
+++ b/src/shared/machine-image.c
@@ -0,0 +1,1274 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <linux/loop.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/file.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "btrfs-util.h"
+#include "chattr-util.h"
+#include "copy.h"
+#include "dirent-util.h"
+#include "dissect-image.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "hostname-util.h"
+#include "id128-util.h"
+#include "lockfile-util.h"
+#include "log.h"
+#include "loop-util.h"
+#include "machine-image.h"
+#include "macro.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "utf8.h"
+#include "xattr-util.h"
+
+static const char* const image_search_path[_IMAGE_CLASS_MAX] = {
+ [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */
+ "/run/machines\0" /* and here too */
+ "/var/lib/machines\0" /* the main place for images */
+ "/var/lib/container\0" /* legacy */
+ "/usr/local/lib/machines\0"
+ "/usr/lib/machines\0",
+
+ [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */
+ "/run/portables\0" /* and here too */
+ "/var/lib/portables\0" /* the main place for images */
+ "/usr/local/lib/portables\0"
+ "/usr/lib/portables\0",
+};
+
+static Image *image_free(Image *i) {
+ assert(i);
+
+ free(i->name);
+ free(i->path);
+
+ free(i->hostname);
+ strv_free(i->machine_info);
+ strv_free(i->os_release);
+
+ return mfree(i);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
+ Image, image_unref);
+
+static char **image_settings_path(Image *image) {
+ _cleanup_strv_free_ char **l = NULL;
+ const char *fn, *s;
+ unsigned i = 0;
+
+ assert(image);
+
+ l = new0(char*, 4);
+ if (!l)
+ return NULL;
+
+ fn = strjoina(image->name, ".nspawn");
+
+ FOREACH_STRING(s, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+ l[i] = path_join(s, fn);
+ if (!l[i])
+ return NULL;
+
+ i++;
+ }
+
+ l[i] = file_in_same_dir(image->path, fn);
+ if (!l[i])
+ return NULL;
+
+ return TAKE_PTR(l);
+}
+
+static char *image_roothash_path(Image *image) {
+ const char *fn;
+
+ assert(image);
+
+ fn = strjoina(image->name, ".roothash");
+
+ return file_in_same_dir(image->path, fn);
+}
+
+static int image_new(
+ ImageType t,
+ const char *pretty,
+ const char *path,
+ const char *filename,
+ bool read_only,
+ usec_t crtime,
+ usec_t mtime,
+ Image **ret) {
+
+ _cleanup_(image_unrefp) Image *i = NULL;
+
+ assert(t >= 0);
+ assert(t < _IMAGE_TYPE_MAX);
+ assert(pretty);
+ assert(filename);
+ assert(ret);
+
+ i = new0(Image, 1);
+ if (!i)
+ return -ENOMEM;
+
+ i->n_ref = 1;
+ i->type = t;
+ i->read_only = read_only;
+ i->crtime = crtime;
+ i->mtime = mtime;
+ i->usage = i->usage_exclusive = (uint64_t) -1;
+ i->limit = i->limit_exclusive = (uint64_t) -1;
+
+ i->name = strdup(pretty);
+ if (!i->name)
+ return -ENOMEM;
+
+ i->path = path_join(path, filename);
+ if (!i->path)
+ return -ENOMEM;
+
+ path_simplify(i->path, false);
+
+ *ret = TAKE_PTR(i);
+
+ return 0;
+}
+
+static int extract_pretty(const char *path, const char *suffix, char **ret) {
+ _cleanup_free_ char *name = NULL;
+ const char *p;
+ size_t n;
+
+ assert(path);
+ assert(ret);
+
+ p = last_path_component(path);
+ n = strcspn(p, "/");
+
+ name = strndup(p, n);
+ if (!name)
+ return -ENOMEM;
+
+ if (suffix) {
+ char *e;
+
+ e = endswith(name, suffix);
+ if (!e)
+ return -EINVAL;
+
+ *e = 0;
+ }
+
+ if (!image_name_is_valid(name))
+ return -EINVAL;
+
+ *ret = TAKE_PTR(name);
+ return 0;
+}
+
+static int image_make(
+ const char *pretty,
+ int dfd,
+ const char *path,
+ const char *filename,
+ const struct stat *st,
+ Image **ret) {
+
+ _cleanup_free_ char *pretty_buffer = NULL, *parent = NULL;
+ struct stat stbuf;
+ bool read_only;
+ int r;
+
+ assert(dfd >= 0 || dfd == AT_FDCWD);
+ assert(path || dfd == AT_FDCWD);
+ assert(filename);
+
+ /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
+ * devices into /var/lib/machines/, and treat them normally.
+ *
+ * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we
+ * recognize. */
+
+ if (!st) {
+ if (fstatat(dfd, filename, &stbuf, 0) < 0)
+ return -errno;
+
+ st = &stbuf;
+ }
+
+ if (!path) {
+ if (dfd == AT_FDCWD)
+ (void) safe_getcwd(&parent);
+ else
+ (void) fd_get_path(dfd, &parent);
+ }
+
+ read_only =
+ (path && path_startswith(path, "/usr")) ||
+ (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
+
+ if (S_ISDIR(st->st_mode)) {
+ _cleanup_close_ int fd = -1;
+ unsigned file_attr = 0;
+
+ if (!ret)
+ return 0;
+
+ if (!pretty) {
+ r = extract_pretty(filename, NULL, &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+ if (fd < 0)
+ return -errno;
+
+ /* btrfs subvolumes have inode 256 */
+ if (st->st_ino == 256) {
+
+ r = btrfs_is_filesystem(fd);
+ if (r < 0)
+ return r;
+ if (r) {
+ BtrfsSubvolInfo info;
+
+ /* It's a btrfs subvolume */
+
+ r = btrfs_subvol_get_info_fd(fd, 0, &info);
+ if (r < 0)
+ return r;
+
+ r = image_new(IMAGE_SUBVOLUME,
+ pretty,
+ path,
+ filename,
+ info.read_only || read_only,
+ info.otime,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ if (btrfs_quota_scan_ongoing(fd) == 0) {
+ BtrfsQuotaInfo quota;
+
+ r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
+ if (r >= 0) {
+ (*ret)->usage = quota.referenced;
+ (*ret)->usage_exclusive = quota.exclusive;
+
+ (*ret)->limit = quota.referenced_max;
+ (*ret)->limit_exclusive = quota.exclusive_max;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ /* If the IMMUTABLE bit is set, we consider the
+ * directory read-only. Since the ioctl is not
+ * supported everywhere we ignore failures. */
+ (void) read_attr_fd(fd, &file_attr);
+
+ /* It's just a normal directory. */
+ r = image_new(IMAGE_DIRECTORY,
+ pretty,
+ path,
+ filename,
+ read_only || (file_attr & FS_IMMUTABLE_FL),
+ 0,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ return 0;
+
+ } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
+ usec_t crtime = 0;
+
+ /* It's a RAW disk image */
+
+ if (!ret)
+ return 0;
+
+ (void) fd_getcrtime_at(dfd, filename, &crtime, 0);
+
+ if (!pretty) {
+ r = extract_pretty(filename, ".raw", &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ r = image_new(IMAGE_RAW,
+ pretty,
+ path,
+ filename,
+ !(st->st_mode & 0222) || read_only,
+ crtime,
+ timespec_load(&st->st_mtim),
+ ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
+ (*ret)->limit = (*ret)->limit_exclusive = st->st_size;
+
+ return 0;
+
+ } else if (S_ISBLK(st->st_mode)) {
+ _cleanup_close_ int block_fd = -1;
+ uint64_t size = UINT64_MAX;
+
+ /* A block device */
+
+ if (!ret)
+ return 0;
+
+ if (!pretty) {
+ r = extract_pretty(filename, NULL, &pretty_buffer);
+ if (r < 0)
+ return r;
+
+ pretty = pretty_buffer;
+ }
+
+ block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+ if (block_fd < 0)
+ log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
+ else {
+ /* Refresh stat data after opening the node */
+ if (fstat(block_fd, &stbuf) < 0)
+ return -errno;
+ st = &stbuf;
+
+ if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
+ return -ENOTTY;
+
+ if (!read_only) {
+ int state = 0;
+
+ if (ioctl(block_fd, BLKROGET, &state) < 0)
+ log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
+ else if (state)
+ read_only = true;
+ }
+
+ if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
+ log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path ?: strnull(parent), filename);
+
+ block_fd = safe_close(block_fd);
+ }
+
+ r = image_new(IMAGE_BLOCK,
+ pretty,
+ path,
+ filename,
+ !(st->st_mode & 0222) || read_only,
+ 0,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ if (!IN_SET(size, 0, UINT64_MAX))
+ (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
+
+ return 0;
+ }
+
+ return -EMEDIUMTYPE;
+}
+
+int image_find(ImageClass class, const char *name, Image **ret) {
+ const char *path;
+ int r;
+
+ assert(class >= 0);
+ assert(class < _IMAGE_CLASS_MAX);
+ assert(name);
+
+ /* There are no images with invalid names */
+ if (!image_name_is_valid(name))
+ return -ENOENT;
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct stat st;
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to
+ * symlink block devices into the search path */
+ if (fstatat(dirfd(d), name, &st, 0) < 0) {
+ _cleanup_free_ char *raw = NULL;
+
+ if (errno != ENOENT)
+ return -errno;
+
+ raw = strjoin(name, ".raw");
+ if (!raw)
+ return -ENOMEM;
+
+ if (fstatat(dirfd(d), raw, &st, 0) < 0) {
+
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ if (!S_ISREG(st.st_mode))
+ continue;
+
+ r = image_make(name, dirfd(d), path, raw, &st, ret);
+
+ } else {
+ if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode))
+ continue;
+
+ r = image_make(name, dirfd(d), path, name, &st, ret);
+ }
+ if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+ continue;
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->discoverable = true;
+
+ return 1;
+ }
+
+ if (class == IMAGE_MACHINE && streq(name, ".host")) {
+ r = image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->discoverable = true;
+
+ return r;
+ }
+
+ return -ENOENT;
+};
+
+int image_from_path(const char *path, Image **ret) {
+
+ /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
+ * the image is in the image search path. And if it is we don't know if the path we used is actually not
+ * overridden by another, different image earlier in the search path */
+
+ if (path_equal(path, "/"))
+ return image_make(".host", AT_FDCWD, NULL, "/", NULL, ret);
+
+ return image_make(NULL, AT_FDCWD, NULL, path, NULL, ret);
+}
+
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret) {
+ if (image_name_is_valid(name_or_path))
+ return image_find(class, name_or_path, ret);
+
+ return image_from_path(name_or_path, ret);
+}
+
+int image_discover(ImageClass class, Hashmap *h) {
+ const char *path;
+ int r;
+
+ assert(class >= 0);
+ assert(class < _IMAGE_CLASS_MAX);
+ assert(h);
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+
+ d = opendir(path);
+ if (!d) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, return -errno) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+ _cleanup_free_ char *truncated = NULL;
+ const char *pretty;
+ struct stat st;
+
+ if (dot_or_dot_dot(de->d_name))
+ continue;
+
+ /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people
+ * to symlink block devices into the search path */
+ if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ return -errno;
+ }
+
+ if (S_ISREG(st.st_mode)) {
+ const char *e;
+
+ e = endswith(de->d_name, ".raw");
+ if (!e)
+ continue;
+
+ truncated = strndup(de->d_name, e - de->d_name);
+ if (!truncated)
+ return -ENOMEM;
+
+ pretty = truncated;
+ } else if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode))
+ pretty = de->d_name;
+ else
+ continue;
+
+ if (!image_name_is_valid(pretty))
+ continue;
+
+ if (hashmap_contains(h, pretty))
+ continue;
+
+ r = image_make(pretty, dirfd(d), path, de->d_name, &st, &image);
+ if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
+ continue;
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ image = NULL;
+ }
+ }
+
+ if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
+ _cleanup_(image_unrefp) Image *image = NULL;
+
+ r = image_make(".host", AT_FDCWD, NULL, "/", NULL, &image);
+ if (r < 0)
+ return r;
+
+ image->discoverable = true;
+
+ r = hashmap_put(h, image->name, image);
+ if (r < 0)
+ return r;
+
+ image = NULL;
+ }
+
+ return 0;
+}
+
+int image_remove(Image *i) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
+ * big guns */
+ if (unlink(i->path) < 0) {
+ r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ return r;
+ }
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* Allow deletion of read-only directories */
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+ r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
+ * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
+ * the thing (it's most likely a symlink after all). */
+
+ if (path_startswith(i->path, "/dev"))
+ break;
+
+ _fallthrough_;
+ case IMAGE_RAW:
+ if (unlink(i->path) < 0)
+ return -errno;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ STRV_FOREACH(j, settings) {
+ if (unlink(*j) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
+ }
+
+ if (unlink(roothash) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+ _cleanup_free_ char *rs = NULL;
+ const char *fn;
+
+ fn = strjoina(new_name, suffix);
+
+ rs = file_in_same_dir(path, fn);
+ if (!rs)
+ return -ENOMEM;
+
+ return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
+}
+
+int image_rename(Image *i, const char *new_name) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
+ _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
+ _cleanup_strv_free_ char **settings = NULL;
+ unsigned file_attr = 0;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_DIRECTORY:
+ /* Turn of the immutable bit while we rename the image, so that we can rename it */
+ (void) read_attr_path(i->path, &file_attr);
+
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
+
+ _fallthrough_;
+ case IMAGE_SUBVOLUME:
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_BLOCK:
+
+ /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
+ if (path_startswith(i->path, "/dev"))
+ return -EROFS;
+
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
+ case IMAGE_RAW: {
+ const char *fn;
+
+ fn = strjoina(new_name, ".raw");
+ new_path = file_in_same_dir(i->path, fn);
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!new_path)
+ return -ENOMEM;
+
+ nn = strdup(new_name);
+ if (!nn)
+ return -ENOMEM;
+
+ r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
+ if (r < 0)
+ return r;
+
+ /* Restore the immutable bit, if it was set before */
+ if (file_attr & FS_IMMUTABLE_FL)
+ (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
+
+ free_and_replace(i->path, new_path);
+ free_and_replace(i->name, nn);
+
+ STRV_FOREACH(j, settings) {
+ r = rename_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
+ }
+
+ r = rename_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
+ _cleanup_free_ char *rs = NULL;
+ const char *fn;
+
+ fn = strjoina(new_name, suffix);
+
+ rs = file_in_same_dir(path, fn);
+ if (!rs)
+ return -ENOMEM;
+
+ return copy_file_atomic(path, rs, 0664, 0, 0, COPY_REFLINK);
+}
+
+int image_clone(Image *i, const char *new_name, bool read_only) {
+ _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
+ _cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
+ const char *new_path;
+ char **j;
+ int r;
+
+ assert(i);
+
+ if (!image_name_is_valid(new_name))
+ return -EINVAL;
+
+ settings = image_settings_path(i);
+ if (!settings)
+ return -ENOMEM;
+
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
+ /* Make sure nobody takes the new name, between the time we
+ * checked it is currently unused in all search paths, and the
+ * time we take possession of it */
+ r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
+ if (r < 0)
+ return r;
+
+ r = image_find(IMAGE_MACHINE, new_name, NULL);
+ if (r >= 0)
+ return -EEXIST;
+ if (r != -ENOENT)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+ case IMAGE_DIRECTORY:
+ /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
+ * directory. */
+
+ new_path = strjoina("/var/lib/machines/", new_name);
+
+ r = btrfs_subvol_snapshot(i->path, new_path,
+ (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA);
+ if (r >= 0)
+ /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
+ (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
+
+ break;
+
+ case IMAGE_RAW:
+ new_path = strjoina("/var/lib/machines/", new_name, ".raw");
+
+ r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME);
+ break;
+
+ case IMAGE_BLOCK:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(j, settings) {
+ r = clone_auxiliary_file(*j, new_name, ".nspawn");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
+ }
+
+ r = clone_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
+
+ return 0;
+}
+
+int image_read_only(Image *i, bool b) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ int r;
+
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ /* Make sure we don't interfere with a running nspawn */
+ r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+
+ /* Note that we set the flag only on the top-level
+ * subvolume of the image. */
+
+ r = btrfs_subvol_set_read_only(i->path, b);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_DIRECTORY:
+ /* For simple directory trees we cannot use the access
+ mode of the top-level directory, since it has an
+ effect on the container itself. However, we can
+ use the "immutable" flag, to at least make the
+ top-level directory read-only. It's not as good as
+ a read-only subvolume, but at least something, and
+ we can read the value back. */
+
+ r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case IMAGE_RAW: {
+ struct stat st;
+
+ if (stat(i->path, &st) < 0)
+ return -errno;
+
+ if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
+ return -errno;
+
+ /* If the images is now read-only, it's a good time to
+ * defrag it, given that no write patterns will
+ * fragment it again. */
+ if (b)
+ (void) btrfs_defrag(i->path);
+ break;
+ }
+
+ case IMAGE_BLOCK: {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int state = b;
+
+ fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTTY;
+
+ if (ioctl(fd, BLKROSET, &state) < 0)
+ return -errno;
+
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
+ _cleanup_free_ char *p = NULL;
+ LockFile t = LOCK_FILE_INIT;
+ struct stat st;
+ bool exclusive;
+ int r;
+
+ assert(path);
+ assert(global);
+ assert(local);
+
+ /* Locks an image path. This actually creates two locks: one "local" one, next to the image path
+ * itself, which might be shared via NFS. And another "global" one, in /run, that uses the
+ * device/inode number. This has the benefit that we can even lock a tree that is a mount point,
+ * correctly. */
+
+ if (!path_is_absolute(path))
+ return -EINVAL;
+
+ switch (operation & (LOCK_SH|LOCK_EX)) {
+ case LOCK_SH:
+ exclusive = false;
+ break;
+ case LOCK_EX:
+ exclusive = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ /* Prohibit taking exclusive locks on the host image. We can't allow this, since we ourselves are
+ * running off it after all, and we don't want any images to manipulate the host image. We make an
+ * exception for shared locks however: we allow those (and make them NOPs since there's no point in
+ * taking them if there can't be exclusive locks). Strictly speaking these are questionable as well,
+ * since it means changes made to the host might propagate to the container as they happen (and a
+ * shared lock kinda suggests that no changes happen at all while it is in place), but it's too
+ * useful not to allow read-only containers off the host root, hence let's support this, and trust
+ * the user to do the right thing with this. */
+ if (path_equal(path, "/")) {
+ if (exclusive)
+ return -EBUSY;
+
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (stat(path, &st) >= 0) {
+ if (S_ISBLK(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
+ else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
+ else
+ return -ENOTTY;
+
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ /* For block devices we don't need the "local" lock, as the major/minor lock above should be
+ * sufficient, since block devices are host local anyway. */
+ if (!path_startswith(path, "/dev/")) {
+ r = make_lock_file_for(path, operation, &t);
+ if (r < 0) {
+ if (!exclusive && r == -EROFS)
+ log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
+ else
+ return r;
+ }
+ }
+
+ if (p) {
+ (void) mkdir_p("/run/systemd/nspawn/locks", 0700);
+
+ r = make_lock_file(p, operation, global);
+ if (r < 0) {
+ release_lock_file(&t);
+ return r;
+ }
+ } else
+ *global = (LockFile) LOCK_FILE_INIT;
+
+ *local = t;
+ return 0;
+}
+
+int image_set_limit(Image *i, uint64_t referenced_max) {
+ assert(i);
+
+ if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
+ return -EROFS;
+
+ if (i->type != IMAGE_SUBVOLUME)
+ return -EOPNOTSUPP;
+
+ /* We set the quota both for the subvolume as well as for the
+ * subtree. The latter is mostly for historical reasons, since
+ * we didn't use to have a concept of subtree quota, and hence
+ * only modified the subvolume quota. */
+
+ (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
+ (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
+ return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
+}
+
+int image_read_metadata(Image *i) {
+ _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
+ int r;
+
+ assert(i);
+
+ r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
+ if (r < 0)
+ return r;
+
+ switch (i->type) {
+
+ case IMAGE_SUBVOLUME:
+ case IMAGE_DIRECTORY: {
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL;
+ sd_id128_t machine_id = SD_ID128_NULL;
+ _cleanup_free_ char *hostname = NULL;
+ _cleanup_free_ char *path = NULL;
+
+ r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
+ else if (r >= 0) {
+ r = read_etc_hostname(path, &hostname);
+ if (r < 0)
+ log_debug_errno(errno, "Failed to read /etc/hostname of image %s: %m", i->name);
+ }
+
+ path = mfree(path);
+
+ r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name);
+ else if (r >= 0) {
+ _cleanup_close_ int fd = -1;
+
+ fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ log_debug_errno(errno, "Failed to open %s: %m", path);
+ else {
+ r = id128_read_fd(fd, ID128_PLAIN, &machine_id);
+ if (r < 0)
+ log_debug_errno(r, "Image %s contains invalid machine ID.", i->name);
+ }
+ }
+
+ path = mfree(path);
+
+ r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
+ else if (r >= 0) {
+ r = load_env_file_pairs(NULL, path, &machine_info);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
+ }
+
+ r = load_os_release_pairs(i->path, &os_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");
+
+ free_and_replace(i->hostname, hostname);
+ i->machine_id = machine_id;
+ strv_free_and_replace(i->machine_info, machine_info);
+ strv_free_and_replace(i->os_release, os_release);
+
+ break;
+ }
+
+ case IMAGE_RAW:
+ case IMAGE_BLOCK: {
+ _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
+ _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
+
+ r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
+ if (r < 0)
+ return r;
+
+ r = dissect_image(d->fd, NULL, NULL, DISSECT_IMAGE_REQUIRE_ROOT|DISSECT_IMAGE_RELAX_VAR_CHECK, &m);
+ if (r < 0)
+ return r;
+
+ r = dissected_image_acquire_metadata(m);
+ if (r < 0)
+ return r;
+
+ free_and_replace(i->hostname, m->hostname);
+ i->machine_id = m->machine_id;
+ strv_free_and_replace(i->machine_info, m->machine_info);
+ strv_free_and_replace(i->os_release, m->os_release);
+
+ break;
+ }
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ i->metadata_valid = true;
+
+ return 0;
+}
+
+int image_name_lock(const char *name, int operation, LockFile *ret) {
+ assert(name);
+ assert(ret);
+
+ /* Locks an image name, regardless of the precise path used. */
+
+ if (!image_name_is_valid(name))
+ return -EINVAL;
+
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *ret = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (streq(name, ".host"))
+ return -EBUSY;
+
+ const char *p = strjoina("/run/systemd/nspawn/locks/name-", name);
+ (void) mkdir_p("/run/systemd/nspawn/locks", 0700);
+ return make_lock_file(p, operation, ret);
+}
+
+bool image_name_is_valid(const char *s) {
+ if (!filename_is_valid(s))
+ return false;
+
+ if (string_has_cc(s, NULL))
+ return false;
+
+ if (!utf8_is_valid(s))
+ return false;
+
+ /* Temporary files for atomically creating new files */
+ if (startswith(s, ".#"))
+ return false;
+
+ return true;
+}
+
+bool image_in_search_path(ImageClass class, const char *image) {
+ const char *path;
+
+ assert(image);
+
+ NULSTR_FOREACH(path, image_search_path[class]) {
+ const char *p;
+ size_t k;
+
+ p = path_startswith(image, path);
+ if (!p)
+ continue;
+
+ /* Make sure there's a filename following */
+ k = strcspn(p, "/");
+ if (k == 0)
+ continue;
+
+ p += k;
+
+ /* Accept trailing slashes */
+ if (p[strspn(p, "/")] == 0)
+ return true;
+
+ }
+
+ return false;
+}
+
+static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
+ [IMAGE_DIRECTORY] = "directory",
+ [IMAGE_SUBVOLUME] = "subvolume",
+ [IMAGE_RAW] = "raw",
+ [IMAGE_BLOCK] = "block",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);
diff --git a/src/shared/machine-image.h b/src/shared/machine-image.h
new file mode 100644
index 0000000..95a8f5c
--- /dev/null
+++ b/src/shared/machine-image.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "sd-id128.h"
+
+#include "hashmap.h"
+#include "lockfile-util.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "time-util.h"
+
+typedef enum ImageClass {
+ IMAGE_MACHINE,
+ IMAGE_PORTABLE,
+ _IMAGE_CLASS_MAX,
+ _IMAGE_CLASS_INVALID = -1
+} ImageClass;
+
+typedef enum ImageType {
+ IMAGE_DIRECTORY,
+ IMAGE_SUBVOLUME,
+ IMAGE_RAW,
+ IMAGE_BLOCK,
+ _IMAGE_TYPE_MAX,
+ _IMAGE_TYPE_INVALID = -1
+} ImageType;
+
+typedef struct Image {
+ unsigned n_ref;
+
+ ImageType type;
+ char *name;
+ char *path;
+ bool read_only;
+
+ usec_t crtime;
+ usec_t mtime;
+
+ uint64_t usage;
+ uint64_t usage_exclusive;
+ uint64_t limit;
+ uint64_t limit_exclusive;
+
+ char *hostname;
+ sd_id128_t machine_id;
+ char **machine_info;
+ char **os_release;
+
+ bool metadata_valid:1;
+ bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */
+
+ void *userdata;
+} Image;
+
+Image *image_unref(Image *i);
+Image *image_ref(Image *i);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Image*, image_unref);
+
+int image_find(ImageClass class, const char *name, Image **ret);
+int image_from_path(const char *path, Image **ret);
+int image_find_harder(ImageClass class, const char *name_or_path, Image **ret);
+int image_discover(ImageClass class, Hashmap *map);
+
+int image_remove(Image *i);
+int image_rename(Image *i, const char *new_name);
+int image_clone(Image *i, const char *new_name, bool read_only);
+int image_read_only(Image *i, bool b);
+
+const char* image_type_to_string(ImageType t) _const_;
+ImageType image_type_from_string(const char *s) _pure_;
+
+bool image_name_is_valid(const char *s) _pure_;
+
+int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local);
+int image_name_lock(const char *name, int operation, LockFile *ret);
+
+int image_set_limit(Image *i, uint64_t referenced_max);
+
+int image_read_metadata(Image *i);
+
+bool image_in_search_path(ImageClass class, const char *image);
+
+static inline bool IMAGE_IS_HIDDEN(const struct Image *i) {
+ assert(i);
+
+ return i->name && i->name[0] == '.';
+}
+
+static inline bool IMAGE_IS_VENDOR(const struct Image *i) {
+ assert(i);
+
+ return i->path && path_startswith(i->path, "/usr");
+}
+
+static inline bool IMAGE_IS_HOST(const struct Image *i) {
+ assert(i);
+
+ if (i->name && streq(i->name, ".host"))
+ return true;
+
+ if (i->path && path_equal(i->path, "/"))
+ return true;
+
+ return false;
+}
+
+extern const struct hash_ops image_hash_ops;
diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c
new file mode 100644
index 0000000..1f0b0b4
--- /dev/null
+++ b/src/shared/machine-pool.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "btrfs-util.h"
+#include "label.h"
+#include "machine-pool.h"
+#include "missing_magic.h"
+#include "stat-util.h"
+
+static int check_btrfs(void) {
+ struct statfs sfs;
+
+ if (statfs("/var/lib/machines", &sfs) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ if (statfs("/var/lib", &sfs) < 0)
+ return -errno;
+ }
+
+ return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
+}
+
+int setup_machine_directory(sd_bus_error *error) {
+ int r;
+
+ r = check_btrfs();
+ if (r < 0)
+ return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m");
+ if (r == 0)
+ return 0;
+
+ (void) btrfs_subvol_make_label("/var/lib/machines");
+
+ r = btrfs_quota_enable("/var/lib/machines", true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m");
+
+ r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true);
+ if (r < 0)
+ log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m");
+
+ return 1;
+}
diff --git a/src/shared/machine-pool.h b/src/shared/machine-pool.h
new file mode 100644
index 0000000..3f528ab
--- /dev/null
+++ b/src/shared/machine-pool.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdint.h>
+
+#include "sd-bus.h"
+
+int setup_machine_directory(sd_bus_error *error);
diff --git a/src/shared/macvlan-util.c b/src/shared/macvlan-util.c
new file mode 100644
index 0000000..11dffe9
--- /dev/null
+++ b/src/shared/macvlan-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "macvlan-util.h"
+#include "string-table.h"
+
+static const char* const macvlan_mode_table[_NETDEV_MACVLAN_MODE_MAX] = {
+ [NETDEV_MACVLAN_MODE_PRIVATE] = "private",
+ [NETDEV_MACVLAN_MODE_VEPA] = "vepa",
+ [NETDEV_MACVLAN_MODE_BRIDGE] = "bridge",
+ [NETDEV_MACVLAN_MODE_PASSTHRU] = "passthru",
+ [NETDEV_MACVLAN_MODE_SOURCE] = "source",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(macvlan_mode, MacVlanMode);
diff --git a/src/shared/macvlan-util.h b/src/shared/macvlan-util.h
new file mode 100644
index 0000000..0d3a5f4
--- /dev/null
+++ b/src/shared/macvlan-util.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/if_link.h>
+
+typedef enum MacVlanMode {
+ NETDEV_MACVLAN_MODE_PRIVATE = MACVLAN_MODE_PRIVATE,
+ NETDEV_MACVLAN_MODE_VEPA = MACVLAN_MODE_VEPA,
+ NETDEV_MACVLAN_MODE_BRIDGE = MACVLAN_MODE_BRIDGE,
+ NETDEV_MACVLAN_MODE_PASSTHRU = MACVLAN_MODE_PASSTHRU,
+ NETDEV_MACVLAN_MODE_SOURCE = MACVLAN_MODE_SOURCE,
+ _NETDEV_MACVLAN_MODE_MAX,
+ _NETDEV_MACVLAN_MODE_INVALID = -1
+} MacVlanMode;
+
+const char *macvlan_mode_to_string(MacVlanMode d) _const_;
+MacVlanMode macvlan_mode_from_string(const char *d) _pure_;
diff --git a/src/shared/main-func.h b/src/shared/main-func.h
new file mode 100644
index 0000000..05cdffe
--- /dev/null
+++ b/src/shared/main-func.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdlib.h>
+
+#include "sd-daemon.h"
+
+#include "pager.h"
+#include "selinux-util.h"
+#include "spawn-ask-password-agent.h"
+#include "spawn-polkit-agent.h"
+#include "static-destruct.h"
+#include "util.h"
+
+#define _DEFINE_MAIN_FUNCTION(intro, impl, ret) \
+ int main(int argc, char *argv[]) { \
+ int r; \
+ save_argc_argv(argc, argv); \
+ intro; \
+ r = impl; \
+ if (r < 0) \
+ (void) sd_notifyf(0, "ERRNO=%i", -r); \
+ ask_password_agent_close(); \
+ polkit_agent_close(); \
+ pager_close(); \
+ mac_selinux_finish(); \
+ static_destruct(); \
+ return ret; \
+ }
+
+/* Negative return values from impl are mapped to EXIT_FAILURE, and
+ * everything else means success! */
+#define DEFINE_MAIN_FUNCTION(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : EXIT_SUCCESS)
+
+/* Zero is mapped to EXIT_SUCCESS, negative values are mapped to EXIT_FAILURE,
+ * and positive values are propagated.
+ * Note: "true" means failure! */
+#define DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(impl) \
+ _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : r)
diff --git a/src/shared/meson.build b/src/shared/meson.build
new file mode 100644
index 0000000..f30fe44
--- /dev/null
+++ b/src/shared/meson.build
@@ -0,0 +1,399 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+shared_sources = files('''
+ acl-util.h
+ acpi-fpdt.c
+ acpi-fpdt.h
+ apparmor-util.c
+ apparmor-util.h
+ ask-password-api.c
+ ask-password-api.h
+ barrier.c
+ barrier.h
+ base-filesystem.c
+ base-filesystem.h
+ binfmt-util.c
+ binfmt-util.h
+ bitmap.c
+ bitmap.h
+ blkid-util.h
+ bond-util.c
+ bond-util.h
+ boot-timestamps.c
+ boot-timestamps.h
+ bootspec.c
+ bootspec.h
+ bpf-program.c
+ bpf-program.h
+ bridge-util.c
+ bridge-util.h
+ bus-get-properties.c
+ bus-get-properties.h
+ bus-locator.c
+ bus-locator.h
+ bus-log-control-api.c
+ bus-log-control-api.h
+ bus-map-properties.c
+ bus-map-properties.h
+ bus-message-util.c
+ bus-message-util.h
+ bus-object.c
+ bus-object.h
+ bus-polkit.c
+ bus-polkit.h
+ bus-print-properties.c
+ bus-print-properties.h
+ bus-unit-procs.c
+ bus-unit-procs.h
+ bus-unit-util.c
+ bus-unit-util.h
+ bus-util.c
+ bus-util.h
+ bus-wait-for-jobs.c
+ bus-wait-for-jobs.h
+ bus-wait-for-units.c
+ bus-wait-for-units.h
+ calendarspec.c
+ calendarspec.h
+ cgroup-setup.c
+ cgroup-setup.h
+ cgroup-show.c
+ cgroup-show.h
+ chown-recursive.c
+ chown-recursive.h
+ clean-ipc.c
+ clean-ipc.h
+ clock-util.c
+ clock-util.h
+ condition.c
+ condition.h
+ conf-parser.c
+ conf-parser.h
+ coredump-util.c
+ coredump-util.h
+ cpu-set-util.c
+ cpu-set-util.h
+ cryptsetup-util.c
+ cryptsetup-util.h
+ daemon-util.h
+ dev-setup.c
+ dev-setup.h
+ dissect-image.c
+ dissect-image.h
+ dm-util.c
+ dm-util.h
+ dns-domain.c
+ dns-domain.h
+ dropin.c
+ dropin.h
+ efi-loader.c
+ efi-loader.h
+ enable-mempool.c
+ env-file-label.c
+ env-file-label.h
+ ethtool-util.c
+ ethtool-util.h
+ exec-util.c
+ exec-util.h
+ exit-status.c
+ exit-status.h
+ fdset.c
+ fdset.h
+ fileio-label.c
+ fileio-label.h
+ firewall-util.h
+ format-table.c
+ format-table.h
+ fsck-util.h
+ fstab-util.c
+ fstab-util.h
+ generator.c
+ generator.h
+ geneve-util.c
+ geneve-util.h
+ gpt.c
+ gpt.h
+ group-record.c
+ group-record.h
+ id128-print.c
+ id128-print.h
+ idn-util.c
+ idn-util.h
+ ima-util.c
+ ima-util.h
+ import-util.c
+ import-util.h
+ initreq.h
+ install-printf.c
+ install-printf.h
+ install.c
+ install.h
+ ipvlan-util.c
+ ipvlan-util.h
+ ip-protocol-list.c
+ ip-protocol-list.h
+ journal-importer.c
+ journal-importer.h
+ journal-util.c
+ journal-util.h
+ json-internal.h
+ json.c
+ json.h
+ libcrypt-util.c
+ libcrypt-util.h
+ libmount-util.h
+ linux/auto_dev-ioctl.h
+ linux/bpf.h
+ linux/bpf_common.h
+ linux/bpf_insn.h
+ linux/dm-ioctl.h
+ linux/ethtool.h
+ local-addresses.c
+ local-addresses.h
+ lockfile-util.c
+ lockfile-util.h
+ log-link.h
+ logs-show.c
+ logs-show.h
+ loop-util.c
+ loop-util.h
+ machine-image.c
+ machine-image.h
+ machine-pool.c
+ machine-pool.h
+ macvlan-util.c
+ macvlan-util.h
+ main-func.h
+ mkfs-util.c
+ mkfs-util.h
+ module-util.h
+ mount-util.c
+ mount-util.h
+ netif-naming-scheme.c
+ netif-naming-scheme.h
+ nscd-flush.c
+ nscd-flush.h
+ nsflags.c
+ nsflags.h
+ numa-util.c
+ numa-util.h
+ openssl-util.h
+ os-util.c
+ os-util.h
+ output-mode.c
+ output-mode.h
+ pager.c
+ pager.h
+ pe-header.h
+ pkcs11-util.c
+ pkcs11-util.h
+ pretty-print.c
+ pretty-print.h
+ psi-util.c
+ psi-util.h
+ ptyfwd.c
+ ptyfwd.h
+ pwquality-util.c
+ pwquality-util.h
+ qrcode-util.c
+ qrcode-util.h
+ reboot-util.c
+ reboot-util.h
+ resize-fs.c
+ resize-fs.h
+ resolve-util.c
+ resolve-util.h
+ seccomp-util.h
+ securebits-util.c
+ securebits-util.h
+ serialize.c
+ serialize.h
+ service-util.c
+ service-util.h
+ sleep-config.c
+ sleep-config.h
+ socket-netlink.c
+ socket-netlink.h
+ spawn-ask-password-agent.c
+ spawn-ask-password-agent.h
+ spawn-polkit-agent.c
+ spawn-polkit-agent.h
+ specifier.c
+ specifier.h
+ switch-root.c
+ switch-root.h
+ sysctl-util.c
+ sysctl-util.h
+ tmpfile-util-label.c
+ tmpfile-util-label.h
+ tomoyo-util.c
+ tomoyo-util.h
+ udev-util.c
+ udev-util.h
+ uid-range.c
+ uid-range.h
+ unit-file.c
+ unit-file.h
+ user-record-nss.c
+ user-record-nss.h
+ user-record-show.c
+ user-record-show.h
+ user-record.c
+ user-record.h
+ userdb.c
+ userdb.h
+ utmp-wtmp.h
+ varlink.c
+ varlink.h
+ verbs.c
+ verbs.h
+ vlan-util.c
+ vlan-util.h
+ volatile-util.c
+ volatile-util.h
+ watchdog.c
+ watchdog.h
+ web-util.c
+ web-util.h
+ wifi-util.c
+ wifi-util.h
+ xml.c
+ xml.h
+'''.split())
+
+if get_option('tests') != 'false'
+ shared_sources += files('tests.c', 'tests.h')
+endif
+
+test_tables_h = files('test-tables.h')
+shared_sources += test_tables_h
+
+generate_syscall_list = find_program('generate-syscall-list.py')
+fname = 'syscall-list.h'
+syscall_list_h = custom_target(
+ fname,
+ input : 'syscall-names.text',
+ output : fname,
+ command : [generate_syscall_list,
+ '@INPUT@'],
+ capture : true)
+
+if conf.get('HAVE_ACL') == 1
+ shared_sources += files('acl-util.c')
+endif
+
+if conf.get('ENABLE_UTMP') == 1
+ shared_sources += files('utmp-wtmp.c')
+endif
+
+if conf.get('HAVE_SECCOMP') == 1
+ shared_sources += files('seccomp-util.c')
+ shared_sources += syscall_list_h
+endif
+
+if conf.get('HAVE_LIBIPTC') == 1
+ shared_sources += files('firewall-util.c')
+endif
+
+if conf.get('HAVE_KMOD') == 1
+ shared_sources += files('module-util.c')
+endif
+
+if conf.get('HAVE_PAM') == 1
+ shared_sources += files('''
+ pam-util.c
+ pam-util.h
+'''.split())
+endif
+
+generate_ip_protocol_list = find_program('generate-ip-protocol-list.sh')
+ip_protocol_list_txt = custom_target(
+ 'ip-protocol-list.txt',
+ output : 'ip-protocol-list.txt',
+ command : [generate_ip_protocol_list, cpp],
+ capture : true)
+
+fname = 'ip-protocol-from-name.gperf'
+gperf_file = custom_target(
+ fname,
+ input : ip_protocol_list_txt,
+ output : fname,
+ command : [generate_gperfs, 'ip_protocol', 'IPPROTO_', '@INPUT@'],
+ capture : true)
+
+fname = 'ip-protocol-from-name.h'
+target1 = custom_target(
+ fname,
+ input : gperf_file,
+ output : fname,
+ command : [gperf,
+ '-L', 'ANSI-C', '-t', '--ignore-case',
+ '-N', 'lookup_ip_protocol',
+ '-H', 'hash_ip_protocol_name',
+ '-p', '-C',
+ '@INPUT@'],
+ capture : true)
+
+fname = 'ip-protocol-to-name.h'
+awkscript = 'ip-protocol-to-name.awk'
+target2 = custom_target(
+ fname,
+ input : [awkscript, ip_protocol_list_txt],
+ output : fname,
+ command : [awk, '-f', '@INPUT0@', '@INPUT1@'],
+ capture : true)
+
+shared_generated_gperf_headers = [target1, target2]
+shared_sources += shared_generated_gperf_headers
+
+libshared_name = 'systemd-shared-@0@'.format(meson.project_version())
+
+libshared_deps = [threads,
+ libacl,
+ libblkid,
+ libcap,
+ libcrypt,
+ libgcrypt,
+ libiptc,
+ libkmod,
+ liblz4,
+ libmount,
+ libopenssl,
+ libp11kit,
+ libpam,
+ librt,
+ libseccomp,
+ libselinux,
+ libzstd,
+ libxz]
+
+libshared_sym_path = '@0@/libshared.sym'.format(meson.current_source_dir())
+
+libshared_static = static_library(
+ libshared_name,
+ shared_sources,
+ include_directories : includes,
+ dependencies : libshared_deps,
+ c_args : ['-fvisibility=default'])
+
+libshared = shared_library(
+ libshared_name,
+ libudev_sources,
+ include_directories : includes,
+ link_args : ['-shared',
+ '-Wl,--version-script=' + libshared_sym_path],
+ link_whole : [libshared_static,
+ libbasic,
+ libbasic_gcrypt,
+ libsystemd_static,
+ libjournal_client],
+ c_args : ['-fvisibility=default'],
+ dependencies : libshared_deps,
+ install : true,
+ install_dir : rootlibexecdir)
+
+############################################################
+
+run_target(
+ 'syscall-names-update',
+ command : [syscall_names_update_sh, meson.current_source_dir()])
diff --git a/src/shared/mkfs-util.c b/src/shared/mkfs-util.c
new file mode 100644
index 0000000..ce10e60
--- /dev/null
+++ b/src/shared/mkfs-util.c
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "id128-util.h"
+#include "mkfs-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+int mkfs_exists(const char *fstype) {
+ const char *mkfs;
+ int r;
+
+ assert(fstype);
+
+ if (STR_IN_SET(fstype, "auto", "swap")) /* these aren't real file system types, refuse early */
+ return -EINVAL;
+
+ mkfs = strjoina("mkfs.", fstype);
+ if (!filename_is_valid(mkfs)) /* refuse file system types with slashes and similar */
+ return -EINVAL;
+
+ r = find_executable(mkfs, NULL);
+ if (r == -ENOENT)
+ return false;
+ if (r < 0)
+ return r;
+
+ return true;
+}
+
+int make_filesystem(
+ const char *node,
+ const char *fstype,
+ const char *label,
+ sd_id128_t uuid,
+ bool discard) {
+
+ _cleanup_free_ char *mkfs = NULL;
+ int r;
+
+ assert(node);
+ assert(fstype);
+ assert(label);
+
+ if (streq(fstype, "swap")) {
+ r = find_executable("mkswap", &mkfs);
+ if (r == -ENOENT)
+ return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkswap binary not available.");
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether mkswap binary exists: %m");
+ } else {
+ r = mkfs_exists(fstype);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether mkfs binary for %s exists: %m", fstype);
+ if (r == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for %s is not available.", fstype);
+
+ mkfs = strjoin("mkfs.", fstype);
+ if (!mkfs)
+ return log_oom();
+ }
+
+ r = safe_fork("(mkfs)", FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR, NULL);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ char suuid[ID128_UUID_STRING_MAX];
+
+ /* Child */
+ id128_to_uuid_string(uuid, suuid);
+
+ if (streq(fstype, "ext4"))
+ (void) execlp(mkfs, mkfs,
+ "-L", label,
+ "-U", suuid,
+ "-I", "256",
+ "-O", "has_journal",
+ "-m", "0",
+ "-E", discard ? "lazy_itable_init=1,discard" : "lazy_itable_init=1,nodiscard",
+ node, NULL);
+
+ else if (streq(fstype, "btrfs")) {
+ if (discard)
+ (void) execlp(mkfs, mkfs, "-L", label, "-U", suuid, node, NULL);
+ else
+ (void) execlp(mkfs, mkfs, "-L", label, "-U", suuid, "--nodiscard", node, NULL);
+
+ } else if (streq(fstype, "xfs")) {
+ const char *j;
+
+ j = strjoina("uuid=", suuid);
+ if (discard)
+ (void) execlp(mkfs, mkfs, "-L", label, "-m", j, "-m", "reflink=1", node, NULL);
+ else
+ (void) execlp(mkfs, mkfs, "-L", label, "-m", j, "-m", "reflink=1", "-K", node, NULL);
+
+ } else if (streq(fstype, "vfat")) {
+ char mangled_label[8 + 3 + 1], vol_id[8 + 1];
+
+ /* Classic FAT only allows 11 character uppercase labels */
+ strncpy(mangled_label, label, sizeof(mangled_label)-1);
+ mangled_label[sizeof(mangled_label)-1] = 0;
+ ascii_strupper(mangled_label);
+
+ xsprintf(vol_id, "%08" PRIx32,
+ ((uint32_t) uuid.bytes[0] << 24) |
+ ((uint32_t) uuid.bytes[1] << 16) |
+ ((uint32_t) uuid.bytes[2] << 8) |
+ ((uint32_t) uuid.bytes[3])); /* Take first 32 byte of UUID */
+
+ (void) execlp(mkfs, mkfs,
+ "-i", vol_id,
+ "-n", mangled_label,
+ "-F", "32", /* yes, we force FAT32 here */
+ node, NULL);
+
+ } else if (streq(fstype, "swap")) {
+
+ (void) execlp(mkfs, mkfs,
+ "-L", label,
+ "-U", suuid,
+ node, NULL);
+
+ } else
+ /* Generic fallback for all other file systems */
+ (void) execlp(mkfs, mkfs, node, NULL);
+
+ log_error_errno(errno, "Failed to execute %s: %m", mkfs);
+
+ _exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
diff --git a/src/shared/mkfs-util.h b/src/shared/mkfs-util.h
new file mode 100644
index 0000000..7647afb
--- /dev/null
+++ b/src/shared/mkfs-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-id128.h"
+
+int mkfs_exists(const char *fstype);
+
+int make_filesystem(const char *node, const char *fstype, const char *label, sd_id128_t uuid, bool discard);
diff --git a/src/shared/module-util.c b/src/shared/module-util.c
new file mode 100644
index 0000000..587e636
--- /dev/null
+++ b/src/shared/module-util.c
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "module-util.h"
+
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose) {
+ const int probe_flags = KMOD_PROBE_APPLY_BLACKLIST;
+ struct kmod_list *itr;
+ _cleanup_(kmod_module_unref_listp) struct kmod_list *modlist = NULL;
+ int r;
+
+ /* verbose==true means we should log at non-debug level if we
+ * fail to find or load the module. */
+
+ log_debug("Loading module: %s", module);
+
+ r = kmod_module_new_from_lookup(ctx, module, &modlist);
+ if (r < 0)
+ return log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r,
+ "Failed to look up module alias '%s': %m", module);
+
+ if (!modlist) {
+ log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r,
+ "Failed to find module '%s'", module);
+ return -ENOENT;
+ }
+
+ kmod_list_foreach(itr, modlist) {
+ _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL;
+ int state, err;
+
+ mod = kmod_module_get_module(itr);
+ state = kmod_module_get_initstate(mod);
+
+ switch (state) {
+ case KMOD_MODULE_BUILTIN:
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Module '%s' is built in", kmod_module_get_name(mod));
+ break;
+
+ case KMOD_MODULE_LIVE:
+ log_debug("Module '%s' is already loaded", kmod_module_get_name(mod));
+ break;
+
+ default:
+ err = kmod_module_probe_insert_module(mod, probe_flags,
+ NULL, NULL, NULL, NULL);
+ if (err == 0)
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Inserted module '%s'", kmod_module_get_name(mod));
+ else if (err == KMOD_PROBE_APPLY_BLACKLIST)
+ log_full(verbose ? LOG_INFO : LOG_DEBUG,
+ "Module '%s' is deny-listed", kmod_module_get_name(mod));
+ else {
+ assert(err < 0);
+
+ log_full_errno(!verbose ? LOG_DEBUG :
+ err == -ENODEV ? LOG_NOTICE :
+ err == -ENOENT ? LOG_WARNING :
+ LOG_ERR,
+ err,
+ "Failed to insert module '%s': %m",
+ kmod_module_get_name(mod));
+ if (!IN_SET(err, -ENODEV, -ENOENT))
+ r = err;
+ }
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/module-util.h b/src/shared/module-util.h
new file mode 100644
index 0000000..4db8c5f
--- /dev/null
+++ b/src/shared/module-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <libkmod.h>
+
+#include "macro.h"
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_ctx*, kmod_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_module*, kmod_module_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_list*, kmod_module_unref_list);
+
+int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose);
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
new file mode 100644
index 0000000..b19b384
--- /dev/null
+++ b/src/shared/mount-util.c
@@ -0,0 +1,744 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hashmap.h"
+#include "libmount-util.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "set.h"
+#include "stat-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int mount_fd(const char *source,
+ int target_fd,
+ const char *filesystemtype,
+ unsigned long mountflags,
+ const void *data) {
+
+ char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
+
+ xsprintf(path, "/proc/self/fd/%i", target_fd);
+ if (mount(source, path, filesystemtype, mountflags, data) < 0) {
+ if (errno != ENOENT)
+ return -errno;
+
+ /* ENOENT can mean two things: either that the source is missing, or that /proc/ isn't
+ * mounted. Check for the latter to generate better error messages. */
+ if (proc_mounted() == 0)
+ return -ENOSYS;
+
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+int mount_nofollow(
+ const char *source,
+ const char *target,
+ const char *filesystemtype,
+ unsigned long mountflags,
+ const void *data) {
+
+ _cleanup_close_ int fd = -1;
+
+ /* In almost all cases we want to manipulate the mount table without following symlinks, hence
+ * mount_nofollow() is usually the way to go. The only exceptions are environments where /proc/ is
+ * not available yet, since we need /proc/self/fd/ for this logic to work. i.e. during the early
+ * initialization of namespacing/container stuff where /proc is not yet mounted (and maybe even the
+ * fs to mount) we can only use traditional mount() directly.
+ *
+ * Note that this disables following only for the final component of the target, i.e symlinks within
+ * the path of the target are honoured, as are symlinks in the source path everywhere. */
+
+ fd = open(target, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ return mount_fd(source, fd, filesystemtype, mountflags, data);
+}
+
+int umount_recursive(const char *prefix, int flags) {
+ int n = 0, r;
+ bool again;
+
+ /* Try to umount everything recursively below a
+ * directory. Also, take care of stacked mounts, and keep
+ * unmounting them until they are gone. */
+
+ do {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+
+ again = false;
+
+ r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *path;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ path = mnt_fs_get_target(fs);
+ if (!path)
+ continue;
+
+ if (!path_startswith(path, prefix))
+ continue;
+
+ if (umount2(path, flags | UMOUNT_NOFOLLOW) < 0) {
+ log_debug_errno(errno, "Failed to umount %s, ignoring: %m", path);
+ continue;
+ }
+
+ log_debug("Successfully unmounted %s", path);
+
+ again = true;
+ n++;
+
+ break;
+ }
+ } while (again);
+
+ return n;
+}
+
+static int get_mount_flags(
+ struct libmnt_table *table,
+ const char *path,
+ unsigned long *ret) {
+
+ _cleanup_close_ int fd = -1;
+ struct libmnt_fs *fs;
+ struct statvfs buf;
+ const char *opts;
+ int r;
+
+ /* Get the mount flags for the mountpoint at "path" from "table". We have a fallback using statvfs()
+ * in place (which provides us with mostly the same info), but it's just a fallback, since using it
+ * means triggering autofs or NFS mounts, which we'd rather avoid needlessly.
+ *
+ * This generally doesn't follow symlinks. */
+
+ fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD);
+ if (!fs) {
+ log_debug("Could not find '%s' in mount table, ignoring.", path);
+ goto fallback;
+ }
+
+ opts = mnt_fs_get_vfs_options(fs);
+ if (!opts) {
+ *ret = 0;
+ return 0;
+ }
+
+ r = mnt_optstr_get_flags(opts, ret, mnt_get_builtin_optmap(MNT_LINUX_MAP));
+ if (r != 0) {
+ log_debug_errno(r, "Could not get flags for '%s', ignoring: %m", path);
+ goto fallback;
+ }
+
+ /* MS_RELATIME is default and trying to set it in an unprivileged container causes EPERM */
+ *ret &= ~MS_RELATIME;
+ return 0;
+
+fallback:
+ fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
+ if (fd < 0)
+ return -errno;
+
+ if (fstatvfs(fd, &buf) < 0)
+ return -errno;
+
+ /* The statvfs() flags and the mount flags mostly have the same values, but for some cases do
+ * not. Hence map the flags manually. (Strictly speaking, ST_RELATIME/MS_RELATIME is the most
+ * prominent one that doesn't match, but that's the one we mask away anyway, see above.) */
+
+ *ret =
+ FLAGS_SET(buf.f_flag, ST_RDONLY) * MS_RDONLY |
+ FLAGS_SET(buf.f_flag, ST_NODEV) * MS_NODEV |
+ FLAGS_SET(buf.f_flag, ST_NOEXEC) * MS_NOEXEC |
+ FLAGS_SET(buf.f_flag, ST_NOSUID) * MS_NOSUID |
+ FLAGS_SET(buf.f_flag, ST_NOATIME) * MS_NOATIME |
+ FLAGS_SET(buf.f_flag, ST_NODIRATIME) * MS_NODIRATIME;
+
+ return 0;
+}
+
+/* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
+ * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
+int bind_remount_recursive_with_mountinfo(
+ const char *prefix,
+ unsigned long new_flags,
+ unsigned long flags_mask,
+ char **deny_list,
+ FILE *proc_self_mountinfo) {
+
+ _cleanup_set_free_free_ Set *done = NULL;
+ _cleanup_free_ char *simplified = NULL;
+ int r;
+
+ assert(prefix);
+ assert(proc_self_mountinfo);
+
+ /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
+ * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+ * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
+ * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
+ * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
+ * do not have any effect on future submounts that might get propagated, they might be writable. This includes
+ * future submounts that have been triggered via autofs.
+ *
+ * If the "deny_list" parameter is specified it may contain a list of subtrees to exclude from the
+ * remount operation. Note that we'll ignore the deny list for the top-level path. */
+
+ simplified = strdup(prefix);
+ if (!simplified)
+ return -ENOMEM;
+
+ path_simplify(simplified, false);
+
+ done = set_new(&path_hash_ops);
+ if (!done)
+ return -ENOMEM;
+
+ for (;;) {
+ _cleanup_set_free_free_ Set *todo = NULL;
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ bool top_autofs = false;
+ char *x;
+ unsigned long orig_flags;
+
+ todo = set_new(&path_hash_ops);
+ if (!todo)
+ return -ENOMEM;
+
+ rewind(proc_self_mountinfo);
+
+ r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ struct libmnt_fs *fs;
+ const char *path, *type;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ path = mnt_fs_get_target(fs);
+ type = mnt_fs_get_fstype(fs);
+ if (!path || !type)
+ continue;
+
+ if (!path_startswith(path, simplified))
+ continue;
+
+ /* Ignore this mount if it is deny-listed, but only if it isn't the top-level mount
+ * we shall operate on. */
+ if (!path_equal(path, simplified)) {
+ bool deny_listed = false;
+ char **i;
+
+ STRV_FOREACH(i, deny_list) {
+ if (path_equal(*i, simplified))
+ continue;
+
+ if (!path_startswith(*i, simplified))
+ continue;
+
+ if (path_startswith(path, *i)) {
+ deny_listed = true;
+ log_debug("Not remounting %s deny-listed by %s, called for %s",
+ path, *i, simplified);
+ break;
+ }
+ }
+ if (deny_listed)
+ continue;
+ }
+
+ /* Let's ignore autofs mounts. If they aren't
+ * triggered yet, we want to avoid triggering
+ * them, as we don't make any guarantees for
+ * future submounts anyway. If they are
+ * already triggered, then we will find
+ * another entry for this. */
+ if (streq(type, "autofs")) {
+ top_autofs = top_autofs || path_equal(path, simplified);
+ continue;
+ }
+
+ if (!set_contains(done, path)) {
+ r = set_put_strdup(&todo, path);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ /* If we have no submounts to process anymore and if
+ * the root is either already done, or an autofs, we
+ * are done */
+ if (set_isempty(todo) &&
+ (top_autofs || set_contains(done, simplified)))
+ return 0;
+
+ if (!set_contains(done, simplified) &&
+ !set_contains(todo, simplified)) {
+ /* The prefix directory itself is not yet a mount, make it one. */
+ r = mount_nofollow(simplified, simplified, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ orig_flags = 0;
+ (void) get_mount_flags(table, simplified, &orig_flags);
+
+ r = mount_nofollow(NULL, simplified, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL);
+ if (r < 0)
+ return r;
+
+ log_debug("Made top-level directory %s a mount point.", prefix);
+
+ r = set_put_strdup(&done, simplified);
+ if (r < 0)
+ return r;
+ }
+
+ while ((x = set_steal_first(todo))) {
+
+ r = set_consume(done, x);
+ if (IN_SET(r, 0, -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+
+ /* Deal with mount points that are obstructed by a later mount */
+ r = path_is_mount_point(x, NULL, 0);
+ if (IN_SET(r, 0, -ENOENT))
+ continue;
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ return r;
+
+ /* Even if root user invoke this, submounts under private FUSE or NFS mount points
+ * may not be acceessed. E.g.,
+ *
+ * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
+ * $ bindfs --no-allow-other ~/mnt ~/mnt
+ *
+ * Then, root user cannot access the mount point ~/mnt/mnt.
+ * In such cases, the submounts are ignored, as we have no way to manage them. */
+ log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
+ continue;
+ }
+
+ /* Try to reuse the original flag set */
+ orig_flags = 0;
+ (void) get_mount_flags(table, x, &orig_flags);
+
+ r = mount_nofollow(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL);
+ if (r < 0)
+ return r;
+
+ log_debug("Remounted %s read-only.", x);
+ }
+ }
+}
+
+int bind_remount_recursive(
+ const char *prefix,
+ unsigned long new_flags,
+ unsigned long flags_mask,
+ char **deny_list) {
+
+ _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+ int r;
+
+ r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
+ if (r < 0)
+ return r;
+
+ return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, proc_self_mountinfo);
+}
+
+int bind_remount_one_with_mountinfo(
+ const char *path,
+ unsigned long new_flags,
+ unsigned long flags_mask,
+ FILE *proc_self_mountinfo) {
+
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ unsigned long orig_flags = 0;
+ int r;
+
+ assert(path);
+ assert(proc_self_mountinfo);
+
+ rewind(proc_self_mountinfo);
+
+ table = mnt_new_table();
+ if (!table)
+ return -ENOMEM;
+
+ r = mnt_table_parse_stream(table, proc_self_mountinfo, "/proc/self/mountinfo");
+ if (r < 0)
+ return r;
+
+ /* Try to reuse the original flag set */
+ (void) get_mount_flags(table, path, &orig_flags);
+
+ r = mount_nofollow(NULL, path, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int mount_move_root(const char *path) {
+ assert(path);
+
+ if (chdir(path) < 0)
+ return -errno;
+
+ if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
+ return -errno;
+
+ if (chroot(".") < 0)
+ return -errno;
+
+ if (chdir("/") < 0)
+ return -errno;
+
+ return 0;
+}
+
+int repeat_unmount(const char *path, int flags) {
+ bool done = false;
+
+ assert(path);
+
+ /* If there are multiple mounts on a mount point, this
+ * removes them all */
+
+ for (;;) {
+ if (umount2(path, flags) < 0) {
+
+ if (errno == EINVAL)
+ return done;
+
+ return -errno;
+ }
+
+ done = true;
+ }
+}
+
+int mode_to_inaccessible_node(
+ const char *runtime_dir,
+ mode_t mode,
+ char **ret) {
+
+ /* This function maps a node type to a corresponding inaccessible file node. These nodes are created
+ * during early boot by PID 1. In some cases we lacked the privs to create the character and block
+ * devices (maybe because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a
+ * devices policy that excludes device nodes with major and minor of 0), but that's fine, in that
+ * case we use an AF_UNIX file node instead, which is not the same, but close enough for most
+ * uses. And most importantly, the kernel allows bind mounts from socket nodes to any non-directory
+ * file nodes, and that's the most important thing that matters.
+ *
+ * Note that the runtime directory argument shall be the top-level runtime directory, i.e. /run/ if
+ * we operate in system context and $XDG_RUNTIME_DIR if we operate in user context. */
+
+ _cleanup_free_ char *d = NULL;
+ const char *node = NULL;
+
+ assert(ret);
+
+ if (!runtime_dir)
+ runtime_dir = "/run";
+
+ switch(mode & S_IFMT) {
+ case S_IFREG:
+ node = "/systemd/inaccessible/reg";
+ break;
+
+ case S_IFDIR:
+ node = "/systemd/inaccessible/dir";
+ break;
+
+ case S_IFCHR:
+ node = "/systemd/inaccessible/chr";
+ break;
+
+ case S_IFBLK:
+ node = "/systemd/inaccessible/blk";
+ break;
+
+ case S_IFIFO:
+ node = "/systemd/inaccessible/fifo";
+ break;
+
+ case S_IFSOCK:
+ node = "/systemd/inaccessible/sock";
+ break;
+ }
+ if (!node)
+ return -EINVAL;
+
+ d = path_join(runtime_dir, node);
+ if (!d)
+ return -ENOMEM;
+
+ /* On new kernels unprivileged users are permitted to create 0:0 char device nodes (because they also
+ * act as whiteout inode for overlayfs), but no other char or block device nodes. On old kernels no
+ * device node whatsoever may be created by unprivileged processes. Hence, if the caller asks for the
+ * inaccessible block device node let's see if the block device node actually exists, and if not,
+ * fall back to the character device node. From there fall back to the socket device node. This means
+ * in the best case we'll get the right device node type — but if not we'll hopefully at least get a
+ * device node at all. */
+
+ if (S_ISBLK(mode) &&
+ access(d, F_OK) < 0 && errno == ENOENT) {
+ free(d);
+ d = path_join(runtime_dir, "/systemd/inaccessible/chr");
+ if (!d)
+ return -ENOMEM;
+ }
+
+ if (IN_SET(mode & S_IFMT, S_IFBLK, S_IFCHR) &&
+ access(d, F_OK) < 0 && errno == ENOENT) {
+ free(d);
+ d = path_join(runtime_dir, "/systemd/inaccessible/sock");
+ if (!d)
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(d);
+ return 0;
+}
+
+#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
+static char* mount_flags_to_string(long unsigned flags) {
+ char *x;
+ _cleanup_free_ char *y = NULL;
+ long unsigned overflow;
+
+ overflow = flags & ~(MS_RDONLY |
+ MS_NOSUID |
+ MS_NODEV |
+ MS_NOEXEC |
+ MS_SYNCHRONOUS |
+ MS_REMOUNT |
+ MS_MANDLOCK |
+ MS_DIRSYNC |
+ MS_NOATIME |
+ MS_NODIRATIME |
+ MS_BIND |
+ MS_MOVE |
+ MS_REC |
+ MS_SILENT |
+ MS_POSIXACL |
+ MS_UNBINDABLE |
+ MS_PRIVATE |
+ MS_SLAVE |
+ MS_SHARED |
+ MS_RELATIME |
+ MS_KERNMOUNT |
+ MS_I_VERSION |
+ MS_STRICTATIME |
+ MS_LAZYTIME);
+
+ if (flags == 0 || overflow != 0)
+ if (asprintf(&y, "%lx", overflow) < 0)
+ return NULL;
+
+ x = strjoin(FLAG(MS_RDONLY),
+ FLAG(MS_NOSUID),
+ FLAG(MS_NODEV),
+ FLAG(MS_NOEXEC),
+ FLAG(MS_SYNCHRONOUS),
+ FLAG(MS_REMOUNT),
+ FLAG(MS_MANDLOCK),
+ FLAG(MS_DIRSYNC),
+ FLAG(MS_NOATIME),
+ FLAG(MS_NODIRATIME),
+ FLAG(MS_BIND),
+ FLAG(MS_MOVE),
+ FLAG(MS_REC),
+ FLAG(MS_SILENT),
+ FLAG(MS_POSIXACL),
+ FLAG(MS_UNBINDABLE),
+ FLAG(MS_PRIVATE),
+ FLAG(MS_SLAVE),
+ FLAG(MS_SHARED),
+ FLAG(MS_RELATIME),
+ FLAG(MS_KERNMOUNT),
+ FLAG(MS_I_VERSION),
+ FLAG(MS_STRICTATIME),
+ FLAG(MS_LAZYTIME),
+ y);
+ if (!x)
+ return NULL;
+ if (!y)
+ x[strlen(x) - 1] = '\0'; /* truncate the last | */
+ return x;
+}
+
+int mount_verbose_full(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options,
+ bool follow_symlink) {
+
+ _cleanup_free_ char *fl = NULL, *o = NULL;
+ unsigned long f;
+ int r;
+
+ r = mount_option_mangle(options, flags, &f, &o);
+ if (r < 0)
+ return log_full_errno(error_log_level, r,
+ "Failed to mangle mount options %s: %m",
+ strempty(options));
+
+ fl = mount_flags_to_string(f);
+
+ if ((f & MS_REMOUNT) && !what && !type)
+ log_debug("Remounting %s (%s \"%s\")...",
+ where, strnull(fl), strempty(o));
+ else if (!what && !type)
+ log_debug("Mounting %s (%s \"%s\")...",
+ where, strnull(fl), strempty(o));
+ else if ((f & MS_BIND) && !type)
+ log_debug("Bind-mounting %s on %s (%s \"%s\")...",
+ what, where, strnull(fl), strempty(o));
+ else if (f & MS_MOVE)
+ log_debug("Moving mount %s → %s (%s \"%s\")...",
+ what, where, strnull(fl), strempty(o));
+ else
+ log_debug("Mounting %s (%s) on %s (%s \"%s\")...",
+ strna(what), strna(type), where, strnull(fl), strempty(o));
+
+ if (follow_symlink)
+ r = mount(what, where, type, f, o) < 0 ? -errno : 0;
+ else
+ r = mount_nofollow(what, where, type, f, o);
+ if (r < 0)
+ return log_full_errno(error_log_level, r,
+ "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
+ strna(what), strna(type), where, strnull(fl), strempty(o));
+ return 0;
+}
+
+int umount_verbose(
+ int error_log_level,
+ const char *what,
+ int flags) {
+
+ assert(what);
+
+ log_debug("Umounting %s...", what);
+
+ if (umount2(what, flags) < 0)
+ return log_full_errno(error_log_level, errno,
+ "Failed to unmount %s: %m", what);
+
+ return 0;
+}
+
+int mount_option_mangle(
+ const char *options,
+ unsigned long mount_flags,
+ unsigned long *ret_mount_flags,
+ char **ret_remaining_options) {
+
+ const struct libmnt_optmap *map;
+ _cleanup_free_ char *ret = NULL;
+ const char *p;
+ int r;
+
+ /* This extracts mount flags from the mount options, and store
+ * non-mount-flag options to '*ret_remaining_options'.
+ * E.g.,
+ * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
+ * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
+ * "size=1630748k,mode=700,uid=1000,gid=1000".
+ * See more examples in test-mount-utils.c.
+ *
+ * Note that if 'options' does not contain any non-mount-flag options,
+ * then '*ret_remaining_options' is set to NULL instead of empty string.
+ * Note that this does not check validity of options stored in
+ * '*ret_remaining_options'.
+ * Note that if 'options' is NULL, then this just copies 'mount_flags'
+ * to '*ret_mount_flags'. */
+
+ assert(ret_mount_flags);
+ assert(ret_remaining_options);
+
+ map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
+ if (!map)
+ return -EINVAL;
+
+ p = options;
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ const struct libmnt_optmap *ent;
+
+ r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (ent = map; ent->name; ent++) {
+ /* All entries in MNT_LINUX_MAP do not take any argument.
+ * Thus, ent->name does not contain "=" or "[=]". */
+ if (!streq(word, ent->name))
+ continue;
+
+ if (!(ent->mask & MNT_INVERT))
+ mount_flags |= ent->id;
+ else if (mount_flags & ent->id)
+ mount_flags ^= ent->id;
+
+ break;
+ }
+
+ /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
+ if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
+ return -ENOMEM;
+ }
+
+ *ret_mount_flags = mount_flags;
+ *ret_remaining_options = TAKE_PTR(ret);
+
+ return 0;
+}
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
new file mode 100644
index 0000000..6202008
--- /dev/null
+++ b/src/shared/mount-util.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <mntent.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "errno-util.h"
+#include "macro.h"
+
+/* 4MB for contents of regular files, 64k inodes for directories, symbolic links and device specials, using
+ * large storage array systems as a baseline */
+#define TMPFS_LIMITS_DEV ",size=4m,nr_inodes=64k"
+
+/* Very little, if any use expected */
+#define TMPFS_LIMITS_EMPTY_OR_ALMOST ",size=4m,nr_inodes=1k"
+#define TMPFS_LIMITS_SYS TMPFS_LIMITS_EMPTY_OR_ALMOST
+#define TMPFS_LIMITS_SYS_FS_CGROUP TMPFS_LIMITS_EMPTY_OR_ALMOST
+
+/* On an extremely small device with only 256MB of RAM, 20% of RAM should be enough for the re-execution of
+ * PID1 because 16MB of free space is required. */
+#define TMPFS_LIMITS_RUN ",size=20%,nr_inodes=800k"
+
+/* The limit used for various nested tmpfs mounts, in paricular for guests started by systemd-nspawn.
+ * 10% of RAM (using 16GB of RAM as a baseline) translates to 400k inodes (assuming 4k each) and 25%
+ * translates to 1M inodes.
+ * (On the host, /tmp is configured through a .mount unit file.) */
+#define NESTED_TMPFS_LIMITS ",size=10%,nr_inodes=400k"
+
+/* More space for volatile root and /var */
+#define TMPFS_LIMITS_VAR ",size=25%,nr_inodes=1m"
+#define TMPFS_LIMITS_ROOTFS TMPFS_LIMITS_VAR
+#define TMPFS_LIMITS_VOLATILE_STATE TMPFS_LIMITS_VAR
+
+int mount_fd(const char *source, int target_fd, const char *filesystemtype, unsigned long mountflags, const void *data);
+int mount_nofollow(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data);
+
+int repeat_unmount(const char *path, int flags);
+int umount_recursive(const char *target, int flags);
+int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list);
+int bind_remount_recursive_with_mountinfo(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list, FILE *proc_self_mountinfo);
+int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo);
+
+int mount_move_root(const char *path);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, endmntent);
+#define _cleanup_endmntent_ _cleanup_(endmntentp)
+
+int mount_verbose_full(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options,
+ bool follow_symlink);
+
+static inline int mount_follow_verbose(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options) {
+ return mount_verbose_full(error_log_level, what, where, type, flags, options, true);
+}
+
+static inline int mount_nofollow_verbose(
+ int error_log_level,
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options) {
+ return mount_verbose_full(error_log_level, what, where, type, flags, options, false);
+}
+
+int umount_verbose(
+ int error_log_level,
+ const char *where,
+ int flags);
+
+int mount_option_mangle(
+ const char *options,
+ unsigned long mount_flags,
+ unsigned long *ret_mount_flags,
+ char **ret_remaining_options);
+
+int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest);
+
+/* Useful for usage with _cleanup_(), unmounts, removes a directory and frees the pointer */
+static inline char* umount_and_rmdir_and_free(char *p) {
+ PROTECT_ERRNO;
+ (void) umount_recursive(p, 0);
+ (void) rmdir(p);
+ free(p);
+ return NULL;
+}
+DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_rmdir_and_free);
diff --git a/src/shared/netif-naming-scheme.c b/src/shared/netif-naming-scheme.c
new file mode 100644
index 0000000..df520ab
--- /dev/null
+++ b/src/shared/netif-naming-scheme.c
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "netif-naming-scheme.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+
+static const NamingScheme naming_schemes[] = {
+ { "v238", NAMING_V238 },
+ { "v239", NAMING_V239 },
+ { "v240", NAMING_V240 },
+ { "v241", NAMING_V241 },
+ { "v243", NAMING_V243 },
+ { "v245", NAMING_V245 },
+ { "v247", NAMING_V247 },
+ /* … add more schemes here, as the logic to name devices is updated … */
+};
+
+static const NamingScheme* naming_scheme_from_name(const char *name) {
+ size_t i;
+
+ if (streq(name, "latest"))
+ return naming_schemes + ELEMENTSOF(naming_schemes) - 1;
+
+ for (i = 0; i < ELEMENTSOF(naming_schemes); i++)
+ if (streq(naming_schemes[i].name, name))
+ return naming_schemes + i;
+
+ return NULL;
+}
+
+const NamingScheme* naming_scheme(void) {
+ static const NamingScheme *cache = NULL;
+ _cleanup_free_ char *buffer = NULL;
+ const char *e, *k;
+
+ if (cache)
+ return cache;
+
+ /* Acquire setting from the kernel command line */
+ (void) proc_cmdline_get_key("net.naming-scheme", 0, &buffer);
+
+ /* Also acquire it from an env var */
+ e = getenv("NET_NAMING_SCHEME");
+ if (e) {
+ if (*e == ':') {
+ /* If prefixed with ':' the kernel cmdline takes precedence */
+ k = buffer ?: e + 1;
+ } else
+ k = e; /* Otherwise the env var takes precedence */
+ } else
+ k = buffer;
+
+ if (k) {
+ cache = naming_scheme_from_name(k);
+ if (cache) {
+ log_info("Using interface naming scheme '%s'.", cache->name);
+ return cache;
+ }
+
+ log_warning("Unknown interface naming scheme '%s' requested, ignoring.", k);
+ }
+
+ cache = naming_scheme_from_name(DEFAULT_NET_NAMING_SCHEME);
+ assert(cache);
+ log_info("Using default interface naming scheme '%s'.", cache->name);
+
+ return cache;
+}
diff --git a/src/shared/netif-naming-scheme.h b/src/shared/netif-naming-scheme.h
new file mode 100644
index 0000000..503a74e
--- /dev/null
+++ b/src/shared/netif-naming-scheme.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+/* So here's the deal: net_id is supposed to be an exercise in providing stable names for network devices. However, we
+ * also want to keep updating the naming scheme used in future versions of net_id. These two goals of course are
+ * contradictory: on one hand we want things to not change and on the other hand we want them to improve. Our way out
+ * of this dilemma is to introduce the "naming scheme" concept: each time we improve the naming logic we define a new
+ * flag for it. Then, we keep a list of schemes, each identified by a name associated with the flags it implements. Via
+ * a kernel command line and environment variable we then allow the user to pick the scheme they want us to follow:
+ * installers could "freeze" the used scheme at the moment of installation this way.
+ *
+ * Developers: each time you tweak the naming logic here, define a new flag below, and condition the tweak with
+ * it. Each time we do a release we'll then add a new scheme entry and include all newly defined flags.
+ *
+ * Note that this is only half a solution to the problem though: not only udev/net_id gets updated all the time, the
+ * kernel gets too. And thus a kernel that previously didn't expose some sysfs attribute we look for might eventually
+ * do, and thus affect our naming scheme too. Thus, enforcing a naming scheme will make interfacing more stable across
+ * OS versions, but not fully stabilize them. */
+typedef enum NamingSchemeFlags {
+ /* First, the individual features */
+ NAMING_SR_IOV_V = 1 << 0, /* Use "v" suffix for SR-IOV, see 609948c7043a */
+ NAMING_NPAR_ARI = 1 << 1, /* Use NPAR "ARI", see 6bc04997b6ea */
+ NAMING_INFINIBAND = 1 << 2, /* Use "ib" prefix for infiniband, see 938d30aa98df */
+ NAMING_ZERO_ACPI_INDEX = 1 << 3, /* Use zero acpi_index field, see d81186ef4f6a */
+ NAMING_ALLOW_RERENAMES = 1 << 4, /* Allow re-renaming of devices, see #9006 */
+ NAMING_STABLE_VIRTUAL_MACS = 1 << 5, /* Use device name to generate MAC, see 6d3646406560 */
+ NAMING_NETDEVSIM = 1 << 6, /* Generate names for netdevsim devices, see eaa9d507d855 */
+ NAMING_LABEL_NOPREFIX = 1 << 7, /* Don't prepend ID_NET_LABEL_ONBOARD with interface type prefix */
+ NAMING_NSPAWN_LONG_HASH = 1 << 8, /* Shorten nspawn interfaces by including 24bit hash, instead of simple truncation */
+ NAMING_BRIDGE_NO_SLOT = 1 << 9, /* Don't use PCI hotplug slot information if the corresponding device is a PCI bridge */
+
+ /* And now the masks that combine the features above */
+ NAMING_V238 = 0,
+ NAMING_V239 = NAMING_V238 | NAMING_SR_IOV_V | NAMING_NPAR_ARI,
+ NAMING_V240 = NAMING_V239 | NAMING_INFINIBAND | NAMING_ZERO_ACPI_INDEX | NAMING_ALLOW_RERENAMES,
+ NAMING_V241 = NAMING_V240 | NAMING_STABLE_VIRTUAL_MACS,
+ NAMING_V243 = NAMING_V241 | NAMING_NETDEVSIM | NAMING_LABEL_NOPREFIX,
+ NAMING_V245 = NAMING_V243 | NAMING_NSPAWN_LONG_HASH,
+ NAMING_V247 = NAMING_V245 | NAMING_BRIDGE_NO_SLOT,
+
+ _NAMING_SCHEME_FLAGS_INVALID = -1,
+} NamingSchemeFlags;
+
+typedef struct NamingScheme {
+ const char *name;
+ NamingSchemeFlags flags;
+} NamingScheme;
+
+const NamingScheme* naming_scheme(void);
+
+static inline bool naming_scheme_has(NamingSchemeFlags flags) {
+ return FLAGS_SET(naming_scheme()->flags, flags);
+}
diff --git a/src/shared/nscd-flush.c b/src/shared/nscd-flush.c
new file mode 100644
index 0000000..dfc47c4
--- /dev/null
+++ b/src/shared/nscd-flush.c
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include <sys/poll.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "nscd-flush.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+#define NSCD_FLUSH_CACHE_TIMEOUT_USEC (5*USEC_PER_SEC)
+
+struct nscdInvalidateRequest {
+ int32_t version;
+ int32_t type; /* in glibc this is an enum. We don't replicate this here 1:1. Also, wtf, how unportable is that
+ * even? */
+ int32_t key_len;
+ char dbname[];
+};
+
+static const union sockaddr_union nscd_sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/nscd/socket",
+};
+
+static int nscd_flush_cache_one(const char *database, usec_t end) {
+ size_t req_size, has_written = 0, has_read = 0, l;
+ struct nscdInvalidateRequest *req;
+ _cleanup_close_ int fd = -1;
+ int32_t resp;
+ int events;
+
+ assert(database);
+
+ l = strlen(database);
+ req_size = offsetof(struct nscdInvalidateRequest, dbname) + l + 1;
+
+ req = alloca(req_size);
+ *req = (struct nscdInvalidateRequest) {
+ .version = 2,
+ .type = 10,
+ .key_len = l + 1,
+ };
+
+ strcpy(req->dbname, database);
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to allocate nscd socket: %m");
+
+ /* Note: connect() returns EINPROGRESS if O_NONBLOCK is set and establishing a connection takes time. The
+ * kernel lets us know this way that the connection is now being established, and we should watch with poll()
+ * to learn when it is fully established. That said, AF_UNIX on Linux never triggers this IRL (connect() is
+ * always instant on AF_UNIX), hence handling this is mostly just an exercise in defensive, protocol-agnostic
+ * programming.
+ *
+ * connect() returns EAGAIN if the socket's backlog limit has been reached. When we see this we give up right
+ * away, after all this entire function here is written in a defensive style so that a non-responding nscd
+ * doesn't stall us for good. (Even if we wanted to handle this better: the Linux kernel doesn't really have a
+ * nice way to connect() to a server synchronously with a time limit that would also cover dealing with the
+ * backlog limit. After all SO_RCVTIMEO and SR_SNDTIMEO don't apply to connect(), and alarm() is frickin' ugly
+ * and not really reasonably usable from threads-aware code.) */
+ if (connect(fd, &nscd_sa.sa, SOCKADDR_UN_LEN(nscd_sa.un)) < 0) {
+ if (errno == EAGAIN)
+ return log_debug_errno(errno, "nscd is overloaded (backlog limit reached) and refuses to take further connections: %m");
+ if (errno != EINPROGRESS)
+ return log_debug_errno(errno, "Failed to connect to nscd socket: %m");
+
+ /* Continue in case of EINPROGRESS, but don't bother with send() or recv() until being notified that
+ * establishing the connection is complete. */
+ events = 0;
+ } else
+ events = POLLIN|POLLOUT; /* Let's assume initially that we can write and read to the fd, to suppress
+ * one poll() invocation */
+ for (;;) {
+ usec_t p;
+
+ if (events & POLLOUT) {
+ ssize_t m;
+
+ assert(has_written < req_size);
+
+ m = send(fd, (uint8_t*) req + has_written, req_size - has_written, MSG_NOSIGNAL);
+ if (m < 0) {
+ if (errno != EAGAIN) /* Note that EAGAIN is returned by the kernel whenever it can't
+ * take the data right now, and that includes if the connect() is
+ * asynchronous and we saw EINPROGRESS on it, and it hasn't
+ * completed yet. */
+ return log_debug_errno(errno, "Failed to write to nscd socket: %m");
+ } else
+ has_written += m;
+ }
+
+ if (events & (POLLIN|POLLERR|POLLHUP)) {
+ ssize_t m;
+
+ if (has_read >= sizeof(resp))
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Response from nscd longer than expected: %m");
+
+ m = recv(fd, (uint8_t*) &resp + has_read, sizeof(resp) - has_read, 0);
+ if (m < 0) {
+ if (errno != EAGAIN)
+ return log_debug_errno(errno, "Failed to read from nscd socket: %m");
+ } else if (m == 0) { /* EOF */
+ if (has_read == 0 && has_written >= req_size) /* Older nscd immediately terminated the
+ * connection, accept that as OK */
+ return 1;
+
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "nscd prematurely ended connection.");
+ } else
+ has_read += m;
+ }
+
+ if (has_written >= req_size && has_read >= sizeof(resp)) { /* done? */
+ if (resp < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "nscd sent us a negative error number: %i", resp);
+ if (resp > 0)
+ return log_debug_errno(resp, "nscd return failure code on invalidating '%s'.", database);
+ return 1;
+ }
+
+ p = now(CLOCK_MONOTONIC);
+ if (p >= end)
+ return -ETIMEDOUT;
+
+ events = fd_wait_for_event(fd, POLLIN | (has_written < req_size ? POLLOUT : 0), end - p);
+ if (events < 0)
+ return events;
+ }
+}
+
+int nscd_flush_cache(char **databases) {
+ usec_t end;
+ int r = 0;
+ char **i;
+
+ /* Tries to invalidate the specified database in nscd. We do this carefully, with a 5s timeout, so that we
+ * don't block indefinitely on another service. */
+
+ end = usec_add(now(CLOCK_MONOTONIC), NSCD_FLUSH_CACHE_TIMEOUT_USEC);
+
+ STRV_FOREACH(i, databases) {
+ int k;
+
+ k = nscd_flush_cache_one(*i, end);
+ if (k < 0 && r >= 0)
+ r = k;
+ }
+
+ return r;
+}
diff --git a/src/shared/nscd-flush.h b/src/shared/nscd-flush.h
new file mode 100644
index 0000000..5aafa9a
--- /dev/null
+++ b/src/shared/nscd-flush.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int nscd_flush_cache(char **databases);
diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c
new file mode 100644
index 0000000..2845041
--- /dev/null
+++ b/src/shared/nsflags.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "nsflags.h"
+#include "string-util.h"
+
+const struct namespace_flag_map namespace_flag_map[] = {
+ { CLONE_NEWCGROUP, "cgroup" },
+ { CLONE_NEWIPC, "ipc" },
+ { CLONE_NEWNET, "net" },
+ /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more
+ * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
+ { CLONE_NEWNS, "mnt" },
+ { CLONE_NEWPID, "pid" },
+ { CLONE_NEWUSER, "user" },
+ { CLONE_NEWUTS, "uts" },
+ {}
+};
+
+int namespace_flags_from_string(const char *name, unsigned long *ret) {
+ unsigned long flags = 0;
+ int r;
+
+ assert_se(ret);
+
+ for (;;) {
+ _cleanup_free_ char *word = NULL;
+ unsigned long f = 0;
+ unsigned i;
+
+ r = extract_first_word(&name, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ for (i = 0; namespace_flag_map[i].name; i++)
+ if (streq(word, namespace_flag_map[i].name)) {
+ f = namespace_flag_map[i].flag;
+ break;
+ }
+
+ if (f == 0)
+ return -EINVAL;
+
+ flags |= f;
+ }
+
+ *ret = flags;
+ return 0;
+}
+
+int namespace_flags_to_string(unsigned long flags, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ unsigned i;
+
+ for (i = 0; namespace_flag_map[i].name; i++) {
+ if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag)
+ continue;
+
+ if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name, NULL))
+ return -ENOMEM;
+ }
+
+ *ret = TAKE_PTR(s);
+
+ return 0;
+}
diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h
new file mode 100644
index 0000000..3d774c7
--- /dev/null
+++ b/src/shared/nsflags.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "missing_sched.h"
+
+/* The combination of all namespace flags defined by the kernel. The right type for this isn't clear. setns() and
+ * unshare() expect these flags to be passed as (signed) "int", while clone() wants them as "unsigned long". The latter
+ * is definitely more appropriate for a flags parameter, and also the larger type of the two, hence let's stick to that
+ * here. */
+#define NAMESPACE_FLAGS_ALL \
+ ((unsigned long) (CLONE_NEWCGROUP| \
+ CLONE_NEWIPC| \
+ CLONE_NEWNET| \
+ CLONE_NEWNS| \
+ CLONE_NEWPID| \
+ CLONE_NEWUSER| \
+ CLONE_NEWUTS))
+
+#define NAMESPACE_FLAGS_INITIAL ((unsigned long) -1)
+
+int namespace_flags_from_string(const char *name, unsigned long *ret);
+int namespace_flags_to_string(unsigned long flags, char **ret);
+
+struct namespace_flag_map {
+ unsigned long flag;
+ const char *name;
+};
+
+extern const struct namespace_flag_map namespace_flag_map[];
diff --git a/src/shared/numa-util.c b/src/shared/numa-util.c
new file mode 100644
index 0000000..7e41d68
--- /dev/null
+++ b/src/shared/numa-util.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <sched.h>
+
+#include "alloc-util.h"
+#include "cpu-set-util.h"
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "macro.h"
+#include "missing_syscall.h"
+#include "numa-util.h"
+#include "stdio-util.h"
+#include "string-table.h"
+
+bool numa_policy_is_valid(const NUMAPolicy *policy) {
+ assert(policy);
+
+ if (!mpol_is_valid(numa_policy_get_type(policy)))
+ return false;
+
+ if (!policy->nodes.set &&
+ !IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL, MPOL_PREFERRED))
+ return false;
+
+ if (policy->nodes.set &&
+ numa_policy_get_type(policy) == MPOL_PREFERRED &&
+ CPU_COUNT_S(policy->nodes.allocated, policy->nodes.set) != 1)
+ return false;
+
+ return true;
+}
+
+static int numa_policy_to_mempolicy(const NUMAPolicy *policy, unsigned long *ret_maxnode, unsigned long **ret_nodes) {
+ unsigned node, bits = 0, ulong_bits;
+ _cleanup_free_ unsigned long *out = NULL;
+
+ assert(policy);
+ assert(ret_maxnode);
+ assert(ret_nodes);
+
+ if (IN_SET(numa_policy_get_type(policy), MPOL_DEFAULT, MPOL_LOCAL) ||
+ (numa_policy_get_type(policy) == MPOL_PREFERRED && !policy->nodes.set)) {
+ *ret_nodes = NULL;
+ *ret_maxnode = 0;
+ return 0;
+ }
+
+ bits = policy->nodes.allocated * 8;
+ ulong_bits = sizeof(unsigned long) * 8;
+
+ out = new0(unsigned long, DIV_ROUND_UP(policy->nodes.allocated, sizeof(unsigned long)));
+ if (!out)
+ return -ENOMEM;
+
+ /* We don't make any assumptions about internal type libc is using to store NUMA node mask.
+ Hence we need to convert the node mask to the representation expected by set_mempolicy() */
+ for (node = 0; node < bits; node++)
+ if (CPU_ISSET_S(node, policy->nodes.allocated, policy->nodes.set))
+ out[node / ulong_bits] |= 1ul << (node % ulong_bits);
+
+ *ret_nodes = TAKE_PTR(out);
+ *ret_maxnode = bits + 1;
+ return 0;
+}
+
+int apply_numa_policy(const NUMAPolicy *policy) {
+ int r;
+ _cleanup_free_ unsigned long *nodes = NULL;
+ unsigned long maxnode;
+
+ assert(policy);
+
+ if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
+ return -EOPNOTSUPP;
+
+ if (!numa_policy_is_valid(policy))
+ return -EINVAL;
+
+ r = numa_policy_to_mempolicy(policy, &maxnode, &nodes);
+ if (r < 0)
+ return r;
+
+ r = set_mempolicy(numa_policy_get_type(policy), nodes, maxnode);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *ret) {
+ int r;
+ size_t i;
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+
+ assert(policy);
+ assert(ret);
+
+ for (i = 0; i < policy->nodes.allocated * 8; i++) {
+ _cleanup_free_ char *l = NULL;
+ char p[STRLEN("/sys/devices/system/node/node//cpulist") + DECIMAL_STR_MAX(size_t) + 1];
+ _cleanup_(cpu_set_reset) CPUSet part = {};
+
+ if (!CPU_ISSET_S(i, policy->nodes.allocated, policy->nodes.set))
+ continue;
+
+ xsprintf(p, "/sys/devices/system/node/node%zu/cpulist", i);
+
+ r = read_one_line_file(p, &l);
+ if (r < 0)
+ return r;
+
+ r = parse_cpu_set(l, &part);
+ if (r < 0)
+ return r;
+
+ r = cpu_set_add_all(&s, &part);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = s;
+ s = (CPUSet) {};
+
+ return 0;
+}
+
+static int numa_max_node(void) {
+ _cleanup_closedir_ DIR *d = NULL;
+ struct dirent *de;
+ int r, max_node = 0;
+
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return -errno;
+
+ FOREACH_DIRENT(de, d, break) {
+ int node;
+ const char *n;
+
+ (void) dirent_ensure_type(d, de);
+
+ if (de->d_type != DT_DIR)
+ continue;
+
+ n = startswith(de->d_name, "node");
+ if (!n)
+ continue;
+
+ r = safe_atoi(n, &node);
+ if (r < 0)
+ continue;
+
+ if (node > max_node)
+ max_node = node;
+ }
+
+ return max_node;
+}
+
+int numa_mask_add_all(CPUSet *mask) {
+ int m;
+
+ assert(mask);
+
+ m = numa_max_node();
+ if (m < 0) {
+ log_debug_errno(m, "Failed to determine maximum NUMA node index, assuming 1023: %m");
+ m = 1023; /* CONFIG_NODES_SHIFT is set to 10 on x86_64, i.e. 1024 NUMA nodes in total */
+ }
+
+ for (int i = 0; i <= m; i++) {
+ int r;
+
+ r = cpu_set_add(mask, i);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static const char* const mpol_table[] = {
+ [MPOL_DEFAULT] = "default",
+ [MPOL_PREFERRED] = "preferred",
+ [MPOL_BIND] = "bind",
+ [MPOL_INTERLEAVE] = "interleave",
+ [MPOL_LOCAL] = "local",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(mpol, int);
diff --git a/src/shared/numa-util.h b/src/shared/numa-util.h
new file mode 100644
index 0000000..2f736c9
--- /dev/null
+++ b/src/shared/numa-util.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "cpu-set-util.h"
+#include "missing_syscall.h"
+
+static inline bool mpol_is_valid(int t) {
+ return t >= MPOL_DEFAULT && t <= MPOL_LOCAL;
+}
+
+typedef struct NUMAPolicy {
+ /* Always use numa_policy_get_type() to read the value */
+ int type;
+ CPUSet nodes;
+} NUMAPolicy;
+
+bool numa_policy_is_valid(const NUMAPolicy *p);
+
+static inline int numa_policy_get_type(const NUMAPolicy *p) {
+ return p->type < 0 ? (p->nodes.set ? MPOL_PREFERRED : -1) : p->type;
+}
+
+static inline void numa_policy_reset(NUMAPolicy *p) {
+ assert(p);
+ cpu_set_reset(&p->nodes);
+ p->type = -1;
+}
+
+int apply_numa_policy(const NUMAPolicy *policy);
+int numa_to_cpu_set(const NUMAPolicy *policy, CPUSet *set);
+
+int numa_mask_add_all(CPUSet *mask);
+
+const char* mpol_to_string(int i) _const_;
+int mpol_from_string(const char *s) _pure_;
diff --git a/src/shared/offline-passwd.c b/src/shared/offline-passwd.c
new file mode 100644
index 0000000..b607aac
--- /dev/null
+++ b/src/shared/offline-passwd.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "fs-util.h"
+#include "offline-passwd.h"
+#include "path-util.h"
+#include "user-util.h"
+
+DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(uid_gid_hash_ops, char, string_hash_func, string_compare_func, free);
+
+static int open_passwd_file(const char *root, const char *fname, FILE **ret_file) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+
+ fd = chase_symlinks_and_open(fname, root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC, &p);
+ if (fd < 0)
+ return fd;
+
+ FILE *f = fdopen(fd, "r");
+ if (!f)
+ return -errno;
+
+ TAKE_FD(fd);
+
+ log_debug("Reading %s entries from %s...", basename(fname), p);
+
+ *ret_file = f;
+ return 0;
+}
+
+static int populate_uid_cache(const char *root, Hashmap **ret) {
+ _cleanup_(hashmap_freep) Hashmap *cache = NULL;
+ int r;
+
+ cache = hashmap_new(&uid_gid_hash_ops);
+ if (!cache)
+ return -ENOMEM;
+
+ /* The directory list is hardcoded here: /etc is the standard, and rpm-ostree uses /usr/lib. This
+ * could be made configurable, but I don't see the point right now. */
+
+ const char *fname;
+ FOREACH_STRING(fname, "/etc/passwd", "/usr/lib/passwd") {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ r = open_passwd_file(root, fname, &f);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ struct passwd *pw;
+ while ((r = fgetpwent_sane(f, &pw)) > 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = strdup(pw->pw_name);
+ if (!n)
+ return -ENOMEM;
+
+ r = hashmap_put(cache, n, UID_TO_PTR(pw->pw_uid));
+ if (IN_SET(r, 0 -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+ TAKE_PTR(n);
+ }
+ }
+
+ *ret = TAKE_PTR(cache);
+ return 0;
+}
+
+static int populate_gid_cache(const char *root, Hashmap **ret) {
+ _cleanup_(hashmap_freep) Hashmap *cache = NULL;
+ int r;
+
+ cache = hashmap_new(&uid_gid_hash_ops);
+ if (!cache)
+ return -ENOMEM;
+
+ const char *fname;
+ FOREACH_STRING(fname, "/etc/group", "/usr/lib/group") {
+ _cleanup_fclose_ FILE *f = NULL;
+
+ r = open_passwd_file(root, fname, &f);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ struct group *gr;
+ while ((r = fgetgrent_sane(f, &gr)) > 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = strdup(gr->gr_name);
+ if (!n)
+ return -ENOMEM;
+
+ r = hashmap_put(cache, n, GID_TO_PTR(gr->gr_gid));
+ if (IN_SET(r, 0, -EEXIST))
+ continue;
+ if (r < 0)
+ return r;
+ TAKE_PTR(n);
+ }
+ }
+
+ *ret = TAKE_PTR(cache);
+ return 0;
+}
+
+int name_to_uid_offline(
+ const char *root,
+ const char *user,
+ uid_t *ret_uid,
+ Hashmap **cache) {
+
+ void *found;
+ int r;
+
+ assert(user);
+ assert(ret_uid);
+ assert(cache);
+
+ if (!*cache) {
+ r = populate_uid_cache(root, cache);
+ if (r < 0)
+ return r;
+ }
+
+ found = hashmap_get(*cache, user);
+ if (!found)
+ return -ESRCH;
+
+ *ret_uid = PTR_TO_UID(found);
+ return 0;
+}
+
+int name_to_gid_offline(
+ const char *root,
+ const char *group,
+ gid_t *ret_gid,
+ Hashmap **cache) {
+
+ void *found;
+ int r;
+
+ assert(group);
+ assert(ret_gid);
+ assert(cache);
+
+ if (!*cache) {
+ r = populate_gid_cache(root, cache);
+ if (r < 0)
+ return r;
+ }
+
+ found = hashmap_get(*cache, group);
+ if (!found)
+ return -ESRCH;
+
+ *ret_gid = PTR_TO_GID(found);
+ return 0;
+}
diff --git a/src/shared/offline-passwd.h b/src/shared/offline-passwd.h
new file mode 100644
index 0000000..587af7b
--- /dev/null
+++ b/src/shared/offline-passwd.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/types.h>
+
+#include "hashmap.h"
+
+int name_to_uid_offline(const char *root, const char *user, uid_t *ret_uid, Hashmap **cache);
+int name_to_gid_offline(const char *root, const char *group, gid_t *ret_gid, Hashmap **cache);
diff --git a/src/shared/openssl-util.h b/src/shared/openssl-util.h
new file mode 100644
index 0000000..1b49834
--- /dev/null
+++ b/src/shared/openssl-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#if HAVE_OPENSSL
+# include <openssl/pem.h>
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(X509*, X509_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(X509_NAME*, X509_NAME_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_PKEY_CTX*, EVP_PKEY_CTX_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(EVP_CIPHER_CTX*, EVP_CIPHER_CTX_free);
+
+#endif
diff --git a/src/shared/os-util.c b/src/shared/os-util.c
new file mode 100644
index 0000000..3b7e495
--- /dev/null
+++ b/src/shared/os-util.c
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "alloc-util.h"
+#include "env-file.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "os-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+int path_is_os_tree(const char *path) {
+ int r;
+
+ assert(path);
+
+ /* Does the path exist at all? If not, generate an error immediately. This is useful so that a missing root dir
+ * always results in -ENOENT, and we can properly distinguish the case where the whole root doesn't exist from
+ * the case where just the os-release file is missing. */
+ if (laccess(path, F_OK) < 0)
+ return -errno;
+
+ /* We use {/etc|/usr/lib}/os-release as flag file if something is an OS */
+ r = open_os_release(path, NULL, NULL);
+ if (r == -ENOENT) /* We got nothing */
+ return 0;
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd) {
+ _cleanup_free_ char *q = NULL;
+ const char *p;
+ int r, fd;
+
+ FOREACH_STRING(p, "/etc/os-release", "/usr/lib/os-release") {
+ r = chase_symlinks(p, root, CHASE_PREFIX_ROOT,
+ ret_path ? &q : NULL,
+ ret_fd ? &fd : NULL);
+ if (r != -ENOENT)
+ break;
+ }
+ if (r < 0)
+ return r;
+
+ if (ret_fd) {
+ int real_fd;
+
+ /* Convert the O_PATH fd into a proper, readable one */
+ real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ safe_close(fd);
+ if (real_fd < 0)
+ return real_fd;
+
+ *ret_fd = real_fd;
+ }
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(q);
+
+ return 0;
+}
+
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file) {
+ _cleanup_free_ char *p = NULL;
+ _cleanup_close_ int fd = -1;
+ FILE *f;
+ int r;
+
+ if (!ret_file)
+ return open_os_release(root, ret_path, NULL);
+
+ r = open_os_release(root, ret_path ? &p : NULL, &fd);
+ if (r < 0)
+ return r;
+
+ f = take_fdopen(&fd, "r");
+ if (!f)
+ return -errno;
+
+ *ret_file = f;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(p);
+
+ return 0;
+}
+
+int parse_os_release(const char *root, ...) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ va_list ap;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ va_start(ap, root);
+ r = parse_env_filev(f, p, ap);
+ va_end(ap);
+
+ return r;
+}
+
+int load_os_release_pairs(const char *root, char ***ret) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *p = NULL;
+ int r;
+
+ r = fopen_os_release(root, &p, &f);
+ if (r < 0)
+ return r;
+
+ return load_env_file_pairs(f, p, ret);
+}
+
+int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret) {
+ _cleanup_strv_free_ char **os_release_pairs = NULL, **os_release_pairs_prefixed = NULL;
+ char **p, **q;
+ int r;
+
+ r = load_os_release_pairs(root, &os_release_pairs);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH_PAIR(p, q, os_release_pairs) {
+ char *line;
+
+ /* We strictly return only the four main ID fields and ignore the rest */
+ if (!STR_IN_SET(*p, "ID", "VERSION_ID", "BUILD_ID", "VARIANT_ID"))
+ continue;
+
+ ascii_strlower(*p);
+ line = strjoin(prefix, *p, "=", *q);
+ if (!line)
+ return -ENOMEM;
+ r = strv_consume(&os_release_pairs_prefixed, line);
+ if (r < 0)
+ return r;
+ }
+
+ *ret = TAKE_PTR(os_release_pairs_prefixed);
+
+ return 0;
+}
diff --git a/src/shared/os-util.h b/src/shared/os-util.h
new file mode 100644
index 0000000..1d9b0b1
--- /dev/null
+++ b/src/shared/os-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+int path_is_os_tree(const char *path);
+
+int open_os_release(const char *root, char **ret_path, int *ret_fd);
+int fopen_os_release(const char *root, char **ret_path, FILE **ret_file);
+
+int parse_os_release(const char *root, ...) _sentinel_;
+int load_os_release_pairs(const char *root, char ***ret);
+int load_os_release_pairs_with_prefix(const char *root, const char *prefix, char ***ret);
diff --git a/src/shared/output-mode.c b/src/shared/output-mode.c
new file mode 100644
index 0000000..1645b75
--- /dev/null
+++ b/src/shared/output-mode.c
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "output-mode.h"
+#include "string-table.h"
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m) {
+
+ switch (m) {
+
+ case OUTPUT_JSON_SSE:
+ return JSON_FORMAT_SSE;
+
+ case OUTPUT_JSON_SEQ:
+ return JSON_FORMAT_SEQ;
+
+ case OUTPUT_JSON_PRETTY:
+ return JSON_FORMAT_PRETTY;
+
+ default:
+ return JSON_FORMAT_NEWLINE;
+ }
+}
+
+static const char *const output_mode_table[_OUTPUT_MODE_MAX] = {
+ [OUTPUT_SHORT] = "short",
+ [OUTPUT_SHORT_FULL] = "short-full",
+ [OUTPUT_SHORT_ISO] = "short-iso",
+ [OUTPUT_SHORT_ISO_PRECISE] = "short-iso-precise",
+ [OUTPUT_SHORT_PRECISE] = "short-precise",
+ [OUTPUT_SHORT_MONOTONIC] = "short-monotonic",
+ [OUTPUT_SHORT_UNIX] = "short-unix",
+ [OUTPUT_VERBOSE] = "verbose",
+ [OUTPUT_EXPORT] = "export",
+ [OUTPUT_JSON] = "json",
+ [OUTPUT_JSON_PRETTY] = "json-pretty",
+ [OUTPUT_JSON_SSE] = "json-sse",
+ [OUTPUT_JSON_SEQ] = "json-seq",
+ [OUTPUT_CAT] = "cat",
+ [OUTPUT_WITH_UNIT] = "with-unit",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(output_mode, OutputMode);
diff --git a/src/shared/output-mode.h b/src/shared/output-mode.h
new file mode 100644
index 0000000..a879054
--- /dev/null
+++ b/src/shared/output-mode.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "json.h"
+#include "macro.h"
+
+typedef enum OutputMode {
+ OUTPUT_SHORT,
+ OUTPUT_SHORT_FULL,
+ OUTPUT_SHORT_ISO,
+ OUTPUT_SHORT_ISO_PRECISE,
+ OUTPUT_SHORT_PRECISE,
+ OUTPUT_SHORT_MONOTONIC,
+ OUTPUT_SHORT_UNIX,
+ OUTPUT_VERBOSE,
+ OUTPUT_EXPORT,
+ OUTPUT_JSON,
+ OUTPUT_JSON_PRETTY,
+ OUTPUT_JSON_SSE,
+ OUTPUT_JSON_SEQ,
+ OUTPUT_CAT,
+ OUTPUT_WITH_UNIT,
+ _OUTPUT_MODE_MAX,
+ _OUTPUT_MODE_INVALID = -1
+} OutputMode;
+
+static inline bool OUTPUT_MODE_IS_JSON(OutputMode m) {
+ return IN_SET(m, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ);
+}
+
+/* The output flags definitions are shared by the logs and process tree output. Some apply to both, some only to the
+ * logs output, others only to the process tree output. */
+
+typedef enum OutputFlags {
+ OUTPUT_SHOW_ALL = 1 << 0,
+ OUTPUT_WARN_CUTOFF = 1 << 1,
+ OUTPUT_FULL_WIDTH = 1 << 2,
+ OUTPUT_COLOR = 1 << 3,
+ OUTPUT_CATALOG = 1 << 4,
+ OUTPUT_BEGIN_NEWLINE = 1 << 5,
+ OUTPUT_UTC = 1 << 6,
+ OUTPUT_KERNEL_THREADS = 1 << 7,
+ OUTPUT_NO_HOSTNAME = 1 << 8,
+} OutputFlags;
+
+JsonFormatFlags output_mode_to_json_format_flags(OutputMode m);
+
+const char* output_mode_to_string(OutputMode m) _const_;
+OutputMode output_mode_from_string(const char *s) _pure_;
diff --git a/src/shared/pager.c b/src/shared/pager.c
new file mode 100644
index 0000000..f689d9f
--- /dev/null
+++ b/src/shared/pager.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "sd-login.h"
+
+#include "copy.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "io-util.h"
+#include "locale-util.h"
+#include "log.h"
+#include "macro.h"
+#include "pager.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "signal-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static pid_t pager_pid = 0;
+
+static int stored_stdout = -1;
+static int stored_stderr = -1;
+static bool stdout_redirected = false;
+static bool stderr_redirected = false;
+
+_noreturn_ static void pager_fallback(void) {
+ int r;
+
+ r = copy_bytes(STDIN_FILENO, STDOUT_FILENO, (uint64_t) -1, 0);
+ if (r < 0) {
+ log_error_errno(r, "Internal pager failed: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+}
+
+static int no_quit_on_interrupt(int exe_name_fd, const char *less_opts) {
+ _cleanup_fclose_ FILE *file = NULL;
+ _cleanup_free_ char *line = NULL;
+ int r;
+
+ assert(exe_name_fd >= 0);
+ assert(less_opts);
+
+ /* This takes ownership of exe_name_fd */
+ file = fdopen(exe_name_fd, "r");
+ if (!file) {
+ safe_close(exe_name_fd);
+ return log_error_errno(errno, "Failed to create FILE object: %m");
+ }
+
+ /* Find the last line */
+ for (;;) {
+ _cleanup_free_ char *t = NULL;
+
+ r = read_line(file, LONG_LINE_MAX, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read from socket: %m");
+ if (r == 0)
+ break;
+
+ free_and_replace(line, t);
+ }
+
+ /* We only treat "less" specially.
+ * Return true whenever option K is *not* set. */
+ r = streq_ptr(line, "less") && !strchr(less_opts, 'K');
+
+ log_debug("Pager executable is \"%s\", options \"%s\", quit_on_interrupt: %s",
+ strnull(line), less_opts, yes_no(!r));
+ return r;
+}
+
+int pager_open(PagerFlags flags) {
+ _cleanup_close_pair_ int fd[2] = { -1, -1 }, exe_name_pipe[2] = { -1, -1 };
+ _cleanup_strv_free_ char **pager_args = NULL;
+ const char *pager, *less_opts;
+ int r;
+
+ if (flags & PAGER_DISABLE)
+ return 0;
+
+ if (pager_pid > 0)
+ return 1;
+
+ if (terminal_is_dumb())
+ return 0;
+
+ if (!is_main_thread())
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Pager invoked from wrong thread.");
+
+ pager = getenv("SYSTEMD_PAGER");
+ if (!pager)
+ pager = getenv("PAGER");
+
+ if (pager) {
+ pager_args = strv_split(pager, WHITESPACE);
+ if (!pager_args)
+ return log_oom();
+
+ /* If the pager is explicitly turned off, honour it */
+ if (strv_isempty(pager_args) || strv_equal(pager_args, STRV_MAKE("cat")))
+ return 0;
+ }
+
+ /* Determine and cache number of columns/lines before we spawn the pager so that we get the value from the
+ * actual tty */
+ (void) columns();
+ (void) lines();
+
+ if (pipe2(fd, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create pager pipe: %m");
+
+ /* This is a pipe to feed the name of the executed pager binary into the parent */
+ if (pipe2(exe_name_pipe, O_CLOEXEC) < 0)
+ return log_error_errno(errno, "Failed to create exe_name pipe: %m");
+
+ /* Initialize a good set of less options */
+ less_opts = getenv("SYSTEMD_LESS");
+ if (!less_opts)
+ less_opts = "FRSXMK";
+ if (flags & PAGER_JUMP_TO_END)
+ less_opts = strjoina(less_opts, " +G");
+
+ /* We set SIGINT as PR_DEATHSIG signal here, to match the "K" parameter we set in $LESS, which enables SIGINT behaviour. */
+ r = safe_fork("(pager)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGINT|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pager_pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ const char *less_charset, *exe;
+
+ /* In the child start the pager */
+
+ if (dup2(fd[0], STDIN_FILENO) < 0) {
+ log_error_errno(errno, "Failed to duplicate file descriptor to STDIN: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ safe_close_pair(fd);
+
+ if (setenv("LESS", less_opts, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESS: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Initialize a good charset for less. This is particularly important if we output UTF-8
+ * characters. */
+ less_charset = getenv("SYSTEMD_LESSCHARSET");
+ if (!less_charset && is_locale_utf8())
+ less_charset = "utf-8";
+ if (less_charset &&
+ setenv("LESSCHARSET", less_charset, 1) < 0) {
+ log_error_errno(errno, "Failed to set environment variable LESSCHARSET: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* People might invoke us from sudo, don't needlessly allow less to be a way to shell out
+ * privileged stuff. If the user set $SYSTEMD_PAGERSECURE, trust their configuration of the
+ * pager. If they didn't, use secure mode when under euid is changed. If $SYSTEMD_PAGERSECURE
+ * wasn't explicitly set, and we autodetect the need for secure mode, only use the pager we
+ * know to be good. */
+ int use_secure_mode = getenv_bool_secure("SYSTEMD_PAGERSECURE");
+ bool trust_pager = use_secure_mode >= 0;
+ if (use_secure_mode == -ENXIO) {
+ uid_t uid;
+
+ r = sd_pid_get_owner_uid(0, &uid);
+ if (r < 0)
+ log_debug_errno(r, "sd_pid_get_owner_uid() failed, enabling pager secure mode: %m");
+
+ use_secure_mode = r < 0 || uid != geteuid();
+
+ } else if (use_secure_mode < 0) {
+ log_warning_errno(use_secure_mode, "Unable to parse $SYSTEMD_PAGERSECURE, assuming true: %m");
+ use_secure_mode = true;
+ }
+
+ /* We generally always set variables used by less, even if we end up using a different pager.
+ * They shouldn't hurt in any case, and ideally other pagers would look at them too. */
+ r = set_unset_env("LESSSECURE", use_secure_mode ? "1" : NULL, true);
+ if (r < 0) {
+ log_error_errno(r, "Failed to adjust environment variable LESSSECURE: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (trust_pager && pager_args) { /* The pager config might be set globally, and we cannot
+ * know if the user adjusted it to be appropriate for the
+ * secure mode. Thus, start the pager specified through
+ * envvars only when $SYSTEMD_PAGERSECURE was explicitly set
+ * as well. */
+ r = loop_write(exe_name_pipe[1], pager_args[0], strlen(pager_args[0]) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ execvp(pager_args[0], pager_args);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using fallback pagers: %m", pager_args[0]);
+ }
+
+ /* Debian's alternatives command for pagers is called 'pager'. Note that we do not call
+ * sensible-pagers here, since that is just a shell script that implements a logic that is
+ * similar to this one anyway, but is Debian-specific. */
+ FOREACH_STRING(exe, "pager", "less", "more") {
+ /* Only less implements secure mode right now. */
+ if (use_secure_mode && !streq(exe, "less"))
+ continue;
+
+ r = loop_write(exe_name_pipe[1], exe, strlen(exe) + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ execlp(exe, exe, NULL);
+ log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to execute '%s', using next fallback pager: %m", exe);
+ }
+
+ /* Our builtin is also very secure. */
+ r = loop_write(exe_name_pipe[1], "(built-in)", strlen("(built-in)") + 1, false);
+ if (r < 0) {
+ log_error_errno(r, "Failed to write pager name to socket: %m");
+ _exit(EXIT_FAILURE);
+ }
+ /* Close pipe to signal the parent to start sending data */
+ safe_close_pair(exe_name_pipe);
+ pager_fallback();
+ /* not reached */
+ }
+
+ /* Return in the parent */
+ stored_stdout = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDOUT_FILENO) < 0) {
+ stored_stdout = safe_close(stored_stdout);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stdout_redirected = true;
+
+ stored_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3);
+ if (dup2(fd[1], STDERR_FILENO) < 0) {
+ stored_stderr = safe_close(stored_stderr);
+ return log_error_errno(errno, "Failed to duplicate pager pipe: %m");
+ }
+ stderr_redirected = true;
+
+ exe_name_pipe[1] = safe_close(exe_name_pipe[1]);
+
+ r = no_quit_on_interrupt(TAKE_FD(exe_name_pipe[0]), less_opts);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ (void) ignore_signals(SIGINT, -1);
+
+ return 1;
+}
+
+void pager_close(void) {
+
+ if (pager_pid <= 0)
+ return;
+
+ /* Inform pager that we are done */
+ (void) fflush(stdout);
+ if (stdout_redirected)
+ if (stored_stdout < 0 || dup2(stored_stdout, STDOUT_FILENO) < 0)
+ (void) close(STDOUT_FILENO);
+ stored_stdout = safe_close(stored_stdout);
+ (void) fflush(stderr);
+ if (stderr_redirected)
+ if (stored_stderr < 0 || dup2(stored_stderr, STDERR_FILENO) < 0)
+ (void) close(STDERR_FILENO);
+ stored_stderr = safe_close(stored_stderr);
+ stdout_redirected = stderr_redirected = false;
+
+ (void) kill(pager_pid, SIGCONT);
+ (void) wait_for_terminate(pager_pid, NULL);
+ pager_pid = 0;
+}
+
+bool pager_have(void) {
+ return pager_pid > 0;
+}
+
+int show_man_page(const char *desc, bool null_stdio) {
+ const char *args[4] = { "man", NULL, NULL, NULL };
+ char *e = NULL;
+ pid_t pid;
+ size_t k;
+ int r;
+
+ k = strlen(desc);
+
+ if (desc[k-1] == ')')
+ e = strrchr(desc, '(');
+
+ if (e) {
+ char *page = NULL, *section = NULL;
+
+ page = strndupa(desc, e - desc);
+ section = strndupa(e + 1, desc + k - e - 2);
+
+ args[1] = section;
+ args[2] = page;
+ } else
+ args[1] = desc;
+
+ r = safe_fork("(man)", FORK_RESET_SIGNALS|FORK_DEATHSIG|(null_stdio ? FORK_NULL_STDIO : 0)|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* Child */
+ execvp(args[0], (char**) args);
+ log_error_errno(errno, "Failed to execute man: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ return wait_for_terminate_and_check(NULL, pid, 0);
+}
diff --git a/src/shared/pager.h b/src/shared/pager.h
new file mode 100644
index 0000000..b3b1b4f
--- /dev/null
+++ b/src/shared/pager.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+typedef enum PagerFlags {
+ PAGER_DISABLE = 1 << 0,
+ PAGER_JUMP_TO_END = 1 << 1,
+} PagerFlags;
+
+int pager_open(PagerFlags flags);
+void pager_close(void);
+bool pager_have(void) _pure_;
+
+int show_man_page(const char *page, bool null_stdio);
diff --git a/src/shared/pam-util.c b/src/shared/pam-util.c
new file mode 100644
index 0000000..621e7fe
--- /dev/null
+++ b/src/shared/pam-util.c
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <security/pam_ext.h>
+#include <syslog.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "macro.h"
+#include "pam-util.h"
+
+int pam_log_oom(pam_handle_t *handle) {
+ /* This is like log_oom(), but uses PAM logging */
+ pam_syslog(handle, LOG_ERR, "Out of memory.");
+ return PAM_BUF_ERR;
+}
+
+int pam_bus_log_create_error(pam_handle_t *handle, int r) {
+ /* This is like bus_log_create_error(), but uses PAM logging */
+ pam_syslog(handle, LOG_ERR, "Failed to create bus message: %s", strerror_safe(r));
+ return PAM_BUF_ERR;
+}
+
+int pam_bus_log_parse_error(pam_handle_t *handle, int r) {
+ /* This is like bus_log_parse_error(), but uses PAM logging */
+ pam_syslog(handle, LOG_ERR, "Failed to parse bus message: %s", strerror_safe(r));
+ return PAM_BUF_ERR;
+}
+
+static void cleanup_system_bus(pam_handle_t *handle, void *data, int error_status) {
+ sd_bus_flush_close_unref(data);
+}
+
+int pam_acquire_bus_connection(pam_handle_t *handle, sd_bus **ret) {
+ _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+ int r;
+
+ assert(handle);
+ assert(ret);
+
+ /* We cache the bus connection so that we can share it between the session and the authentication hooks */
+ r = pam_get_data(handle, "systemd-system-bus", (const void**) &bus);
+ if (r == PAM_SUCCESS && bus) {
+ *ret = sd_bus_ref(TAKE_PTR(bus)); /* Increase the reference counter, so that the PAM data stays valid */
+ return PAM_SUCCESS;
+ }
+ if (!IN_SET(r, PAM_SUCCESS, PAM_NO_MODULE_DATA)) {
+ pam_syslog(handle, LOG_ERR, "Failed to get bus connection: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ r = sd_bus_open_system(&bus);
+ if (r < 0) {
+ pam_syslog(handle, LOG_ERR, "Failed to connect to system bus: %s", strerror_safe(r));
+ return PAM_SERVICE_ERR;
+ }
+
+ r = pam_set_data(handle, "systemd-system-bus", bus, cleanup_system_bus);
+ if (r != PAM_SUCCESS) {
+ pam_syslog(handle, LOG_ERR, "Failed to set PAM bus data: %s", pam_strerror(handle, r));
+ return r;
+ }
+
+ sd_bus_ref(bus);
+ *ret = TAKE_PTR(bus);
+
+ return PAM_SUCCESS;
+}
+
+int pam_release_bus_connection(pam_handle_t *handle) {
+ int r;
+
+ r = pam_set_data(handle, "systemd-system-bus", NULL, NULL);
+ if (r != PAM_SUCCESS)
+ pam_syslog(handle, LOG_ERR, "Failed to release PAM user record data: %s", pam_strerror(handle, r));
+
+ return r;
+}
+
+void pam_cleanup_free(pam_handle_t *handle, void *data, int error_status) {
+ /* A generic destructor for pam_set_data() that just frees the specified data */
+ free(data);
+}
diff --git a/src/shared/pam-util.h b/src/shared/pam-util.h
new file mode 100644
index 0000000..41f1835
--- /dev/null
+++ b/src/shared/pam-util.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <security/pam_modules.h>
+
+#include "sd-bus.h"
+
+int pam_log_oom(pam_handle_t *handle);
+int pam_bus_log_create_error(pam_handle_t *handle, int r);
+int pam_bus_log_parse_error(pam_handle_t *handle, int r);
+
+int pam_acquire_bus_connection(pam_handle_t *handle, sd_bus **ret);
+int pam_release_bus_connection(pam_handle_t *handle);
+
+void pam_cleanup_free(pam_handle_t *handle, void *data, int error_status);
diff --git a/src/shared/pe-header.h b/src/shared/pe-header.h
new file mode 100644
index 0000000..54433c7
--- /dev/null
+++ b/src/shared/pe-header.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <inttypes.h>
+
+#include "macro.h"
+#include "sparse-endian.h"
+
+struct DosFileHeader {
+ uint8_t Magic[2];
+ le16_t LastSize;
+ le16_t nBlocks;
+ le16_t nReloc;
+ le16_t HdrSize;
+ le16_t MinAlloc;
+ le16_t MaxAlloc;
+ le16_t ss;
+ le16_t sp;
+ le16_t Checksum;
+ le16_t ip;
+ le16_t cs;
+ le16_t RelocPos;
+ le16_t nOverlay;
+ le16_t reserved[4];
+ le16_t OEMId;
+ le16_t OEMInfo;
+ le16_t reserved2[10];
+ le32_t ExeHeader;
+} _packed_;
+
+#define PE_HEADER_MACHINE_I386 0x014cU
+#define PE_HEADER_MACHINE_X64 0x8664U
+
+struct PeFileHeader {
+ le16_t Machine;
+ le16_t NumberOfSections;
+ le32_t TimeDateStamp;
+ le32_t PointerToSymbolTable;
+ le32_t NumberOfSymbols;
+ le16_t SizeOfOptionalHeader;
+ le16_t Characteristics;
+} _packed_;
+
+struct PeHeader {
+ uint8_t Magic[4];
+ struct PeFileHeader FileHeader;
+} _packed_;
+
+struct PeSectionHeader {
+ uint8_t Name[8];
+ le32_t VirtualSize;
+ le32_t VirtualAddress;
+ le32_t SizeOfRawData;
+ le32_t PointerToRawData;
+ le32_t PointerToRelocations;
+ le32_t PointerToLinenumbers;
+ le16_t NumberOfRelocations;
+ le16_t NumberOfLinenumbers;
+ le32_t Characteristics;
+ } _packed_;
diff --git a/src/shared/pkcs11-util.c b/src/shared/pkcs11-util.c
new file mode 100644
index 0000000..e74f0be
--- /dev/null
+++ b/src/shared/pkcs11-util.c
@@ -0,0 +1,932 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "ask-password-api.h"
+#include "escape.h"
+#include "fd-util.h"
+#include "io-util.h"
+#include "memory-util.h"
+#if HAVE_OPENSSL
+#include "openssl-util.h"
+#endif
+#include "pkcs11-util.h"
+#include "random-util.h"
+#include "string-util.h"
+#include "strv.h"
+
+bool pkcs11_uri_valid(const char *uri) {
+ const char *p;
+
+ /* A very superficial checker for RFC7512 PKCS#11 URI syntax */
+
+ if (isempty(uri))
+ return false;
+
+ p = startswith(uri, "pkcs11:");
+ if (!p)
+ return false;
+
+ if (isempty(p))
+ return false;
+
+ if (!in_charset(p, ALPHANUMERICAL "-_?;&%="))
+ return false;
+
+ return true;
+}
+
+#if HAVE_P11KIT
+
+int uri_from_string(const char *p, P11KitUri **ret) {
+ _cleanup_(p11_kit_uri_freep) P11KitUri *uri = NULL;
+
+ assert(p);
+ assert(ret);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return -ENOMEM;
+
+ if (p11_kit_uri_parse(p, P11_KIT_URI_FOR_ANY, uri) != P11_KIT_URI_OK)
+ return -EINVAL;
+
+ *ret = TAKE_PTR(uri);
+ return 0;
+}
+
+P11KitUri *uri_from_module_info(const CK_INFO *info) {
+ P11KitUri *uri;
+
+ assert(info);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return NULL;
+
+ *p11_kit_uri_get_module_info(uri) = *info;
+ return uri;
+}
+
+P11KitUri *uri_from_slot_info(const CK_SLOT_INFO *slot_info) {
+ P11KitUri *uri;
+
+ assert(slot_info);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return NULL;
+
+ *p11_kit_uri_get_slot_info(uri) = *slot_info;
+ return uri;
+}
+
+P11KitUri *uri_from_token_info(const CK_TOKEN_INFO *token_info) {
+ P11KitUri *uri;
+
+ assert(token_info);
+
+ uri = p11_kit_uri_new();
+ if (!uri)
+ return NULL;
+
+ *p11_kit_uri_get_token_info(uri) = *token_info;
+ return uri;
+}
+
+CK_RV pkcs11_get_slot_list_malloc(
+ CK_FUNCTION_LIST *m,
+ CK_SLOT_ID **ret_slotids,
+ CK_ULONG *ret_n_slotids) {
+
+ CK_RV rv;
+
+ assert(m);
+ assert(ret_slotids);
+ assert(ret_n_slotids);
+
+ for (unsigned tries = 0; tries < 16; tries++) {
+ _cleanup_free_ CK_SLOT_ID *slotids = NULL;
+ CK_ULONG n_slotids = 0;
+
+ rv = m->C_GetSlotList(0, NULL, &n_slotids);
+ if (rv != CKR_OK)
+ return rv;
+ if (n_slotids == 0) {
+ *ret_slotids = NULL;
+ *ret_n_slotids = 0;
+ return CKR_OK;
+ }
+
+ slotids = new(CK_SLOT_ID, n_slotids);
+ if (!slotids)
+ return CKR_HOST_MEMORY;
+
+ rv = m->C_GetSlotList(0, slotids, &n_slotids);
+ if (rv == CKR_OK) {
+ *ret_slotids = TAKE_PTR(slotids);
+ *ret_n_slotids = n_slotids;
+ return CKR_OK;
+ }
+
+ if (rv != CKR_BUFFER_TOO_SMALL)
+ return rv;
+
+ /* Hu? Maybe somebody plugged something in and things changed? Let's try again */
+ }
+
+ return CKR_BUFFER_TOO_SMALL;
+}
+
+char *pkcs11_token_label(const CK_TOKEN_INFO *token_info) {
+ char *t;
+
+ /* The label is not NUL terminated and likely padded with spaces, let's make a copy here, so that we
+ * can strip that. */
+ t = strndup((char*) token_info->label, sizeof(token_info->label));
+ if (!t)
+ return NULL;
+
+ strstrip(t);
+ return t;
+}
+
+char *pkcs11_token_manufacturer_id(const CK_TOKEN_INFO *token_info) {
+ char *t;
+
+ t = strndup((char*) token_info->manufacturerID, sizeof(token_info->manufacturerID));
+ if (!t)
+ return NULL;
+
+ strstrip(t);
+ return t;
+}
+
+char *pkcs11_token_model(const CK_TOKEN_INFO *token_info) {
+ char *t;
+
+ t = strndup((char*) token_info->model, sizeof(token_info->model));
+ if (!t)
+ return NULL;
+
+ strstrip(t);
+ return t;
+}
+
+int pkcs11_token_login(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_SLOT_ID slotid,
+ const CK_TOKEN_INFO *token_info,
+ const char *friendly_name,
+ const char *icon_name,
+ const char *keyname,
+ usec_t until,
+ char **ret_used_pin) {
+
+ _cleanup_free_ char *token_uri_string = NULL, *token_uri_escaped = NULL, *id = NULL, *token_label = NULL;
+ _cleanup_(p11_kit_uri_freep) P11KitUri *token_uri = NULL;
+ CK_TOKEN_INFO updated_token_info;
+ int uri_result, r;
+ CK_RV rv;
+
+ assert(m);
+ assert(token_info);
+
+ token_label = pkcs11_token_label(token_info);
+ if (!token_label)
+ return log_oom();
+
+ token_uri = uri_from_token_info(token_info);
+ if (!token_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, &token_uri_string);
+ if (uri_result != P11_KIT_URI_OK)
+ return log_warning_errno(SYNTHETIC_ERRNO(EAGAIN), "Failed to format slot URI: %s", p11_kit_uri_message(uri_result));
+
+ if (FLAGS_SET(token_info->flags, CKF_PROTECTED_AUTHENTICATION_PATH)) {
+ rv = m->C_Login(session, CKU_USER, NULL, 0);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv));
+
+ log_info("Successfully logged into security token '%s' via protected authentication path.", token_label);
+ if (ret_used_pin)
+ *ret_used_pin = NULL;
+ return 0;
+ }
+
+ if (!FLAGS_SET(token_info->flags, CKF_LOGIN_REQUIRED)) {
+ log_info("No login into security token '%s' required.", token_label);
+ if (ret_used_pin)
+ *ret_used_pin = NULL;
+ return 0;
+ }
+
+ token_uri_escaped = cescape(token_uri_string);
+ if (!token_uri_escaped)
+ return log_oom();
+
+ id = strjoin("pkcs11:", token_uri_escaped);
+ if (!id)
+ return log_oom();
+
+ for (unsigned tries = 0; tries < 3; tries++) {
+ _cleanup_strv_free_erase_ char **passwords = NULL;
+ char **i, *e;
+
+ e = getenv("PIN");
+ if (e) {
+ passwords = strv_new(e);
+ if (!passwords)
+ return log_oom();
+
+ string_erase(e);
+ if (unsetenv("PIN") < 0)
+ return log_error_errno(errno, "Failed to unset $PIN: %m");
+ } else {
+ _cleanup_free_ char *text = NULL;
+
+ if (FLAGS_SET(token_info->flags, CKF_USER_PIN_FINAL_TRY))
+ r = asprintf(&text,
+ "Please enter correct PIN for security token '%s' in order to unlock %s (final try):",
+ token_label, friendly_name);
+ else if (FLAGS_SET(token_info->flags, CKF_USER_PIN_COUNT_LOW))
+ r = asprintf(&text,
+ "PIN has been entered incorrectly previously, please enter correct PIN for security token '%s' in order to unlock %s:",
+ token_label, friendly_name);
+ else if (tries == 0)
+ r = asprintf(&text,
+ "Please enter PIN for security token '%s' in order to unlock %s:",
+ token_label, friendly_name);
+ else
+ r = asprintf(&text,
+ "Please enter PIN for security token '%s' in order to unlock %s (try #%u):",
+ token_label, friendly_name, tries+1);
+ if (r < 0)
+ return log_oom();
+
+ /* We never cache PINs, simply because it's fatal if we use wrong PINs, since usually there are only 3 tries */
+ r = ask_password_auto(text, icon_name, id, keyname, until, 0, &passwords);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query PIN for security token '%s': %m", token_label);
+ }
+
+ STRV_FOREACH(i, passwords) {
+ rv = m->C_Login(session, CKU_USER, (CK_UTF8CHAR*) *i, strlen(*i));
+ if (rv == CKR_OK) {
+
+ if (ret_used_pin) {
+ char *c;
+
+ c = strdup(*i);
+ if (!c)
+ return log_oom();
+
+ *ret_used_pin = c;
+ }
+
+ log_info("Successfully logged into security token '%s'.", token_label);
+ return 0;
+ }
+ if (rv == CKR_PIN_LOCKED)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "PIN has been locked, please reset PIN of security token '%s'.", token_label);
+ if (!IN_SET(rv, CKR_PIN_INCORRECT, CKR_PIN_LEN_RANGE))
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to log into security token '%s': %s", token_label, p11_kit_strerror(rv));
+
+ /* Referesh the token info, so that we can prompt knowing the new flags if they changed. */
+ rv = m->C_GetTokenInfo(slotid, &updated_token_info);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to acquire updated security token information for slot %lu: %s",
+ slotid, p11_kit_strerror(rv));
+
+ token_info = &updated_token_info;
+ log_notice("PIN for token '%s' is incorrect, please try again.", token_label);
+ }
+ }
+
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Too many attempts to log into token '%s'.", token_label);
+}
+
+int pkcs11_token_find_x509_certificate(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ P11KitUri *search_uri,
+ CK_OBJECT_HANDLE *ret_object) {
+
+ bool found_class = false, found_certificate_type = false;
+ _cleanup_free_ CK_ATTRIBUTE *attributes_buffer = NULL;
+ CK_ULONG n_attributes, a, n_objects;
+ CK_ATTRIBUTE *attributes = NULL;
+ CK_OBJECT_HANDLE objects[2];
+ CK_RV rv, rv2;
+
+ assert(m);
+ assert(search_uri);
+ assert(ret_object);
+
+ attributes = p11_kit_uri_get_attributes(search_uri, &n_attributes);
+ for (a = 0; a < n_attributes; a++) {
+
+ /* We use the URI's included match attributes, but make them more strict. This allows users
+ * to specify a token URL instead of an object URL and the right thing should happen if
+ * there's only one suitable key on the token. */
+
+ switch (attributes[a].type) {
+
+ case CKA_CLASS: {
+ CK_OBJECT_CLASS c;
+
+ if (attributes[a].ulValueLen != sizeof(c))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CLASS attribute size.");
+
+ memcpy(&c, attributes[a].pValue, sizeof(c));
+ if (c != CKO_CERTIFICATE)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an X.509 certificate, refusing.");
+
+ found_class = true;
+ break;
+ }
+
+ case CKA_CERTIFICATE_TYPE: {
+ CK_CERTIFICATE_TYPE t;
+
+ if (attributes[a].ulValueLen != sizeof(t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CERTIFICATE_TYPE attribute size.");
+
+ memcpy(&t, attributes[a].pValue, sizeof(t));
+ if (t != CKC_X_509)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an X.509 certificate, refusing.");
+
+ found_certificate_type = true;
+ break;
+ }}
+ }
+
+ if (!found_class || !found_certificate_type) {
+ /* Hmm, let's slightly extend the attribute list we search for */
+
+ attributes_buffer = new(CK_ATTRIBUTE, n_attributes + !found_class + !found_certificate_type);
+ if (!attributes_buffer)
+ return log_oom();
+
+ memcpy(attributes_buffer, attributes, sizeof(CK_ATTRIBUTE) * n_attributes);
+
+ if (!found_class) {
+ static const CK_OBJECT_CLASS class = CKO_CERTIFICATE;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_CLASS,
+ .pValue = (CK_OBJECT_CLASS*) &class,
+ .ulValueLen = sizeof(class),
+ };
+ }
+
+ if (!found_certificate_type) {
+ static const CK_CERTIFICATE_TYPE type = CKC_X_509;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_CERTIFICATE_TYPE,
+ .pValue = (CK_CERTIFICATE_TYPE*) &type,
+ .ulValueLen = sizeof(type),
+ };
+ }
+
+ attributes = attributes_buffer;
+ }
+
+ rv = m->C_FindObjectsInit(session, attributes, n_attributes);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize object find call: %s", p11_kit_strerror(rv));
+
+ rv = m->C_FindObjects(session, objects, ELEMENTSOF(objects), &n_objects);
+ rv2 = m->C_FindObjectsFinal(session);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to find objects: %s", p11_kit_strerror(rv));
+ if (rv2 != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to finalize object find call: %s", p11_kit_strerror(rv));
+ if (n_objects == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Failed to find selected X509 certificate on token.");
+ if (n_objects > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
+ "Configured URI matches multiple certificates, refusing.");
+
+ *ret_object = objects[0];
+ return 0;
+}
+
+#if HAVE_OPENSSL
+int pkcs11_token_read_x509_certificate(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_OBJECT_HANDLE object,
+ X509 **ret_cert) {
+
+ _cleanup_free_ void *buffer = NULL;
+ _cleanup_free_ char *t = NULL;
+ CK_ATTRIBUTE attribute = {
+ .type = CKA_VALUE
+ };
+ CK_RV rv;
+ _cleanup_(X509_freep) X509 *x509 = NULL;
+ X509_NAME *name = NULL;
+ const unsigned char *p;
+
+ rv = m->C_GetAttributeValue(session, object, &attribute, 1);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to read X.509 certificate size off token: %s", p11_kit_strerror(rv));
+
+ buffer = malloc(attribute.ulValueLen);
+ if (!buffer)
+ return log_oom();
+
+ attribute.pValue = buffer;
+
+ rv = m->C_GetAttributeValue(session, object, &attribute, 1);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to read X.509 certificate data off token: %s", p11_kit_strerror(rv));
+
+ p = attribute.pValue;
+ x509 = d2i_X509(NULL, &p, attribute.ulValueLen);
+ if (!x509)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed parse X.509 certificate.");
+
+ name = X509_get_subject_name(x509);
+ if (!name)
+ return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to acquire X.509 subject name.");
+
+ t = X509_NAME_oneline(name, NULL, 0);
+ if (!t)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to format X.509 subject name as string.");
+
+ log_debug("Using X.509 certificate issued for '%s'.", t);
+
+ *ret_cert = TAKE_PTR(x509);
+ return 0;
+}
+#endif
+
+int pkcs11_token_find_private_key(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ P11KitUri *search_uri,
+ CK_OBJECT_HANDLE *ret_object) {
+
+ bool found_decrypt = false, found_class = false, found_key_type = false;
+ _cleanup_free_ CK_ATTRIBUTE *attributes_buffer = NULL;
+ CK_ULONG n_attributes, a, n_objects;
+ CK_ATTRIBUTE *attributes = NULL;
+ CK_OBJECT_HANDLE objects[2];
+ CK_RV rv, rv2;
+
+ assert(m);
+ assert(search_uri);
+ assert(ret_object);
+
+ attributes = p11_kit_uri_get_attributes(search_uri, &n_attributes);
+ for (a = 0; a < n_attributes; a++) {
+
+ /* We use the URI's included match attributes, but make them more strict. This allows users
+ * to specify a token URL instead of an object URL and the right thing should happen if
+ * there's only one suitable key on the token. */
+
+ switch (attributes[a].type) {
+
+ case CKA_CLASS: {
+ CK_OBJECT_CLASS c;
+
+ if (attributes[a].ulValueLen != sizeof(c))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_CLASS attribute size.");
+
+ memcpy(&c, attributes[a].pValue, sizeof(c));
+ if (c != CKO_PRIVATE_KEY)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Selected PKCS#11 object is not a private key, refusing.");
+
+ found_class = true;
+ break;
+ }
+
+ case CKA_DECRYPT: {
+ CK_BBOOL b;
+
+ if (attributes[a].ulValueLen != sizeof(b))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_DECRYPT attribute size.");
+
+ memcpy(&b, attributes[a].pValue, sizeof(b));
+ if (!b)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Selected PKCS#11 object is not suitable for decryption, refusing.");
+
+ found_decrypt = true;
+ break;
+ }
+
+ case CKA_KEY_TYPE: {
+ CK_KEY_TYPE t;
+
+ if (attributes[a].ulValueLen != sizeof(t))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid PKCS#11 CKA_KEY_TYPE attribute size.");
+
+ memcpy(&t, attributes[a].pValue, sizeof(t));
+ if (t != CKK_RSA)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Selected PKCS#11 object is not an RSA key, refusing.");
+
+ found_key_type = true;
+ break;
+ }}
+ }
+
+ if (!found_decrypt || !found_class || !found_key_type) {
+ /* Hmm, let's slightly extend the attribute list we search for */
+
+ attributes_buffer = new(CK_ATTRIBUTE, n_attributes + !found_decrypt + !found_class + !found_key_type);
+ if (!attributes_buffer)
+ return log_oom();
+
+ memcpy(attributes_buffer, attributes, sizeof(CK_ATTRIBUTE) * n_attributes);
+
+ if (!found_decrypt) {
+ static const CK_BBOOL yes = true;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_DECRYPT,
+ .pValue = (CK_BBOOL*) &yes,
+ .ulValueLen = sizeof(yes),
+ };
+ }
+
+ if (!found_class) {
+ static const CK_OBJECT_CLASS class = CKO_PRIVATE_KEY;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_CLASS,
+ .pValue = (CK_OBJECT_CLASS*) &class,
+ .ulValueLen = sizeof(class),
+ };
+ }
+
+ if (!found_key_type) {
+ static const CK_KEY_TYPE type = CKK_RSA;
+
+ attributes_buffer[n_attributes++] = (CK_ATTRIBUTE) {
+ .type = CKA_KEY_TYPE,
+ .pValue = (CK_KEY_TYPE*) &type,
+ .ulValueLen = sizeof(type),
+ };
+ }
+
+ attributes = attributes_buffer;
+ }
+
+ rv = m->C_FindObjectsInit(session, attributes, n_attributes);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize object find call: %s", p11_kit_strerror(rv));
+
+ rv = m->C_FindObjects(session, objects, ELEMENTSOF(objects), &n_objects);
+ rv2 = m->C_FindObjectsFinal(session);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to find objects: %s", p11_kit_strerror(rv));
+ if (rv2 != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to finalize object find call: %s", p11_kit_strerror(rv));
+ if (n_objects == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Failed to find selected private key suitable for decryption on token.");
+ if (n_objects > 1)
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ),
+ "Configured private key URI matches multiple keys, refusing.");
+
+ *ret_object = objects[0];
+ return 0;
+}
+
+int pkcs11_token_decrypt_data(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session,
+ CK_OBJECT_HANDLE object,
+ const void *encrypted_data,
+ size_t encrypted_data_size,
+ void **ret_decrypted_data,
+ size_t *ret_decrypted_data_size) {
+
+ static const CK_MECHANISM mechanism = {
+ .mechanism = CKM_RSA_PKCS
+ };
+ _cleanup_(erase_and_freep) CK_BYTE *dbuffer = NULL;
+ CK_ULONG dbuffer_size = 0;
+ CK_RV rv;
+
+ assert(m);
+ assert(encrypted_data);
+ assert(encrypted_data_size > 0);
+ assert(ret_decrypted_data);
+ assert(ret_decrypted_data_size);
+
+ rv = m->C_DecryptInit(session, (CK_MECHANISM*) &mechanism, object);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to initialize decryption on security token: %s", p11_kit_strerror(rv));
+
+ dbuffer_size = encrypted_data_size; /* Start with something reasonable */
+ dbuffer = malloc(dbuffer_size);
+ if (!dbuffer)
+ return log_oom();
+
+ rv = m->C_Decrypt(session, (CK_BYTE*) encrypted_data, encrypted_data_size, dbuffer, &dbuffer_size);
+ if (rv == CKR_BUFFER_TOO_SMALL) {
+ erase_and_free(dbuffer);
+
+ dbuffer = malloc(dbuffer_size);
+ if (!dbuffer)
+ return log_oom();
+
+ rv = m->C_Decrypt(session, (CK_BYTE*) encrypted_data, encrypted_data_size, dbuffer, &dbuffer_size);
+ }
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to decrypt key on security token: %s", p11_kit_strerror(rv));
+
+ log_info("Successfully decrypted key with security token.");
+
+ *ret_decrypted_data = TAKE_PTR(dbuffer);
+ *ret_decrypted_data_size = dbuffer_size;
+ return 0;
+}
+
+int pkcs11_token_acquire_rng(
+ CK_FUNCTION_LIST *m,
+ CK_SESSION_HANDLE session) {
+
+ _cleanup_free_ void *buffer = NULL;
+ _cleanup_close_ int fd = -1;
+ size_t rps;
+ CK_RV rv;
+ int r;
+
+ assert(m);
+
+ /* While we are at it, let's read some RNG data from the PKCS#11 token and pass it to the kernel
+ * random pool. This should be cheap if we are talking to the device already. Note that we don't
+ * credit any entropy, since we don't know about the quality of the pkcs#11 token's RNG. Why bother
+ * at all? There are two sides to the argument whether to generate private keys on tokens or on the
+ * host. By crediting some data from the token RNG to the host's pool we at least can say that any
+ * key generated from it is at least as good as both sources individually. */
+
+ rps = random_pool_size();
+
+ buffer = malloc(rps);
+ if (!buffer)
+ return log_oom();
+
+ rv = m->C_GenerateRandom(session, buffer, rps);
+ if (rv != CKR_OK)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Failed to generate RNG data on security token: %s", p11_kit_strerror(rv));
+
+ fd = open("/dev/urandom", O_WRONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open /dev/urandom for writing: %m");
+
+ r = loop_write(fd, buffer, rps, false);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to write PKCS#11 acquired random data to /dev/urandom: %m");
+
+ log_debug("Successfully written %zu bytes random data acquired via PKCS#11 to kernel random pool.", rps);
+
+ return 0;
+}
+
+static int token_process(
+ CK_FUNCTION_LIST *m,
+ CK_SLOT_ID slotid,
+ const CK_SLOT_INFO *slot_info,
+ const CK_TOKEN_INFO *token_info,
+ P11KitUri *search_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_free_ char *token_label = NULL;
+ CK_SESSION_HANDLE session;
+ CK_RV rv;
+ int r;
+
+ assert(m);
+ assert(slot_info);
+ assert(token_info);
+
+ token_label = pkcs11_token_label(token_info);
+ if (!token_label)
+ return log_oom();
+
+ rv = m->C_OpenSession(slotid, CKF_SERIAL_SESSION, NULL, NULL, &session);
+ if (rv != CKR_OK)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO),
+ "Failed to create session for security token '%s': %s", token_label, p11_kit_strerror(rv));
+
+ if (callback)
+ r = callback(m, session, slotid, slot_info, token_info, search_uri, userdata);
+ else
+ r = 1; /* if not callback was specified, just say we found what we were looking for */
+
+ rv = m->C_CloseSession(session);
+ if (rv != CKR_OK)
+ log_warning("Failed to close session on PKCS#11 token, ignoring: %s", p11_kit_strerror(rv));
+
+ return r;
+}
+
+static int slot_process(
+ CK_FUNCTION_LIST *m,
+ CK_SLOT_ID slotid,
+ P11KitUri *search_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_(p11_kit_uri_freep) P11KitUri* slot_uri = NULL, *token_uri = NULL;
+ _cleanup_free_ char *token_uri_string = NULL;
+ CK_TOKEN_INFO token_info;
+ CK_SLOT_INFO slot_info;
+ int uri_result;
+ CK_RV rv;
+
+ assert(m);
+
+ /* We return -EAGAIN for all failures we can attribute to a specific slot in some way, so that the
+ * caller might try other slots before giving up. */
+
+ rv = m->C_GetSlotInfo(slotid, &slot_info);
+ if (rv != CKR_OK) {
+ log_warning("Failed to acquire slot info for slot %lu, ignoring slot: %s", slotid, p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+
+ slot_uri = uri_from_slot_info(&slot_info);
+ if (!slot_uri)
+ return log_oom();
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *slot_uri_string = NULL;
+
+ uri_result = p11_kit_uri_format(slot_uri, P11_KIT_URI_FOR_ANY, &slot_uri_string);
+ if (uri_result != P11_KIT_URI_OK) {
+ log_warning("Failed to format slot URI, ignoring slot: %s", p11_kit_uri_message(uri_result));
+ return -EAGAIN;
+ }
+
+ log_debug("Found slot with URI %s", slot_uri_string);
+ }
+
+ rv = m->C_GetTokenInfo(slotid, &token_info);
+ if (rv == CKR_TOKEN_NOT_PRESENT) {
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "Token not present in slot, ignoring.");
+ } else if (rv != CKR_OK) {
+ log_warning("Failed to acquire token info for slot %lu, ignoring slot: %s", slotid, p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+
+ token_uri = uri_from_token_info(&token_info);
+ if (!token_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(token_uri, P11_KIT_URI_FOR_ANY, &token_uri_string);
+ if (uri_result != P11_KIT_URI_OK) {
+ log_warning("Failed to format slot URI: %s", p11_kit_uri_message(uri_result));
+ return -EAGAIN;
+ }
+
+ if (search_uri && !p11_kit_uri_match_token_info(search_uri, &token_info))
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "Found non-matching token with URI %s.",
+ token_uri_string);
+
+ log_debug("Found matching token with URI %s.", token_uri_string);
+
+ return token_process(
+ m,
+ slotid,
+ &slot_info,
+ &token_info,
+ search_uri,
+ callback,
+ userdata);
+}
+
+static int module_process(
+ CK_FUNCTION_LIST *m,
+ P11KitUri *search_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_free_ char *name = NULL, *module_uri_string = NULL;
+ _cleanup_(p11_kit_uri_freep) P11KitUri* module_uri = NULL;
+ _cleanup_free_ CK_SLOT_ID *slotids = NULL;
+ CK_ULONG n_slotids = 0;
+ int uri_result;
+ CK_INFO info;
+ size_t k;
+ CK_RV rv;
+ int r;
+
+ assert(m);
+
+ /* We ignore most errors from modules here, in order to skip over faulty modules: one faulty module
+ * should not have the effect that we don't try the others anymore. We indicate such per-module
+ * failures with -EAGAIN, which let's the caller try the next module. */
+
+ name = p11_kit_module_get_name(m);
+ if (!name)
+ return log_oom();
+
+ log_debug("Trying PKCS#11 module %s.", name);
+
+ rv = m->C_GetInfo(&info);
+ if (rv != CKR_OK) {
+ log_warning("Failed to get info on PKCS#11 module, ignoring module: %s", p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+
+ module_uri = uri_from_module_info(&info);
+ if (!module_uri)
+ return log_oom();
+
+ uri_result = p11_kit_uri_format(module_uri, P11_KIT_URI_FOR_ANY, &module_uri_string);
+ if (uri_result != P11_KIT_URI_OK) {
+ log_warning("Failed to format module URI, ignoring module: %s", p11_kit_uri_message(uri_result));
+ return -EAGAIN;
+ }
+
+ log_debug("Found module with URI %s", module_uri_string);
+
+ rv = pkcs11_get_slot_list_malloc(m, &slotids, &n_slotids);
+ if (rv != CKR_OK) {
+ log_warning("Failed to get slot list, ignoring module: %s", p11_kit_strerror(rv));
+ return -EAGAIN;
+ }
+ if (n_slotids == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EAGAIN),
+ "This module has no slots? Ignoring module.");
+
+ for (k = 0; k < n_slotids; k++) {
+ r = slot_process(
+ m,
+ slotids[k],
+ search_uri,
+ callback,
+ userdata);
+ if (r != -EAGAIN)
+ return r;
+ }
+
+ return -EAGAIN;
+}
+
+int pkcs11_find_token(
+ const char *pkcs11_uri,
+ pkcs11_find_token_callback_t callback,
+ void *userdata) {
+
+ _cleanup_(p11_kit_modules_finalize_and_releasep) CK_FUNCTION_LIST **modules = NULL;
+ _cleanup_(p11_kit_uri_freep) P11KitUri *search_uri = NULL;
+ int r;
+
+ /* Execute the specified callback for each matching token found. If nothing is found returns
+ * -EAGAIN. Logs about all errors, except for EAGAIN, which the caller has to log about. */
+
+ if (pkcs11_uri) {
+ r = uri_from_string(pkcs11_uri, &search_uri);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse PKCS#11 URI '%s': %m", pkcs11_uri);
+ }
+
+ modules = p11_kit_modules_load_and_initialize(0);
+ if (!modules)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize pkcs11 modules");
+
+ for (CK_FUNCTION_LIST **i = modules; *i; i++) {
+ r = module_process(
+ *i,
+ search_uri,
+ callback,
+ userdata);
+ if (r != -EAGAIN)
+ return r;
+ }
+
+ return -EAGAIN;
+}
+
+#endif
diff --git a/src/shared/pkcs11-util.h b/src/shared/pkcs11-util.h
new file mode 100644
index 0000000..f14607d
--- /dev/null
+++ b/src/shared/pkcs11-util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#if HAVE_P11KIT
+# include <p11-kit/p11-kit.h>
+# include <p11-kit/uri.h>
+#endif
+
+#include "macro.h"
+#include "openssl-util.h"
+#include "time-util.h"
+
+bool pkcs11_uri_valid(const char *uri);
+
+#if HAVE_P11KIT
+int uri_from_string(const char *p, P11KitUri **ret);
+
+P11KitUri *uri_from_module_info(const CK_INFO *info);
+P11KitUri *uri_from_slot_info(const CK_SLOT_INFO *slot_info);
+P11KitUri *uri_from_token_info(const CK_TOKEN_INFO *token_info);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(P11KitUri*, p11_kit_uri_free);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CK_FUNCTION_LIST**, p11_kit_modules_finalize_and_release);
+
+CK_RV pkcs11_get_slot_list_malloc(CK_FUNCTION_LIST *m, CK_SLOT_ID **ret_slotids, CK_ULONG *ret_n_slotids);
+
+char *pkcs11_token_label(const CK_TOKEN_INFO *token_info);
+char *pkcs11_token_manufacturer_id(const CK_TOKEN_INFO *token_info);
+char *pkcs11_token_model(const CK_TOKEN_INFO *token_info);
+
+int pkcs11_token_login(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_SLOT_ID slotid, const CK_TOKEN_INFO *token_info, const char *friendly_name, const char *icon_name, const char *keyname, usec_t until, char **ret_used_pin);
+
+int pkcs11_token_find_x509_certificate(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, P11KitUri *search_uri, CK_OBJECT_HANDLE *ret_object);
+#if HAVE_OPENSSL
+int pkcs11_token_read_x509_certificate(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_OBJECT_HANDLE object, X509 **ret_cert);
+#endif
+
+int pkcs11_token_find_private_key(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, P11KitUri *search_uri, CK_OBJECT_HANDLE *ret_object);
+int pkcs11_token_decrypt_data(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_OBJECT_HANDLE object, const void *encrypted_data, size_t encrypted_data_size, void **ret_decrypted_data, size_t *ret_decrypted_data_size);
+
+int pkcs11_token_acquire_rng(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session);
+
+typedef int (*pkcs11_find_token_callback_t)(CK_FUNCTION_LIST *m, CK_SESSION_HANDLE session, CK_SLOT_ID slotid, const CK_SLOT_INFO *slot_info, const CK_TOKEN_INFO *token_info, P11KitUri *uri, void *userdata);
+int pkcs11_find_token(const char *pkcs11_uri, pkcs11_find_token_callback_t callback, void *userdata);
+#endif
diff --git a/src/shared/pretty-print.c b/src/shared/pretty-print.c
new file mode 100644
index 0000000..ca5b25a
--- /dev/null
+++ b/src/shared/pretty-print.c
@@ -0,0 +1,325 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/utsname.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "conf-files.h"
+#include "def.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "pager.h"
+#include "path-util.h"
+#include "pretty-print.h"
+#include "string-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "util.h"
+
+bool urlify_enabled(void) {
+ static int cached_urlify_enabled = -1;
+
+ /* Unfortunately 'less' doesn't support links like this yet 😭, hence let's disable this as long as there's a
+ * pager in effect. Let's drop this check as soon as less got fixed a and enough time passed so that it's safe
+ * to assume that a link-enabled 'less' version has hit most installations. */
+
+ if (cached_urlify_enabled < 0) {
+ int val;
+
+ val = getenv_bool("SYSTEMD_URLIFY");
+ if (val >= 0)
+ cached_urlify_enabled = val;
+ else
+ cached_urlify_enabled = colors_enabled() && !pager_have();
+ }
+
+ return cached_urlify_enabled;
+}
+
+int terminal_urlify(const char *url, const char *text, char **ret) {
+ char *n;
+
+ assert(url);
+
+ /* Takes an URL and a pretty string and formats it as clickable link for the terminal. See
+ * https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda for details. */
+
+ if (isempty(text))
+ text = url;
+
+ if (urlify_enabled())
+ n = strjoin("\x1B]8;;", url, "\a", text, "\x1B]8;;\a");
+ else
+ n = strdup(text);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int file_url_from_path(const char *path, char **ret) {
+ _cleanup_free_ char *absolute = NULL;
+ struct utsname u;
+ char *url = NULL;
+ int r;
+
+ if (uname(&u) < 0)
+ return -errno;
+
+ if (!path_is_absolute(path)) {
+ r = path_make_absolute_cwd(path, &absolute);
+ if (r < 0)
+ return r;
+
+ path = absolute;
+ }
+
+ /* As suggested by https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda, let's include the local
+ * hostname here. Note that we don't use gethostname_malloc() or gethostname_strict() since we are interested
+ * in the raw string the kernel has set, whatever it may be, under the assumption that terminals are not overly
+ * careful with validating the strings either. */
+
+ url = strjoin("file://", u.nodename, path);
+ if (!url)
+ return -ENOMEM;
+
+ *ret = url;
+ return 0;
+}
+
+int terminal_urlify_path(const char *path, const char *text, char **ret) {
+ _cleanup_free_ char *url = NULL;
+ int r;
+
+ assert(path);
+
+ /* Much like terminal_urlify() above, but takes a file system path as input
+ * and turns it into a proper file:// URL first. */
+
+ if (isempty(path))
+ return -EINVAL;
+
+ if (isempty(text))
+ text = path;
+
+ if (!urlify_enabled()) {
+ char *n;
+
+ n = strdup(text);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+ }
+
+ r = file_url_from_path(path, &url);
+ if (r < 0)
+ return r;
+
+ return terminal_urlify(url, text, ret);
+}
+
+int terminal_urlify_man(const char *page, const char *section, char **ret) {
+ const char *url, *text;
+
+ url = strjoina("man:", page, "(", section, ")");
+ text = strjoina(page, "(", section, ") man page");
+
+ return terminal_urlify(url, text, ret);
+}
+
+static int cat_file(const char *filename, bool newline) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_free_ char *urlified = NULL;
+ int r;
+
+ f = fopen(filename, "re");
+ if (!f)
+ return -errno;
+
+ r = terminal_urlify_path(filename, NULL, &urlified);
+ if (r < 0)
+ return r;
+
+ printf("%s%s# %s%s\n",
+ newline ? "\n" : "",
+ ansi_highlight_blue(),
+ urlified,
+ ansi_normal());
+ fflush(stdout);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_error_errno(r, "Failed to read \"%s\": %m", filename);
+ if (r == 0)
+ break;
+
+ puts(line);
+ }
+
+ return 0;
+}
+
+int cat_files(const char *file, char **dropins, CatFlags flags) {
+ char **path;
+ int r;
+
+ if (file) {
+ r = cat_file(file, false);
+ if (r == -ENOENT && (flags & CAT_FLAGS_MAIN_FILE_OPTIONAL))
+ printf("%s# Configuration file %s not found%s\n",
+ ansi_highlight_magenta(),
+ file,
+ ansi_normal());
+ else if (r < 0)
+ return log_warning_errno(r, "Failed to cat %s: %m", file);
+ }
+
+ STRV_FOREACH(path, dropins) {
+ r = cat_file(*path, file || path != dropins);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to cat %s: %m", *path);
+ }
+
+ return 0;
+}
+
+void print_separator(void) {
+
+ /* Outputs a separator line that resolves to whitespace when copied from the terminal. We do that by outputting
+ * one line filled with spaces with ANSI underline set, followed by a second (empty) line. */
+
+ if (underline_enabled()) {
+ size_t i, c;
+
+ c = columns();
+
+ flockfile(stdout);
+ fputs_unlocked(ANSI_UNDERLINE, stdout);
+
+ for (i = 0; i < c; i++)
+ fputc_unlocked(' ', stdout);
+
+ fputs_unlocked(ANSI_NORMAL "\n\n", stdout);
+ funlockfile(stdout);
+ } else
+ fputs("\n\n", stdout);
+}
+
+static int guess_type(const char **name, char ***prefixes, bool *is_collection, const char **extension) {
+ /* Try to figure out if name is like tmpfiles.d/ or systemd/system-presets/,
+ * i.e. a collection of directories without a main config file. */
+
+ _cleanup_free_ char *n = NULL;
+ bool usr = false, run = false, coll = false;
+ const char *ext = ".conf";
+ /* This is static so that the array doesn't get deallocated when we exit the function */
+ static const char* const std_prefixes[] = { CONF_PATHS(""), NULL };
+ static const char* const usr_prefixes[] = { CONF_PATHS_USR(""), NULL };
+ static const char* const run_prefixes[] = { "/run/", NULL };
+
+ if (path_equal(*name, "environment.d"))
+ /* Special case: we need to include /etc/environment in the search path, even
+ * though the whole concept is called environment.d. */
+ *name = "environment";
+
+ n = strdup(*name);
+ if (!n)
+ return log_oom();
+
+ /* All systemd-style config files should support the /usr-/etc-/run split and
+ * dropins. Let's add a blanket rule that allows us to support them without keeping
+ * an explicit list. */
+ if (path_startswith(n, "systemd") && endswith(n, ".conf"))
+ usr = true;
+
+ delete_trailing_chars(n, "/");
+
+ if (endswith(n, ".d"))
+ coll = true;
+
+ if (path_equal(n, "environment"))
+ usr = true;
+
+ if (path_equal(n, "udev/hwdb.d"))
+ ext = ".hwdb";
+
+ if (path_equal(n, "udev/rules.d"))
+ ext = ".rules";
+
+ if (path_equal(n, "kernel/install.d"))
+ ext = ".install";
+
+ if (path_equal(n, "systemd/ntp-units.d")) {
+ coll = true;
+ ext = ".list";
+ }
+
+ if (path_equal(n, "systemd/relabel-extra.d")) {
+ coll = run = true;
+ ext = ".relabel";
+ }
+
+ if (PATH_IN_SET(n, "systemd/system-preset", "systemd/user-preset")) {
+ coll = true;
+ ext = ".preset";
+ }
+
+ if (path_equal(n, "systemd/user-preset"))
+ usr = true;
+
+ *prefixes = (char**) (usr ? usr_prefixes : run ? run_prefixes : std_prefixes);
+ *is_collection = coll;
+ *extension = ext;
+ return 0;
+}
+
+int conf_files_cat(const char *root, const char *name) {
+ _cleanup_strv_free_ char **dirs = NULL, **files = NULL;
+ _cleanup_free_ char *path = NULL;
+ char **prefixes, **prefix;
+ bool is_collection;
+ const char *extension;
+ char **t;
+ int r;
+
+ r = guess_type(&name, &prefixes, &is_collection, &extension);
+ if (r < 0)
+ return r;
+
+ STRV_FOREACH(prefix, prefixes) {
+ assert(endswith(*prefix, "/"));
+ r = strv_extendf(&dirs, "%s%s%s", *prefix, name,
+ is_collection ? "" : ".d");
+ if (r < 0)
+ return log_error_errno(r, "Failed to build directory list: %m");
+ }
+
+ r = conf_files_list_strv(&files, extension, root, 0, (const char* const*) dirs);
+ if (r < 0)
+ return log_error_errno(r, "Failed to query file list: %m");
+
+ if (!is_collection) {
+ path = path_join(root, "/etc", name);
+ if (!path)
+ return log_oom();
+ }
+
+ if (DEBUG_LOGGING) {
+ log_debug("Looking for configuration in:");
+ if (path)
+ log_debug(" %s", path);
+ STRV_FOREACH(t, dirs)
+ log_debug(" %s/*%s", *t, extension);
+ }
+
+ /* show */
+ return cat_files(path, files, CAT_FLAGS_MAIN_FILE_OPTIONAL);
+}
diff --git a/src/shared/pretty-print.h b/src/shared/pretty-print.h
new file mode 100644
index 0000000..4619f4e
--- /dev/null
+++ b/src/shared/pretty-print.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+void print_separator(void);
+
+int file_url_from_path(const char *path, char **ret);
+
+bool urlify_enabled(void);
+
+int terminal_urlify(const char *url, const char *text, char **ret);
+int terminal_urlify_path(const char *path, const char *text, char **ret);
+int terminal_urlify_man(const char *page, const char *section, char **ret);
+
+typedef enum CatFlags {
+ CAT_FLAGS_MAIN_FILE_OPTIONAL = 1 << 0,
+} CatFlags;
+
+int cat_files(const char *file, char **dropins, CatFlags flags);
+int conf_files_cat(const char *root, const char *name);
diff --git a/src/shared/psi-util.c b/src/shared/psi-util.c
new file mode 100644
index 0000000..7a184d5
--- /dev/null
+++ b/src/shared/psi-util.c
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "psi-util.h"
+#include "string-util.h"
+#include "stat-util.h"
+#include "strv.h"
+
+int read_resource_pressure(const char *path, PressureType type, ResourcePressure *ret) {
+ _cleanup_free_ char *line = NULL;
+ _cleanup_fclose_ FILE *f = NULL;
+ unsigned field_filled = 0;
+ ResourcePressure rp = {};
+ const char *t, *cline;
+ char *word;
+ int r;
+
+ assert(path);
+ assert(IN_SET(type, PRESSURE_TYPE_SOME, PRESSURE_TYPE_FULL));
+ assert(ret);
+
+ if (type == PRESSURE_TYPE_SOME)
+ t = "some";
+ else if (type == PRESSURE_TYPE_FULL)
+ t = "full";
+ else
+ return -EINVAL;
+
+ r = fopen_unlocked(path, "re", &f);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *l = NULL;
+ char *w;
+
+ r = read_line(f, LONG_LINE_MAX, &l);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ break;
+
+ w = first_word(l, t);
+ if (w) {
+ line = TAKE_PTR(l);
+ cline = w;
+ break;
+ }
+ }
+
+ if (!line)
+ return -ENODATA;
+
+ /* extracts either avgX=Y.Z or total=X */
+ while ((r = extract_first_word(&cline, &word, NULL, 0)) > 0) {
+ _cleanup_free_ char *w = word;
+ const char *v;
+
+ if ((v = startswith(w, "avg10="))) {
+ if (field_filled & (1U << 0))
+ return -EINVAL;
+
+ field_filled |= 1U << 0;
+ r = parse_loadavg_fixed_point(v, &rp.avg10);
+ } else if ((v = startswith(w, "avg60="))) {
+ if (field_filled & (1U << 1))
+ return -EINVAL;
+
+ field_filled |= 1U << 1;
+ r = parse_loadavg_fixed_point(v, &rp.avg60);
+ } else if ((v = startswith(w, "avg300="))) {
+ if (field_filled & (1U << 2))
+ return -EINVAL;
+
+ field_filled |= 1U << 2;
+ r = parse_loadavg_fixed_point(v, &rp.avg300);
+ } else if ((v = startswith(w, "total="))) {
+ if (field_filled & (1U << 3))
+ return -EINVAL;
+
+ field_filled |= 1U << 3;
+ r = safe_atou64(v, &rp.total);
+ } else
+ continue;
+
+ if (r < 0)
+ return r;
+ }
+
+ if (r < 0)
+ return r;
+
+ if (field_filled != 15U)
+ return -EINVAL;
+
+ *ret = rp;
+ return 0;
+}
+
+int is_pressure_supported(void) {
+ const char *p;
+
+ FOREACH_STRING(p, "/proc/pressure/cpu", "/proc/pressure/io", "/proc/pressure/memory")
+ if (access(p, F_OK) < 0) {
+ if (errno == ENOENT)
+ return 0;
+ return -errno;
+ }
+
+ return 1;
+}
diff --git a/src/shared/psi-util.h b/src/shared/psi-util.h
new file mode 100644
index 0000000..415fbbd
--- /dev/null
+++ b/src/shared/psi-util.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "parse-util.h"
+#include "time-util.h"
+
+typedef enum PressureType {
+ PRESSURE_TYPE_SOME,
+ PRESSURE_TYPE_FULL,
+} PressureType;
+
+/* Averages are stored in fixed-point with 11 bit fractions */
+typedef struct ResourcePressure {
+ loadavg_t avg10;
+ loadavg_t avg60;
+ loadavg_t avg300;
+ usec_t total;
+} ResourcePressure;
+
+/** Upstream 4.20+ format
+ *
+ * some avg10=0.22 avg60=0.17 avg300=1.11 total=58761459
+ * full avg10=0.23 avg60=0.16 avg300=1.08 total=58464525
+ */
+int read_resource_pressure(const char *path, PressureType type, ResourcePressure *ret);
+
+/* Was the kernel compiled with CONFIG_PSI=y? 1 if yes, 0 if not, negative on error. */
+int is_pressure_supported(void);
diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c
new file mode 100644
index 0000000..754b4f5
--- /dev/null
+++ b/src/shared/ptyfwd.c
@@ -0,0 +1,681 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "sd-event.h"
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "macro.h"
+#include "ptyfwd.h"
+#include "terminal-util.h"
+#include "time-util.h"
+
+struct PTYForward {
+ sd_event *event;
+
+ int input_fd;
+ int output_fd;
+ int master;
+
+ PTYForwardFlags flags;
+
+ sd_event_source *stdin_event_source;
+ sd_event_source *stdout_event_source;
+ sd_event_source *master_event_source;
+
+ sd_event_source *sigwinch_event_source;
+
+ struct termios saved_stdin_attr;
+ struct termios saved_stdout_attr;
+
+ bool close_input_fd:1;
+ bool close_output_fd:1;
+
+ bool saved_stdin:1;
+ bool saved_stdout:1;
+
+ bool stdin_readable:1;
+ bool stdin_hangup:1;
+ bool stdout_writable:1;
+ bool stdout_hangup:1;
+ bool master_readable:1;
+ bool master_writable:1;
+ bool master_hangup:1;
+
+ bool read_from_master:1;
+
+ bool done:1;
+ bool drain:1;
+
+ bool last_char_set:1;
+ char last_char;
+
+ char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
+ size_t in_buffer_full, out_buffer_full;
+
+ usec_t escape_timestamp;
+ unsigned escape_counter;
+
+ PTYForwardHandler handler;
+ void *userdata;
+};
+
+#define ESCAPE_USEC (1*USEC_PER_SEC)
+
+static void pty_forward_disconnect(PTYForward *f) {
+
+ if (!f)
+ return;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+ f->event = sd_event_unref(f->event);
+
+ if (f->output_fd >= 0) {
+ if (f->saved_stdout)
+ (void) tcsetattr(f->output_fd, TCSANOW, &f->saved_stdout_attr);
+
+ /* STDIN/STDOUT should not be non-blocking normally, so let's reset it */
+ (void) fd_nonblock(f->output_fd, false);
+ if (f->close_output_fd)
+ f->output_fd = safe_close(f->output_fd);
+ }
+
+ if (f->input_fd >= 0) {
+ if (f->saved_stdin)
+ (void) tcsetattr(f->input_fd, TCSANOW, &f->saved_stdin_attr);
+
+ (void) fd_nonblock(f->input_fd, false);
+ if (f->close_input_fd)
+ f->input_fd = safe_close(f->input_fd);
+ }
+
+ f->saved_stdout = f->saved_stdin = false;
+}
+
+static int pty_forward_done(PTYForward *f, int rcode) {
+ _cleanup_(sd_event_unrefp) sd_event *e = NULL;
+ assert(f);
+
+ if (f->done)
+ return 0;
+
+ e = sd_event_ref(f->event);
+
+ f->done = true;
+ pty_forward_disconnect(f);
+
+ if (f->handler)
+ return f->handler(f, rcode, f->userdata);
+ else
+ return sd_event_exit(e, rcode < 0 ? EXIT_FAILURE : rcode);
+}
+
+static bool look_for_escape(PTYForward *f, const char *buffer, size_t n) {
+ const char *p;
+
+ assert(f);
+ assert(buffer);
+ assert(n > 0);
+
+ for (p = buffer; p < buffer + n; p++) {
+
+ /* Check for ^] */
+ if (*p == 0x1D) {
+ usec_t nw = now(CLOCK_MONOTONIC);
+
+ if (f->escape_counter == 0 || nw > f->escape_timestamp + ESCAPE_USEC) {
+ f->escape_timestamp = nw;
+ f->escape_counter = 1;
+ } else {
+ (f->escape_counter)++;
+
+ if (f->escape_counter >= 3)
+ return true;
+ }
+ } else {
+ f->escape_timestamp = 0;
+ f->escape_counter = 0;
+ }
+ }
+
+ return false;
+}
+
+static bool ignore_vhangup(PTYForward *f) {
+ assert(f);
+
+ if (f->flags & PTY_FORWARD_IGNORE_VHANGUP)
+ return true;
+
+ if ((f->flags & PTY_FORWARD_IGNORE_INITIAL_VHANGUP) && !f->read_from_master)
+ return true;
+
+ return false;
+}
+
+static bool drained(PTYForward *f) {
+ int q = 0;
+
+ assert(f);
+
+ if (f->out_buffer_full > 0)
+ return false;
+
+ if (f->master_readable)
+ return false;
+
+ if (ioctl(f->master, TIOCINQ, &q) < 0)
+ log_debug_errno(errno, "TIOCINQ failed on master: %m");
+ else if (q > 0)
+ return false;
+
+ if (ioctl(f->master, TIOCOUTQ, &q) < 0)
+ log_debug_errno(errno, "TIOCOUTQ failed on master: %m");
+ else if (q > 0)
+ return false;
+
+ return true;
+}
+
+static int shovel(PTYForward *f) {
+ ssize_t k;
+
+ assert(f);
+
+ while ((f->stdin_readable && f->in_buffer_full <= 0) ||
+ (f->master_writable && f->in_buffer_full > 0) ||
+ (f->master_readable && f->out_buffer_full <= 0) ||
+ (f->stdout_writable && f->out_buffer_full > 0)) {
+
+ if (f->stdin_readable && f->in_buffer_full < LINE_MAX) {
+
+ k = read(f->input_fd, f->in_buffer + f->in_buffer_full, LINE_MAX - f->in_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdin_readable = false;
+ else if (errno == EIO || ERRNO_IS_DISCONNECT(errno)) {
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else if (k == 0) {
+ /* EOF on stdin */
+ f->stdin_readable = false;
+ f->stdin_hangup = true;
+
+ f->stdin_event_source = sd_event_source_unref(f->stdin_event_source);
+ } else {
+ /* Check if ^] has been pressed three times within one second. If we get this we quite
+ * immediately. */
+ if (look_for_escape(f, f->in_buffer + f->in_buffer_full, k))
+ return pty_forward_done(f, -ECANCELED);
+
+ f->in_buffer_full += (size_t) k;
+ }
+ }
+
+ if (f->master_writable && f->in_buffer_full > 0) {
+
+ k = write(f->master, f->in_buffer, f->in_buffer_full);
+ if (k < 0) {
+
+ if (IN_SET(errno, EAGAIN, EIO))
+ f->master_writable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET)) {
+ f->master_writable = f->master_readable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ assert(f->in_buffer_full >= (size_t) k);
+ memmove(f->in_buffer, f->in_buffer + k, f->in_buffer_full - k);
+ f->in_buffer_full -= k;
+ }
+ }
+
+ if (f->master_readable && f->out_buffer_full < LINE_MAX) {
+
+ k = read(f->master, f->out_buffer + f->out_buffer_full, LINE_MAX - f->out_buffer_full);
+ if (k < 0) {
+
+ /* Note that EIO on the master device
+ * might be caused by vhangup() or
+ * temporary closing of everything on
+ * the other side, we treat it like
+ * EAGAIN here and try again, unless
+ * ignore_vhangup is off. */
+
+ if (errno == EAGAIN || (errno == EIO && ignore_vhangup(f)))
+ f->master_readable = false;
+ else if (IN_SET(errno, EPIPE, ECONNRESET, EIO)) {
+ f->master_readable = f->master_writable = false;
+ f->master_hangup = true;
+
+ f->master_event_source = sd_event_source_unref(f->master_event_source);
+ } else {
+ log_error_errno(errno, "read(): %m");
+ return pty_forward_done(f, -errno);
+ }
+ } else {
+ f->read_from_master = true;
+ f->out_buffer_full += (size_t) k;
+ }
+ }
+
+ if (f->stdout_writable && f->out_buffer_full > 0) {
+
+ k = write(f->output_fd, f->out_buffer, f->out_buffer_full);
+ if (k < 0) {
+
+ if (errno == EAGAIN)
+ f->stdout_writable = false;
+ else if (errno == EIO || ERRNO_IS_DISCONNECT(errno)) {
+ f->stdout_writable = false;
+ f->stdout_hangup = true;
+ f->stdout_event_source = sd_event_source_unref(f->stdout_event_source);
+ } else {
+ log_error_errno(errno, "write(): %m");
+ return pty_forward_done(f, -errno);
+ }
+
+ } else {
+
+ if (k > 0) {
+ f->last_char = f->out_buffer[k-1];
+ f->last_char_set = true;
+ }
+
+ assert(f->out_buffer_full >= (size_t) k);
+ memmove(f->out_buffer, f->out_buffer + k, f->out_buffer_full - k);
+ f->out_buffer_full -= k;
+ }
+ }
+ }
+
+ if (f->stdin_hangup || f->stdout_hangup || f->master_hangup) {
+ /* Exit the loop if any side hung up and if there's
+ * nothing more to write or nothing we could write. */
+
+ if ((f->out_buffer_full <= 0 || f->stdout_hangup) &&
+ (f->in_buffer_full <= 0 || f->master_hangup))
+ return pty_forward_done(f, 0);
+ }
+
+ /* If we were asked to drain, and there's nothing more to handle from the master, then call the callback
+ * too. */
+ if (f->drain && drained(f))
+ return pty_forward_done(f, 0);
+
+ return 0;
+}
+
+static int on_master_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->master_event_source);
+ assert(fd >= 0);
+ assert(fd == f->master);
+
+ if (revents & (EPOLLIN|EPOLLHUP))
+ f->master_readable = true;
+
+ if (revents & (EPOLLOUT|EPOLLHUP))
+ f->master_writable = true;
+
+ return shovel(f);
+}
+
+static int on_stdin_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->stdin_event_source);
+ assert(fd >= 0);
+ assert(fd == f->input_fd);
+
+ if (revents & (EPOLLIN|EPOLLHUP))
+ f->stdin_readable = true;
+
+ return shovel(f);
+}
+
+static int on_stdout_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) {
+ PTYForward *f = userdata;
+
+ assert(f);
+ assert(e);
+ assert(e == f->stdout_event_source);
+ assert(fd >= 0);
+ assert(fd == f->output_fd);
+
+ if (revents & (EPOLLOUT|EPOLLHUP))
+ f->stdout_writable = true;
+
+ return shovel(f);
+}
+
+static int on_sigwinch_event(sd_event_source *e, const struct signalfd_siginfo *si, void *userdata) {
+ PTYForward *f = userdata;
+ struct winsize ws;
+
+ assert(f);
+ assert(e);
+ assert(e == f->sigwinch_event_source);
+
+ /* The window size changed, let's forward that. */
+ if (ioctl(f->output_fd, TIOCGWINSZ, &ws) >= 0)
+ (void) ioctl(f->master, TIOCSWINSZ, &ws);
+
+ return 0;
+}
+
+int pty_forward_new(
+ sd_event *event,
+ int master,
+ PTYForwardFlags flags,
+ PTYForward **ret) {
+
+ _cleanup_(pty_forward_freep) PTYForward *f = NULL;
+ struct winsize ws;
+ int r;
+
+ f = new(PTYForward, 1);
+ if (!f)
+ return -ENOMEM;
+
+ *f = (struct PTYForward) {
+ .flags = flags,
+ .master = -1,
+ .input_fd = -1,
+ .output_fd = -1,
+ };
+
+ if (event)
+ f->event = sd_event_ref(event);
+ else {
+ r = sd_event_default(&f->event);
+ if (r < 0)
+ return r;
+ }
+
+ if (FLAGS_SET(flags, PTY_FORWARD_READ_ONLY))
+ f->output_fd = STDOUT_FILENO;
+ else {
+ /* If we shall be invoked in interactive mode, let's switch on non-blocking mode, so that we
+ * never end up staving one direction while we block on the other. However, let's be careful
+ * here and not turn on O_NONBLOCK for stdin/stdout directly, but of re-opened copies of
+ * them. This has two advantages: when we are killed abruptly the stdin/stdout fds won't be
+ * left in O_NONBLOCK state for the next process using them. In addition, if some process
+ * running in the background wants to continue writing to our stdout it can do so without
+ * being confused by O_NONBLOCK. */
+
+ f->input_fd = fd_reopen(STDIN_FILENO, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (f->input_fd < 0) {
+ /* Handle failures gracefully, after all certain fd types cannot be reopened
+ * (sockets, …) */
+ log_debug_errno(f->input_fd, "Failed to reopen stdin, using original fd: %m");
+
+ r = fd_nonblock(STDIN_FILENO, true);
+ if (r < 0)
+ return r;
+
+ f->input_fd = STDIN_FILENO;
+ } else
+ f->close_input_fd = true;
+
+ f->output_fd = fd_reopen(STDOUT_FILENO, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
+ if (f->output_fd < 0) {
+ log_debug_errno(f->output_fd, "Failed to reopen stdout, using original fd: %m");
+
+ r = fd_nonblock(STDOUT_FILENO, true);
+ if (r < 0)
+ return r;
+
+ f->output_fd = STDOUT_FILENO;
+ } else
+ f->close_output_fd = true;
+ }
+
+ r = fd_nonblock(master, true);
+ if (r < 0)
+ return r;
+
+ f->master = master;
+
+ if (ioctl(f->output_fd, TIOCGWINSZ, &ws) < 0)
+ /* If we can't get the resolution from the output fd, then use our internal, regular width/height,
+ * i.e. something derived from $COLUMNS and $LINES if set. */
+ ws = (struct winsize) {
+ .ws_row = lines(),
+ .ws_col = columns(),
+ };
+
+ (void) ioctl(master, TIOCSWINSZ, &ws);
+
+ if (!(flags & PTY_FORWARD_READ_ONLY)) {
+ assert(f->input_fd >= 0);
+
+ if (tcgetattr(f->input_fd, &f->saved_stdin_attr) >= 0) {
+ struct termios raw_stdin_attr;
+
+ f->saved_stdin = true;
+
+ raw_stdin_attr = f->saved_stdin_attr;
+ cfmakeraw(&raw_stdin_attr);
+ raw_stdin_attr.c_oflag = f->saved_stdin_attr.c_oflag;
+ tcsetattr(f->input_fd, TCSANOW, &raw_stdin_attr);
+ }
+
+ if (tcgetattr(f->output_fd, &f->saved_stdout_attr) >= 0) {
+ struct termios raw_stdout_attr;
+
+ f->saved_stdout = true;
+
+ raw_stdout_attr = f->saved_stdout_attr;
+ cfmakeraw(&raw_stdout_attr);
+ raw_stdout_attr.c_iflag = f->saved_stdout_attr.c_iflag;
+ raw_stdout_attr.c_lflag = f->saved_stdout_attr.c_lflag;
+ tcsetattr(f->output_fd, TCSANOW, &raw_stdout_attr);
+ }
+
+ r = sd_event_add_io(f->event, &f->stdin_event_source, f->input_fd, EPOLLIN|EPOLLET, on_stdin_event, f);
+ if (r < 0 && r != -EPERM)
+ return r;
+
+ if (r >= 0)
+ (void) sd_event_source_set_description(f->stdin_event_source, "ptyfwd-stdin");
+ }
+
+ r = sd_event_add_io(f->event, &f->stdout_event_source, f->output_fd, EPOLLOUT|EPOLLET, on_stdout_event, f);
+ if (r == -EPERM)
+ /* stdout without epoll support. Likely redirected to regular file. */
+ f->stdout_writable = true;
+ else if (r < 0)
+ return r;
+ else
+ (void) sd_event_source_set_description(f->stdout_event_source, "ptyfwd-stdout");
+
+ r = sd_event_add_io(f->event, &f->master_event_source, master, EPOLLIN|EPOLLOUT|EPOLLET, on_master_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->master_event_source, "ptyfwd-master");
+
+ r = sd_event_add_signal(f->event, &f->sigwinch_event_source, SIGWINCH, on_sigwinch_event, f);
+ if (r < 0)
+ return r;
+
+ (void) sd_event_source_set_description(f->sigwinch_event_source, "ptyfwd-sigwinch");
+
+ *ret = TAKE_PTR(f);
+
+ return 0;
+}
+
+PTYForward *pty_forward_free(PTYForward *f) {
+ pty_forward_disconnect(f);
+ return mfree(f);
+}
+
+int pty_forward_get_last_char(PTYForward *f, char *ch) {
+ assert(f);
+ assert(ch);
+
+ if (!f->last_char_set)
+ return -ENXIO;
+
+ *ch = f->last_char;
+ return 0;
+}
+
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool b) {
+ int r;
+
+ assert(f);
+
+ if (!!(f->flags & PTY_FORWARD_IGNORE_VHANGUP) == b)
+ return 0;
+
+ SET_FLAG(f->flags, PTY_FORWARD_IGNORE_VHANGUP, b);
+
+ if (!ignore_vhangup(f)) {
+
+ /* We shall now react to vhangup()s? Let's check
+ * immediately if we might be in one */
+
+ f->master_readable = true;
+ r = shovel(f);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+bool pty_forward_get_ignore_vhangup(PTYForward *f) {
+ assert(f);
+
+ return !!(f->flags & PTY_FORWARD_IGNORE_VHANGUP);
+}
+
+bool pty_forward_is_done(PTYForward *f) {
+ assert(f);
+
+ return f->done;
+}
+
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler cb, void *userdata) {
+ assert(f);
+
+ f->handler = cb;
+ f->userdata = userdata;
+}
+
+bool pty_forward_drain(PTYForward *f) {
+ assert(f);
+
+ /* Starts draining the forwarder. Specifically:
+ *
+ * - Returns true if there are no unprocessed bytes from the pty, false otherwise
+ *
+ * - Makes sure the handler function is called the next time the number of unprocessed bytes hits zero
+ */
+
+ f->drain = true;
+ return drained(f);
+}
+
+int pty_forward_set_priority(PTYForward *f, int64_t priority) {
+ int r;
+ assert(f);
+
+ if (f->stdin_event_source) {
+ r = sd_event_source_set_priority(f->stdin_event_source, priority);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_priority(f->stdout_event_source, priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(f->master_event_source, priority);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(f->sigwinch_event_source, priority);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height) {
+ struct winsize ws;
+
+ assert(f);
+
+ if (width == (unsigned) -1 && height == (unsigned) -1)
+ return 0; /* noop */
+
+ if (width != (unsigned) -1 &&
+ (width == 0 || width > USHRT_MAX))
+ return -ERANGE;
+
+ if (height != (unsigned) -1 &&
+ (height == 0 || height > USHRT_MAX))
+ return -ERANGE;
+
+ if (width == (unsigned) -1 || height == (unsigned) -1) {
+ if (ioctl(f->master, TIOCGWINSZ, &ws) < 0)
+ return -errno;
+
+ if (width != (unsigned) -1)
+ ws.ws_col = width;
+ if (height != (unsigned) -1)
+ ws.ws_row = height;
+ } else
+ ws = (struct winsize) {
+ .ws_row = height,
+ .ws_col = width,
+ };
+
+ if (ioctl(f->master, TIOCSWINSZ, &ws) < 0)
+ return -errno;
+
+ /* Make sure we ignore SIGWINCH window size events from now on */
+ f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source);
+
+ return 0;
+}
diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h
new file mode 100644
index 0000000..f0ae6e9
--- /dev/null
+++ b/src/shared/ptyfwd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-event.h"
+
+#include "macro.h"
+
+typedef struct PTYForward PTYForward;
+
+typedef enum PTYForwardFlags {
+ PTY_FORWARD_READ_ONLY = 1,
+
+ /* Continue reading after hangup? */
+ PTY_FORWARD_IGNORE_VHANGUP = 2,
+
+ /* Continue reading after hangup but only if we never read anything else? */
+ PTY_FORWARD_IGNORE_INITIAL_VHANGUP = 4,
+} PTYForwardFlags;
+
+typedef int (*PTYForwardHandler)(PTYForward *f, int rcode, void *userdata);
+
+int pty_forward_new(sd_event *event, int master, PTYForwardFlags flags, PTYForward **f);
+PTYForward *pty_forward_free(PTYForward *f);
+
+int pty_forward_get_last_char(PTYForward *f, char *ch);
+
+int pty_forward_set_ignore_vhangup(PTYForward *f, bool ignore_vhangup);
+bool pty_forward_get_ignore_vhangup(PTYForward *f);
+
+bool pty_forward_is_done(PTYForward *f);
+
+void pty_forward_set_handler(PTYForward *f, PTYForwardHandler handler, void *userdata);
+
+bool pty_forward_drain(PTYForward *f);
+
+int pty_forward_set_priority(PTYForward *f, int64_t priority);
+
+int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(PTYForward*, pty_forward_free);
diff --git a/src/shared/pwquality-util.c b/src/shared/pwquality-util.c
new file mode 100644
index 0000000..4000bef
--- /dev/null
+++ b/src/shared/pwquality-util.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "dlfcn-util.h"
+#include "errno-util.h"
+#include "log.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "pwquality-util.h"
+#include "strv.h"
+
+#if HAVE_PWQUALITY
+
+static void *pwquality_dl = NULL;
+
+int (*sym_pwquality_check)(pwquality_settings_t *pwq, const char *password, const char *oldpassword, const char *user, void **auxerror);
+pwquality_settings_t *(*sym_pwquality_default_settings)(void);
+void (*sym_pwquality_free_settings)(pwquality_settings_t *pwq);
+int (*sym_pwquality_generate)(pwquality_settings_t *pwq, int entropy_bits, char **password);
+int (*sym_pwquality_get_str_value)(pwquality_settings_t *pwq, int setting, const char **value);
+int (*sym_pwquality_read_config)(pwquality_settings_t *pwq, const char *cfgfile, void **auxerror);
+int (*sym_pwquality_set_int_value)(pwquality_settings_t *pwq, int setting, int value);
+const char* (*sym_pwquality_strerror)(char *buf, size_t len, int errcode, void *auxerror);
+
+int dlopen_pwquality(void) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int r;
+
+ if (pwquality_dl)
+ return 0; /* Already loaded */
+
+ dl = dlopen("libpwquality.so.1", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "libpwquality support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_pwquality_check, "pwquality_check",
+ &sym_pwquality_default_settings, "pwquality_default_settings",
+ &sym_pwquality_free_settings, "pwquality_free_settings",
+ &sym_pwquality_generate, "pwquality_generate",
+ &sym_pwquality_get_str_value, "pwquality_get_str_value",
+ &sym_pwquality_read_config, "pwquality_read_config",
+ &sym_pwquality_set_int_value, "pwquality_set_int_value",
+ &sym_pwquality_strerror, "pwquality_strerror",
+ NULL);
+ if (r < 0)
+ return r;
+
+ /* Note that we never release the reference here, because there's no real reason to, after all this
+ * was traditionally a regular shared library dependency which lives forever too. */
+ pwquality_dl = TAKE_PTR(dl);
+ return 1;
+}
+
+void pwq_maybe_disable_dictionary(pwquality_settings_t *pwq) {
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ const char *path;
+ int r;
+
+ assert(pwq);
+
+ r = sym_pwquality_get_str_value(pwq, PWQ_SETTING_DICT_PATH, &path);
+ if (r < 0) {
+ log_debug("Failed to read libpwquality dictionary path, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, NULL));
+ return;
+ }
+
+ // REMOVE THIS AS SOON AS https://github.com/libpwquality/libpwquality/pull/21 IS MERGED AND RELEASED
+ if (isempty(path))
+ path = "/usr/share/cracklib/pw_dict.pwd.gz";
+
+ if (isempty(path)) {
+ log_debug("Weird, no dictionary file configured, ignoring.");
+ return;
+ }
+
+ if (access(path, F_OK) >= 0)
+ return;
+
+ if (errno != ENOENT) {
+ log_debug_errno(errno, "Failed to check if dictionary file %s exists, ignoring: %m", path);
+ return;
+ }
+
+ r = sym_pwquality_set_int_value(pwq, PWQ_SETTING_DICT_CHECK, 0);
+ if (r < 0)
+ log_debug("Failed to disable libpwquality dictionary check, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, NULL));
+}
+
+int pwq_allocate_context(pwquality_settings_t **ret) {
+ _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL;
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ void *auxerror;
+ int r;
+
+ assert(ret);
+
+ r = dlopen_pwquality();
+ if (r < 0)
+ return r;
+
+ pwq = sym_pwquality_default_settings();
+ if (!pwq)
+ return -ENOMEM;
+
+ r = sym_pwquality_read_config(pwq, NULL, &auxerror);
+ if (r < 0)
+ log_debug("Failed to read libpwquality configuration, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, auxerror));
+
+ pwq_maybe_disable_dictionary(pwq);
+
+ *ret = TAKE_PTR(pwq);
+ return 0;
+}
+
+#define N_SUGGESTIONS 6
+
+int suggest_passwords(void) {
+ _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL;
+ _cleanup_strv_free_erase_ char **suggestions = NULL;
+ _cleanup_(erase_and_freep) char *joined = NULL;
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ size_t i;
+ int r;
+
+ r = pwq_allocate_context(&pwq);
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return 0;
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate libpwquality context: %m");
+
+ suggestions = new0(char*, N_SUGGESTIONS+1);
+ if (!suggestions)
+ return log_oom();
+
+ for (i = 0; i < N_SUGGESTIONS; i++) {
+ r = sym_pwquality_generate(pwq, 64, suggestions + i);
+ if (r < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate password, ignoring: %s",
+ sym_pwquality_strerror(buf, sizeof(buf), r, NULL));
+ }
+
+ joined = strv_join(suggestions, " ");
+ if (!joined)
+ return log_oom();
+
+ log_info("Password suggestions: %s", joined);
+ return 1;
+}
+
+int quality_check_password(const char *password, const char *username, char **ret_error) {
+ _cleanup_(sym_pwquality_free_settingsp) pwquality_settings_t *pwq = NULL;
+ char buf[PWQ_MAX_ERROR_MESSAGE_LEN];
+ void *auxerror;
+ int r;
+
+ assert(password);
+
+ r = pwq_allocate_context(&pwq);
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ return 0;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to allocate libpwquality context: %m");
+
+ r = sym_pwquality_check(pwq, password, NULL, username, &auxerror);
+ if (r < 0) {
+
+ if (ret_error) {
+ _cleanup_free_ char *e = NULL;
+
+ e = strdup(sym_pwquality_strerror(buf, sizeof(buf), r, auxerror));
+ if (!e)
+ return -ENOMEM;
+
+ *ret_error = TAKE_PTR(e);
+ }
+
+ return 0; /* all bad */
+ }
+
+ return 1; /* all good */
+}
+
+#endif
diff --git a/src/shared/pwquality-util.h b/src/shared/pwquality-util.h
new file mode 100644
index 0000000..de288bb
--- /dev/null
+++ b/src/shared/pwquality-util.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "macro.h"
+
+#if HAVE_PWQUALITY
+/* pwquality.h uses size_t but doesn't include sys/types.h on its own */
+#include <sys/types.h>
+#include <pwquality.h>
+
+extern int (*sym_pwquality_check)(pwquality_settings_t *pwq, const char *password, const char *oldpassword, const char *user, void **auxerror);
+extern pwquality_settings_t *(*sym_pwquality_default_settings)(void);
+extern void (*sym_pwquality_free_settings)(pwquality_settings_t *pwq);
+extern int (*sym_pwquality_generate)(pwquality_settings_t *pwq, int entropy_bits, char **password);
+extern int (*sym_pwquality_get_str_value)(pwquality_settings_t *pwq, int setting, const char **value);
+extern int (*sym_pwquality_read_config)(pwquality_settings_t *pwq, const char *cfgfile, void **auxerror);
+extern int (*sym_pwquality_set_int_value)(pwquality_settings_t *pwq, int setting, int value);
+extern const char* (*sym_pwquality_strerror)(char *buf, size_t len, int errcode, void *auxerror);
+
+int dlopen_pwquality(void);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(pwquality_settings_t*, sym_pwquality_free_settings);
+
+void pwq_maybe_disable_dictionary(pwquality_settings_t *pwq);
+int pwq_allocate_context(pwquality_settings_t **ret);
+int suggest_passwords(void);
+int quality_check_password(const char *password, const char *username, char **ret_error);
+
+#else
+
+static inline int suggest_passwords(void) {
+ return 0;
+}
+
+static inline int quality_check_password(const char *password, const char *username, char **ret_error) {
+ if (ret_error)
+ *ret_error = NULL;
+ return 1; /* all good */
+}
+
+#endif
diff --git a/src/shared/qrcode-util.c b/src/shared/qrcode-util.c
new file mode 100644
index 0000000..7050e18
--- /dev/null
+++ b/src/shared/qrcode-util.c
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "qrcode-util.h"
+
+#if HAVE_QRENCODE
+#include <qrencode.h>
+
+#include "dlfcn-util.h"
+#include "locale-util.h"
+#include "terminal-util.h"
+
+#define ANSI_WHITE_ON_BLACK "\033[40;37;1m"
+
+static void print_border(FILE *output, unsigned width) {
+ /* Four rows of border */
+ for (unsigned y = 0; y < 4; y += 2) {
+ fputs(ANSI_WHITE_ON_BLACK, output);
+
+ for (unsigned x = 0; x < 4 + width + 4; x++)
+ fputs("\342\226\210", output);
+
+ fputs(ANSI_NORMAL "\n", output);
+ }
+}
+
+static void write_qrcode(FILE *output, QRcode *qr) {
+ assert(qr);
+
+ if (!output)
+ output = stdout;
+
+ print_border(output, qr->width);
+
+ for (unsigned y = 0; y < (unsigned) qr->width; y += 2) {
+ const uint8_t *row1 = qr->data + qr->width * y;
+ const uint8_t *row2 = row1 + qr->width;
+
+ fputs(ANSI_WHITE_ON_BLACK, output);
+ for (unsigned x = 0; x < 4; x++)
+ fputs("\342\226\210", output);
+
+ for (unsigned x = 0; x < (unsigned) qr->width; x++) {
+ bool a, b;
+
+ a = row1[x] & 1;
+ b = (y+1) < (unsigned) qr->width ? (row2[x] & 1) : false;
+
+ if (a && b)
+ fputc(' ', output);
+ else if (a)
+ fputs("\342\226\204", output);
+ else if (b)
+ fputs("\342\226\200", output);
+ else
+ fputs("\342\226\210", output);
+ }
+
+ for (unsigned x = 0; x < 4; x++)
+ fputs("\342\226\210", output);
+ fputs(ANSI_NORMAL "\n", output);
+ }
+
+ print_border(output, qr->width);
+ fflush(output);
+}
+
+int print_qrcode(FILE *out, const char *header, const char *string) {
+ QRcode* (*sym_QRcode_encodeString)(const char *string, int version, QRecLevel level, QRencodeMode hint, int casesensitive);
+ void (*sym_QRcode_free)(QRcode *qrcode);
+ _cleanup_(dlclosep) void *dl = NULL;
+ QRcode* qr;
+ int r;
+
+ /* If this is not an UTF-8 system or ANSI colors aren't supported/disabled don't print any QR
+ * codes */
+ if (!is_locale_utf8() || !colors_enabled())
+ return -EOPNOTSUPP;
+
+ dl = dlopen("libqrencode.so.4", RTLD_LAZY);
+ if (!dl)
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "QRCODE support is not installed: %s", dlerror());
+
+ r = dlsym_many_and_warn(
+ dl,
+ LOG_DEBUG,
+ &sym_QRcode_encodeString, "QRcode_encodeString",
+ &sym_QRcode_free, "QRcode_free",
+ NULL);
+ if (r < 0)
+ return r;
+
+ qr = sym_QRcode_encodeString(string, 0, QR_ECLEVEL_L, QR_MODE_8, 0);
+ if (!qr)
+ return -ENOMEM;
+
+ if (header)
+ fprintf(out, "\n%s:\n\n", header);
+
+ write_qrcode(out, qr);
+
+ fputc('\n', out);
+
+ sym_QRcode_free(qr);
+ return 0;
+}
+#endif
diff --git a/src/shared/qrcode-util.h b/src/shared/qrcode-util.h
new file mode 100644
index 0000000..6fc45c9
--- /dev/null
+++ b/src/shared/qrcode-util.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+#include <stdio.h>
+#include <errno.h>
+
+#if HAVE_QRENCODE
+int print_qrcode(FILE *out, const char *header, const char *string);
+#else
+static inline int print_qrcode(FILE *out, const char *header, const char *string) {
+ return -EOPNOTSUPP;
+}
+#endif
diff --git a/src/shared/reboot-util.c b/src/shared/reboot-util.c
new file mode 100644
index 0000000..756f9d3
--- /dev/null
+++ b/src/shared/reboot-util.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "raw-reboot.h"
+#include "reboot-util.h"
+#include "string-util.h"
+#include "umask-util.h"
+#include "virt.h"
+
+int update_reboot_parameter_and_warn(const char *parameter, bool keep) {
+ int r;
+
+ if (isempty(parameter)) {
+ if (keep)
+ return 0;
+
+ if (unlink("/run/systemd/reboot-param") < 0) {
+ if (errno == ENOENT)
+ return 0;
+
+ return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
+ }
+
+ return 0;
+ }
+
+ RUN_WITH_UMASK(0022) {
+ r = write_string_file("/run/systemd/reboot-param", parameter,
+ WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to write reboot parameter file: %m");
+ }
+
+ return 0;
+}
+
+int read_reboot_parameter(char **parameter) {
+ int r;
+
+ assert(parameter);
+
+ r = read_one_line_file("/run/systemd/reboot-param", parameter);
+ if (r < 0 && r != -ENOENT)
+ return log_debug_errno(r, "Failed to read /run/systemd/reboot-param: %m");
+
+ return 0;
+}
+
+int reboot_with_parameter(RebootFlags flags) {
+ int r;
+
+ /* Reboots the system with a parameter that is read from /run/systemd/reboot-param. Returns 0 if
+ * REBOOT_DRY_RUN was set and the actual reboot operation was hence skipped. If REBOOT_FALLBACK is
+ * set and the reboot with parameter doesn't work out a fallback to classic reboot() is attempted. If
+ * REBOOT_FALLBACK is not set, 0 is returned instead, which should be considered indication for the
+ * caller to fall back to reboot() on its own, or somehow else deal with this. If REBOOT_LOG is
+ * specified will log about what it is going to do, as well as all errors. */
+
+ if (detect_container() == 0) {
+ _cleanup_free_ char *parameter = NULL;
+
+ r = read_one_line_file("/run/systemd/reboot-param", &parameter);
+ if (r < 0 && r != -ENOENT)
+ log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, r,
+ "Failed to read reboot parameter file, ignoring: %m");
+
+ if (!isempty(parameter)) {
+ log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG,
+ "Rebooting with argument '%s'.", parameter);
+
+ if (flags & REBOOT_DRY_RUN)
+ return 0;
+
+ (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, parameter);
+
+ log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, errno,
+ "Failed to reboot with parameter, retrying without: %m");
+ }
+ }
+
+ if (!(flags & REBOOT_FALLBACK))
+ return 0;
+
+ log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG, "Rebooting.");
+
+ if (flags & REBOOT_DRY_RUN)
+ return 0;
+
+ (void) reboot(RB_AUTOBOOT);
+
+ return log_full_errno(flags & REBOOT_LOG ? LOG_ERR : LOG_DEBUG, errno, "Failed to reboot: %m");
+}
+
+int shall_restore_state(void) {
+ bool ret;
+ int r;
+
+ r = proc_cmdline_get_bool("systemd.restore_state", &ret);
+ if (r < 0)
+ return r;
+
+ return r > 0 ? ret : true;
+}
diff --git a/src/shared/reboot-util.h b/src/shared/reboot-util.h
new file mode 100644
index 0000000..bbca8b8
--- /dev/null
+++ b/src/shared/reboot-util.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+int update_reboot_parameter_and_warn(const char *parameter, bool keep);
+
+typedef enum RebootFlags {
+ REBOOT_LOG = 1 << 0, /* log about what we are going to do and all errors */
+ REBOOT_DRY_RUN = 1 << 1, /* return 0 right before actually doing the reboot */
+ REBOOT_FALLBACK = 1 << 2, /* fall back to plain reboot() if argument-based reboot doesn't work, isn't configured or doesn't apply otherwise */
+} RebootFlags;
+
+int read_reboot_parameter(char **parameter);
+int reboot_with_parameter(RebootFlags flags);
+
+int shall_restore_state(void);
diff --git a/src/shared/resize-fs.c b/src/shared/resize-fs.c
new file mode 100644
index 0000000..33cb78b
--- /dev/null
+++ b/src/shared/resize-fs.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <linux/btrfs.h>
+#include <linux/magic.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+
+#include "blockdev-util.h"
+#include "fs-util.h"
+#include "missing_fs.h"
+#include "missing_magic.h"
+#include "missing_xfs.h"
+#include "resize-fs.h"
+#include "stat-util.h"
+
+int resize_fs(int fd, uint64_t sz, uint64_t *ret_size) {
+ struct statfs sfs;
+
+ assert(fd >= 0);
+
+ /* Rounds down to next block size */
+
+ if (sz <= 0 || sz == UINT64_MAX)
+ return -ERANGE;
+
+ if (fstatfs(fd, &sfs) < 0)
+ return -errno;
+
+ if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) {
+ uint64_t u;
+
+ if (sz < EXT4_MINIMAL_SIZE)
+ return -ERANGE;
+
+ u = sz / sfs.f_bsize;
+
+ if (ioctl(fd, EXT4_IOC_RESIZE_FS, &u) < 0)
+ return -errno;
+
+ if (ret_size)
+ *ret_size = u * sfs.f_bsize;
+
+ } else if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
+ struct btrfs_ioctl_vol_args args = {};
+
+ /* 256M is the minimize size enforced by the btrfs kernel code when resizing (which is
+ * strange btw, as mkfs.btrfs is fine creating file systems > 109M). It will return EINVAL in
+ * that case, let's catch this error beforehand though, and report a more explanatory
+ * error. */
+
+ if (sz < BTRFS_MINIMAL_SIZE)
+ return -ERANGE;
+
+ sz -= sz % sfs.f_bsize;
+
+ xsprintf(args.name, "%" PRIu64, sz);
+
+ if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
+ return -errno;
+
+ if (ret_size)
+ *ret_size = sz;
+
+ } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) {
+ xfs_fsop_geom_t geo;
+ xfs_growfs_data_t d;
+
+ if (sz < XFS_MINIMAL_SIZE)
+ return -ERANGE;
+
+ if (ioctl(fd, XFS_IOC_FSGEOMETRY, &geo) < 0)
+ return -errno;
+
+ d = (xfs_growfs_data_t) {
+ .imaxpct = geo.imaxpct,
+ .newblocks = sz / geo.blocksize,
+ };
+
+ if (ioctl(fd, XFS_IOC_FSGROWFSDATA, &d) < 0)
+ return -errno;
+
+ if (ret_size)
+ *ret_size = d.newblocks * geo.blocksize;
+
+ } else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+uint64_t minimal_size_by_fs_magic(statfs_f_type_t magic) {
+
+ switch (magic) {
+
+ case (statfs_f_type_t) EXT4_SUPER_MAGIC:
+ return EXT4_MINIMAL_SIZE;
+
+ case (statfs_f_type_t) XFS_SB_MAGIC:
+ return XFS_MINIMAL_SIZE;
+
+ case (statfs_f_type_t) BTRFS_SUPER_MAGIC:
+ return BTRFS_MINIMAL_SIZE;
+
+ default:
+ return UINT64_MAX;
+ }
+}
+
+uint64_t minimal_size_by_fs_name(const char *name) {
+
+ if (streq_ptr(name, "ext4"))
+ return EXT4_MINIMAL_SIZE;
+
+ if (streq_ptr(name, "xfs"))
+ return XFS_MINIMAL_SIZE;
+
+ if (streq_ptr(name, "btrfs"))
+ return BTRFS_MINIMAL_SIZE;
+
+ return UINT64_MAX;
+}
diff --git a/src/shared/resize-fs.h b/src/shared/resize-fs.h
new file mode 100644
index 0000000..8831fd8
--- /dev/null
+++ b/src/shared/resize-fs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "stat-util.h"
+
+int resize_fs(int fd, uint64_t sz, uint64_t *ret_size);
+
+#define BTRFS_MINIMAL_SIZE (256U*1024U*1024U)
+#define XFS_MINIMAL_SIZE (14U*1024U*1024U)
+#define EXT4_MINIMAL_SIZE (1024U*1024U)
+
+uint64_t minimal_size_by_fs_magic(statfs_f_type_t magic);
+uint64_t minimal_size_by_fs_name(const char *str);
diff --git a/src/shared/resolve-util.c b/src/shared/resolve-util.c
new file mode 100644
index 0000000..1023b62
--- /dev/null
+++ b/src/shared/resolve-util.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "resolve-util.h"
+#include "string-table.h"
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_resolve_support, resolve_support, ResolveSupport, "Failed to parse resolve support setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dnssec_mode, dnssec_mode, DnssecMode, "Failed to parse DNSSEC mode setting");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_over_tls_mode, dns_over_tls_mode, DnsOverTlsMode, "Failed to parse DNS-over-TLS mode setting");
+
+static const char* const resolve_support_table[_RESOLVE_SUPPORT_MAX] = {
+ [RESOLVE_SUPPORT_NO] = "no",
+ [RESOLVE_SUPPORT_YES] = "yes",
+ [RESOLVE_SUPPORT_RESOLVE] = "resolve",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolve_support, ResolveSupport, RESOLVE_SUPPORT_YES);
+
+static const char* const dnssec_mode_table[_DNSSEC_MODE_MAX] = {
+ [DNSSEC_NO] = "no",
+ [DNSSEC_ALLOW_DOWNGRADE] = "allow-downgrade",
+ [DNSSEC_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dnssec_mode, DnssecMode, DNSSEC_YES);
+
+static const char* const dns_over_tls_mode_table[_DNS_OVER_TLS_MODE_MAX] = {
+ [DNS_OVER_TLS_NO] = "no",
+ [DNS_OVER_TLS_OPPORTUNISTIC] = "opportunistic",
+ [DNS_OVER_TLS_YES] = "yes",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_over_tls_mode, DnsOverTlsMode, DNS_OVER_TLS_YES);
+
+bool dns_server_address_valid(int family, const union in_addr_union *sa) {
+
+ /* Refuses the 0 IP addresses as well as 127.0.0.53 (which is our own DNS stub) */
+
+ if (in_addr_is_null(family, sa))
+ return false;
+
+ if (family == AF_INET && sa->in.s_addr == htobe32(INADDR_DNS_STUB))
+ return false;
+
+ return true;
+}
+
+DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_cache_mode, dns_cache_mode, DnsCacheMode, "Failed to parse DNS cache mode setting")
+
+static const char* const dns_cache_mode_table[_DNS_CACHE_MODE_MAX] = {
+ [DNS_CACHE_MODE_YES] = "yes",
+ [DNS_CACHE_MODE_NO] = "no",
+ [DNS_CACHE_MODE_NO_NEGATIVE] = "no-negative",
+};
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_cache_mode, DnsCacheMode, DNS_CACHE_MODE_YES);
diff --git a/src/shared/resolve-util.h b/src/shared/resolve-util.h
new file mode 100644
index 0000000..4ea24a6
--- /dev/null
+++ b/src/shared/resolve-util.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "conf-parser.h"
+#include "in-addr-util.h"
+#include "macro.h"
+
+/* 127.0.0.53 in native endian */
+#define INADDR_DNS_STUB ((in_addr_t) 0x7f000035U)
+
+typedef enum DnsCacheMode DnsCacheMode;
+
+enum DnsCacheMode {
+ DNS_CACHE_MODE_NO,
+ DNS_CACHE_MODE_YES,
+ DNS_CACHE_MODE_NO_NEGATIVE,
+ _DNS_CACHE_MODE_MAX,
+ _DNS_CACHE_MODE_INVALID = 1
+};
+
+typedef enum ResolveSupport ResolveSupport;
+typedef enum DnssecMode DnssecMode;
+typedef enum DnsOverTlsMode DnsOverTlsMode;
+
+enum ResolveSupport {
+ RESOLVE_SUPPORT_NO,
+ RESOLVE_SUPPORT_YES,
+ RESOLVE_SUPPORT_RESOLVE,
+ _RESOLVE_SUPPORT_MAX,
+ _RESOLVE_SUPPORT_INVALID = -1
+};
+
+enum DnssecMode {
+ /* No DNSSEC validation is done */
+ DNSSEC_NO,
+
+ /* Validate locally, if the server knows DO, but if not,
+ * don't. Don't trust the AD bit. If the server doesn't do
+ * DNSSEC properly, downgrade to non-DNSSEC operation. Of
+ * course, we then are vulnerable to a downgrade attack, but
+ * that's life and what is configured. */
+ DNSSEC_ALLOW_DOWNGRADE,
+
+ /* Insist on DNSSEC server support, and rather fail than downgrading. */
+ DNSSEC_YES,
+
+ _DNSSEC_MODE_MAX,
+ _DNSSEC_MODE_INVALID = -1
+};
+
+enum DnsOverTlsMode {
+ /* No connection is made for DNS-over-TLS */
+ DNS_OVER_TLS_NO,
+
+ /* Try to connect using DNS-over-TLS, but if connection fails,
+ * fall back to using an unencrypted connection */
+ DNS_OVER_TLS_OPPORTUNISTIC,
+
+ /* Enforce DNS-over-TLS and require valid server certificates */
+ DNS_OVER_TLS_YES,
+
+ _DNS_OVER_TLS_MODE_MAX,
+ _DNS_OVER_TLS_MODE_INVALID = -1
+};
+
+CONFIG_PARSER_PROTOTYPE(config_parse_resolve_support);
+CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_over_tls_mode);
+CONFIG_PARSER_PROTOTYPE(config_parse_dns_cache_mode);
+
+const char* resolve_support_to_string(ResolveSupport p) _const_;
+ResolveSupport resolve_support_from_string(const char *s) _pure_;
+
+const char* dnssec_mode_to_string(DnssecMode p) _const_;
+DnssecMode dnssec_mode_from_string(const char *s) _pure_;
+
+const char* dns_over_tls_mode_to_string(DnsOverTlsMode p) _const_;
+DnsOverTlsMode dns_over_tls_mode_from_string(const char *s) _pure_;
+
+bool dns_server_address_valid(int family, const union in_addr_union *sa);
+
+const char* dns_cache_mode_to_string(DnsCacheMode p) _const_;
+DnsCacheMode dns_cache_mode_from_string(const char *s) _pure_;
+
+/* A resolv.conf file containing the DNS server and domain data we learnt from uplink, i.e. the full uplink data */
+#define PRIVATE_UPLINK_RESOLV_CONF "/run/systemd/resolve/resolv.conf"
+
+/* A resolv.conf file containing the domain data we learnt from uplink, but our own DNS server address. */
+#define PRIVATE_STUB_RESOLV_CONF "/run/systemd/resolve/stub-resolv.conf"
+
+/* A static resolv.conf file containing no domains, but only our own DNS server address */
+#define PRIVATE_STATIC_RESOLV_CONF ROOTLIBEXECDIR "/resolv.conf"
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
new file mode 100644
index 0000000..ccae9d4
--- /dev/null
+++ b/src/shared/seccomp-util.c
@@ -0,0 +1,2140 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/seccomp.h>
+#include <seccomp.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+
+#include "af-list.h"
+#include "alloc-util.h"
+#include "env-util.h"
+#include "errno-list.h"
+#include "macro.h"
+#include "nsflags.h"
+#include "nulstr-util.h"
+#include "process-util.h"
+#include "seccomp-util.h"
+#include "set.h"
+#include "string-util.h"
+#include "strv.h"
+
+const uint32_t seccomp_local_archs[] = {
+
+ /* Note: always list the native arch we are compiled as last, so that users can deny-list seccomp(), but our own calls to it still succeed */
+
+#if defined(__x86_64__) && defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X86_64,
+ SCMP_ARCH_X32, /* native */
+#elif defined(__x86_64__) && !defined(__ILP32__)
+ SCMP_ARCH_X86,
+ SCMP_ARCH_X32,
+ SCMP_ARCH_X86_64, /* native */
+#elif defined(__i386__)
+ SCMP_ARCH_X86,
+#elif defined(__aarch64__)
+ SCMP_ARCH_ARM,
+ SCMP_ARCH_AARCH64, /* native */
+#elif defined(__arm__)
+ SCMP_ARCH_ARM,
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64N32,
+ SCMP_ARCH_MIPS64N32, /* native */
+#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32
+ SCMP_ARCH_MIPS,
+ SCMP_ARCH_MIPSEL,
+ SCMP_ARCH_MIPS64,
+ SCMP_ARCH_MIPSEL64,
+ SCMP_ARCH_MIPS64N32,
+ SCMP_ARCH_MIPSEL64N32, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64LE,
+ SCMP_ARCH_PPC64, /* native */
+#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN
+ SCMP_ARCH_PPC,
+ SCMP_ARCH_PPC64,
+ SCMP_ARCH_PPC64LE, /* native */
+#elif defined(__powerpc__)
+ SCMP_ARCH_PPC,
+#elif defined(__riscv) && __riscv_xlen == 64 && defined(SCMP_ARCH_RISCV64)
+ SCMP_ARCH_RISCV64,
+#elif defined(__s390x__)
+ SCMP_ARCH_S390,
+ SCMP_ARCH_S390X, /* native */
+#elif defined(__s390__)
+ SCMP_ARCH_S390,
+#endif
+ (uint32_t) -1
+ };
+
+const char* seccomp_arch_to_string(uint32_t c) {
+ /* Maintain order used in <seccomp.h>.
+ *
+ * Names used here should be the same as those used for ConditionArchitecture=,
+ * except for "subarchitectures" like x32. */
+
+ switch(c) {
+ case SCMP_ARCH_NATIVE:
+ return "native";
+ case SCMP_ARCH_X86:
+ return "x86";
+ case SCMP_ARCH_X86_64:
+ return "x86-64";
+ case SCMP_ARCH_X32:
+ return "x32";
+ case SCMP_ARCH_ARM:
+ return "arm";
+ case SCMP_ARCH_AARCH64:
+ return "arm64";
+ case SCMP_ARCH_MIPS:
+ return "mips";
+ case SCMP_ARCH_MIPS64:
+ return "mips64";
+ case SCMP_ARCH_MIPS64N32:
+ return "mips64-n32";
+ case SCMP_ARCH_MIPSEL:
+ return "mips-le";
+ case SCMP_ARCH_MIPSEL64:
+ return "mips64-le";
+ case SCMP_ARCH_MIPSEL64N32:
+ return "mips64-le-n32";
+ case SCMP_ARCH_PPC:
+ return "ppc";
+ case SCMP_ARCH_PPC64:
+ return "ppc64";
+ case SCMP_ARCH_PPC64LE:
+ return "ppc64-le";
+#ifdef SCMP_ARCH_RISCV64
+ case SCMP_ARCH_RISCV64:
+ return "riscv64";
+#endif
+ case SCMP_ARCH_S390:
+ return "s390";
+ case SCMP_ARCH_S390X:
+ return "s390x";
+ default:
+ return NULL;
+ }
+}
+
+int seccomp_arch_from_string(const char *n, uint32_t *ret) {
+ if (!n)
+ return -EINVAL;
+
+ assert(ret);
+
+ if (streq(n, "native"))
+ *ret = SCMP_ARCH_NATIVE;
+ else if (streq(n, "x86"))
+ *ret = SCMP_ARCH_X86;
+ else if (streq(n, "x86-64"))
+ *ret = SCMP_ARCH_X86_64;
+ else if (streq(n, "x32"))
+ *ret = SCMP_ARCH_X32;
+ else if (streq(n, "arm"))
+ *ret = SCMP_ARCH_ARM;
+ else if (streq(n, "arm64"))
+ *ret = SCMP_ARCH_AARCH64;
+ else if (streq(n, "mips"))
+ *ret = SCMP_ARCH_MIPS;
+ else if (streq(n, "mips64"))
+ *ret = SCMP_ARCH_MIPS64;
+ else if (streq(n, "mips64-n32"))
+ *ret = SCMP_ARCH_MIPS64N32;
+ else if (streq(n, "mips-le"))
+ *ret = SCMP_ARCH_MIPSEL;
+ else if (streq(n, "mips64-le"))
+ *ret = SCMP_ARCH_MIPSEL64;
+ else if (streq(n, "mips64-le-n32"))
+ *ret = SCMP_ARCH_MIPSEL64N32;
+ else if (streq(n, "ppc"))
+ *ret = SCMP_ARCH_PPC;
+ else if (streq(n, "ppc64"))
+ *ret = SCMP_ARCH_PPC64;
+ else if (streq(n, "ppc64-le"))
+ *ret = SCMP_ARCH_PPC64LE;
+#ifdef SCMP_ARCH_RISCV64
+ else if (streq(n, "riscv64"))
+ *ret = SCMP_ARCH_RISCV64;
+#endif
+ else if (streq(n, "s390"))
+ *ret = SCMP_ARCH_S390;
+ else if (streq(n, "s390x"))
+ *ret = SCMP_ARCH_S390X;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int r;
+
+ /* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting
+ * any others. Also, turns off the NNP fiddling. */
+
+ seccomp = seccomp_init(default_action);
+ if (!seccomp)
+ return -ENOMEM;
+
+ if (arch != SCMP_ARCH_NATIVE &&
+ arch != seccomp_arch_native()) {
+
+ r = seccomp_arch_remove(seccomp, seccomp_arch_native());
+ if (r < 0)
+ return r;
+
+ r = seccomp_arch_add(seccomp, arch);
+ if (r < 0)
+ return r;
+
+ assert(seccomp_arch_exist(seccomp, arch) >= 0);
+ assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) == -EEXIST);
+ assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) == -EEXIST);
+ } else {
+ assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) >= 0);
+ assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) >= 0);
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ return r;
+
+#if SCMP_VER_MAJOR >= 3 || (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 4)
+ if (getenv_bool("SYSTEMD_LOG_SECCOMP") > 0) {
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_LOG, 1);
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable seccomp event logging: %m");
+ }
+#endif
+
+ *ret = TAKE_PTR(seccomp);
+ return 0;
+}
+
+static bool is_basic_seccomp_available(void) {
+ return prctl(PR_GET_SECCOMP, 0, 0, 0, 0) >= 0;
+}
+
+static bool is_seccomp_filter_available(void) {
+ return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) < 0 &&
+ errno == EFAULT;
+}
+
+bool is_seccomp_available(void) {
+ static int cached_enabled = -1;
+
+ if (cached_enabled < 0) {
+ int b;
+
+ b = getenv_bool_secure("SYSTEMD_SECCOMP");
+ if (b != 0) {
+ if (b < 0 && b != -ENXIO) /* ENXIO: env var unset */
+ log_debug_errno(b, "Failed to parse $SYSTEMD_SECCOMP value, ignoring.");
+
+ cached_enabled =
+ is_basic_seccomp_available() &&
+ is_seccomp_filter_available();
+ } else
+ cached_enabled = false;
+ }
+
+ return cached_enabled;
+}
+
+const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
+ [SYSCALL_FILTER_SET_DEFAULT] = {
+ .name = "@default",
+ .help = "System calls that are always permitted",
+ .value =
+ "brk\0"
+ "cacheflush\0"
+ "clock_getres\0"
+ "clock_getres_time64\0"
+ "clock_gettime\0"
+ "clock_gettime64\0"
+ "clock_nanosleep\0"
+ "clock_nanosleep_time64\0"
+ "execve\0"
+ "exit\0"
+ "exit_group\0"
+ "futex\0"
+ "futex_time64\0"
+ "get_robust_list\0"
+ "get_thread_area\0"
+ "getegid\0"
+ "getegid32\0"
+ "geteuid\0"
+ "geteuid32\0"
+ "getgid\0"
+ "getgid32\0"
+ "getgroups\0"
+ "getgroups32\0"
+ "getpgid\0"
+ "getpgrp\0"
+ "getpid\0"
+ "getppid\0"
+ "getresgid\0"
+ "getresgid32\0"
+ "getresuid\0"
+ "getresuid32\0"
+ "getrlimit\0" /* make sure processes can query stack size and such */
+ "getsid\0"
+ "gettid\0"
+ "gettimeofday\0"
+ "getuid\0"
+ "getuid32\0"
+ "membarrier\0"
+ "mmap\0"
+ "mmap2\0"
+ "munmap\0"
+ "nanosleep\0"
+ "pause\0"
+ "prlimit64\0"
+ "restart_syscall\0"
+ "rseq\0"
+ "rt_sigreturn\0"
+ "sched_yield\0"
+ "set_robust_list\0"
+ "set_thread_area\0"
+ "set_tid_address\0"
+ "set_tls\0"
+ "sigreturn\0"
+ "time\0"
+ "ugetrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_AIO] = {
+ .name = "@aio",
+ .help = "Asynchronous IO",
+ .value =
+ "io_cancel\0"
+ "io_destroy\0"
+ "io_getevents\0"
+ "io_pgetevents\0"
+ "io_pgetevents_time64\0"
+ "io_setup\0"
+ "io_submit\0"
+ "io_uring_enter\0"
+ "io_uring_register\0"
+ "io_uring_setup\0"
+ },
+ [SYSCALL_FILTER_SET_BASIC_IO] = {
+ .name = "@basic-io",
+ .help = "Basic IO",
+ .value =
+ "_llseek\0"
+ "close\0"
+ "close_range\0"
+ "dup\0"
+ "dup2\0"
+ "dup3\0"
+ "lseek\0"
+ "pread64\0"
+ "preadv\0"
+ "preadv2\0"
+ "pwrite64\0"
+ "pwritev\0"
+ "pwritev2\0"
+ "read\0"
+ "readv\0"
+ "write\0"
+ "writev\0"
+ },
+ [SYSCALL_FILTER_SET_CHOWN] = {
+ .name = "@chown",
+ .help = "Change ownership of files and directories",
+ .value =
+ "chown\0"
+ "chown32\0"
+ "fchown\0"
+ "fchown32\0"
+ "fchownat\0"
+ "lchown\0"
+ "lchown32\0"
+ },
+ [SYSCALL_FILTER_SET_CLOCK] = {
+ .name = "@clock",
+ .help = "Change the system time",
+ .value =
+ "adjtimex\0"
+ "clock_adjtime\0"
+ "clock_adjtime64\0"
+ "clock_settime\0"
+ "clock_settime64\0"
+ "settimeofday\0"
+ },
+ [SYSCALL_FILTER_SET_CPU_EMULATION] = {
+ .name = "@cpu-emulation",
+ .help = "System calls for CPU emulation functionality",
+ .value =
+ "modify_ldt\0"
+ "subpage_prot\0"
+ "switch_endian\0"
+ "vm86\0"
+ "vm86old\0"
+ },
+ [SYSCALL_FILTER_SET_DEBUG] = {
+ .name = "@debug",
+ .help = "Debugging, performance monitoring and tracing functionality",
+ .value =
+ "lookup_dcookie\0"
+ "perf_event_open\0"
+ "pidfd_getfd\0"
+ "ptrace\0"
+ "rtas\0"
+#if defined __s390__ || defined __s390x__
+ "s390_runtime_instr\0"
+#endif
+ "sys_debug_setcontext\0"
+ },
+ [SYSCALL_FILTER_SET_FILE_SYSTEM] = {
+ .name = "@file-system",
+ .help = "File system operations",
+ .value =
+ "access\0"
+ "chdir\0"
+ "chmod\0"
+ "close\0"
+ "creat\0"
+ "faccessat\0"
+ "faccessat2\0"
+ "fallocate\0"
+ "fchdir\0"
+ "fchmod\0"
+ "fchmodat\0"
+ "fcntl\0"
+ "fcntl64\0"
+ "fgetxattr\0"
+ "flistxattr\0"
+ "fremovexattr\0"
+ "fsetxattr\0"
+ "fstat\0"
+ "fstat64\0"
+ "fstatat64\0"
+ "fstatfs\0"
+ "fstatfs64\0"
+ "ftruncate\0"
+ "ftruncate64\0"
+ "futimesat\0"
+ "getcwd\0"
+ "getdents\0"
+ "getdents64\0"
+ "getxattr\0"
+ "inotify_add_watch\0"
+ "inotify_init\0"
+ "inotify_init1\0"
+ "inotify_rm_watch\0"
+ "lgetxattr\0"
+ "link\0"
+ "linkat\0"
+ "listxattr\0"
+ "llistxattr\0"
+ "lremovexattr\0"
+ "lsetxattr\0"
+ "lstat\0"
+ "lstat64\0"
+ "mkdir\0"
+ "mkdirat\0"
+ "mknod\0"
+ "mknodat\0"
+ "newfstatat\0"
+ "oldfstat\0"
+ "oldlstat\0"
+ "oldstat\0"
+ "open\0"
+ "openat\0"
+ "openat2\0"
+ "readlink\0"
+ "readlinkat\0"
+ "removexattr\0"
+ "rename\0"
+ "renameat\0"
+ "renameat2\0"
+ "rmdir\0"
+ "setxattr\0"
+ "stat\0"
+ "stat64\0"
+ "statfs\0"
+ "statfs64\0"
+ "statx\0"
+ "symlink\0"
+ "symlinkat\0"
+ "truncate\0"
+ "truncate64\0"
+ "unlink\0"
+ "unlinkat\0"
+ "utime\0"
+ "utimensat\0"
+ "utimensat_time64\0"
+ "utimes\0"
+ },
+ [SYSCALL_FILTER_SET_IO_EVENT] = {
+ .name = "@io-event",
+ .help = "Event loop system calls",
+ .value =
+ "_newselect\0"
+ "epoll_create\0"
+ "epoll_create1\0"
+ "epoll_ctl\0"
+ "epoll_ctl_old\0"
+ "epoll_pwait\0"
+ "epoll_wait\0"
+ "epoll_wait_old\0"
+ "eventfd\0"
+ "eventfd2\0"
+ "poll\0"
+ "ppoll\0"
+ "ppoll_time64\0"
+ "pselect6\0"
+ "pselect6_time64\0"
+ "select\0"
+ },
+ [SYSCALL_FILTER_SET_IPC] = {
+ .name = "@ipc",
+ .help = "SysV IPC, POSIX Message Queues or other IPC",
+ .value =
+ "ipc\0"
+ "memfd_create\0"
+ "mq_getsetattr\0"
+ "mq_notify\0"
+ "mq_open\0"
+ "mq_timedreceive\0"
+ "mq_timedreceive_time64\0"
+ "mq_timedsend\0"
+ "mq_timedsend_time64\0"
+ "mq_unlink\0"
+ "msgctl\0"
+ "msgget\0"
+ "msgrcv\0"
+ "msgsnd\0"
+ "pipe\0"
+ "pipe2\0"
+ "process_vm_readv\0"
+ "process_vm_writev\0"
+ "semctl\0"
+ "semget\0"
+ "semop\0"
+ "semtimedop\0"
+ "semtimedop_time64\0"
+ "shmat\0"
+ "shmctl\0"
+ "shmdt\0"
+ "shmget\0"
+ },
+ [SYSCALL_FILTER_SET_KEYRING] = {
+ .name = "@keyring",
+ .help = "Kernel keyring access",
+ .value =
+ "add_key\0"
+ "keyctl\0"
+ "request_key\0"
+ },
+ [SYSCALL_FILTER_SET_MEMLOCK] = {
+ .name = "@memlock",
+ .help = "Memory locking control",
+ .value =
+ "mlock\0"
+ "mlock2\0"
+ "mlockall\0"
+ "munlock\0"
+ "munlockall\0"
+ },
+ [SYSCALL_FILTER_SET_MODULE] = {
+ .name = "@module",
+ .help = "Loading and unloading of kernel modules",
+ .value =
+ "delete_module\0"
+ "finit_module\0"
+ "init_module\0"
+ },
+ [SYSCALL_FILTER_SET_MOUNT] = {
+ .name = "@mount",
+ .help = "Mounting and unmounting of file systems",
+ .value =
+ "chroot\0"
+ "fsconfig\0"
+ "fsmount\0"
+ "fsopen\0"
+ "fspick\0"
+ "mount\0"
+ "move_mount\0"
+ "open_tree\0"
+ "pivot_root\0"
+ "umount\0"
+ "umount2\0"
+ },
+ [SYSCALL_FILTER_SET_NETWORK_IO] = {
+ .name = "@network-io",
+ .help = "Network or Unix socket IO, should not be needed if not network facing",
+ .value =
+ "accept\0"
+ "accept4\0"
+ "bind\0"
+ "connect\0"
+ "getpeername\0"
+ "getsockname\0"
+ "getsockopt\0"
+ "listen\0"
+ "recv\0"
+ "recvfrom\0"
+ "recvmmsg\0"
+ "recvmmsg_time64\0"
+ "recvmsg\0"
+ "send\0"
+ "sendmmsg\0"
+ "sendmsg\0"
+ "sendto\0"
+ "setsockopt\0"
+ "shutdown\0"
+ "socket\0"
+ "socketcall\0"
+ "socketpair\0"
+ },
+ [SYSCALL_FILTER_SET_OBSOLETE] = {
+ /* some unknown even to libseccomp */
+ .name = "@obsolete",
+ .help = "Unusual, obsolete or unimplemented system calls",
+ .value =
+ "_sysctl\0"
+ "afs_syscall\0"
+ "bdflush\0"
+ "break\0"
+ "create_module\0"
+ "ftime\0"
+ "get_kernel_syms\0"
+ "getpmsg\0"
+ "gtty\0"
+ "idle\0"
+ "lock\0"
+ "mpx\0"
+ "prof\0"
+ "profil\0"
+ "putpmsg\0"
+ "query_module\0"
+ "security\0"
+ "sgetmask\0"
+ "ssetmask\0"
+ "stime\0"
+ "stty\0"
+ "sysfs\0"
+ "tuxcall\0"
+ "ulimit\0"
+ "uselib\0"
+ "ustat\0"
+ "vserver\0"
+ },
+ [SYSCALL_FILTER_SET_PKEY] = {
+ .name = "@pkey",
+ .help = "System calls used for memory protection keys",
+ .value =
+ "pkey_alloc\0"
+ "pkey_free\0"
+ "pkey_mprotect\0"
+ },
+ [SYSCALL_FILTER_SET_PRIVILEGED] = {
+ .name = "@privileged",
+ .help = "All system calls which need super-user capabilities",
+ .value =
+ "@chown\0"
+ "@clock\0"
+ "@module\0"
+ "@raw-io\0"
+ "@reboot\0"
+ "@swap\0"
+ "_sysctl\0"
+ "acct\0"
+ "bpf\0"
+ "capset\0"
+ "chroot\0"
+ "fanotify_init\0"
+ "fanotify_mark\0"
+ "nfsservctl\0"
+ "open_by_handle_at\0"
+ "pivot_root\0"
+ "quotactl\0"
+ "setdomainname\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "sethostname\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0" /* We list the explicit system calls here, as @setuid also includes setgid() which is not necessarily privileged */
+ "setuid32\0"
+ "vhangup\0"
+ },
+ [SYSCALL_FILTER_SET_PROCESS] = {
+ .name = "@process",
+ .help = "Process control, execution, namespacing operations",
+ .value =
+ "arch_prctl\0"
+ "capget\0" /* Able to query arbitrary processes */
+ "clone\0"
+ "clone3\0"
+ "execveat\0"
+ "fork\0"
+ "getrusage\0"
+ "kill\0"
+ "pidfd_open\0"
+ "pidfd_send_signal\0"
+ "prctl\0"
+ "rt_sigqueueinfo\0"
+ "rt_tgsigqueueinfo\0"
+ "setns\0"
+ "swapcontext\0" /* Some archs e.g. powerpc32 are using it to do userspace context switches */
+ "tgkill\0"
+ "times\0"
+ "tkill\0"
+ "unshare\0"
+ "vfork\0"
+ "wait4\0"
+ "waitid\0"
+ "waitpid\0"
+ },
+ [SYSCALL_FILTER_SET_RAW_IO] = {
+ .name = "@raw-io",
+ .help = "Raw I/O port access",
+ .value =
+ "ioperm\0"
+ "iopl\0"
+ "pciconfig_iobase\0"
+ "pciconfig_read\0"
+ "pciconfig_write\0"
+#if defined __s390__ || defined __s390x__
+ "s390_pci_mmio_read\0"
+ "s390_pci_mmio_write\0"
+#endif
+ },
+ [SYSCALL_FILTER_SET_REBOOT] = {
+ .name = "@reboot",
+ .help = "Reboot and reboot preparation/kexec",
+ .value =
+ "kexec_file_load\0"
+ "kexec_load\0"
+ "reboot\0"
+ },
+ [SYSCALL_FILTER_SET_RESOURCES] = {
+ .name = "@resources",
+ .help = "Alter resource settings",
+ .value =
+ "ioprio_set\0"
+ "mbind\0"
+ "migrate_pages\0"
+ "move_pages\0"
+ "nice\0"
+ "sched_setaffinity\0"
+ "sched_setattr\0"
+ "sched_setparam\0"
+ "sched_setscheduler\0"
+ "set_mempolicy\0"
+ "setpriority\0"
+ "setrlimit\0"
+ },
+ [SYSCALL_FILTER_SET_SETUID] = {
+ .name = "@setuid",
+ .help = "Operations for changing user/group credentials",
+ .value =
+ "setgid\0"
+ "setgid32\0"
+ "setgroups\0"
+ "setgroups32\0"
+ "setregid\0"
+ "setregid32\0"
+ "setresgid\0"
+ "setresgid32\0"
+ "setresuid\0"
+ "setresuid32\0"
+ "setreuid\0"
+ "setreuid32\0"
+ "setuid\0"
+ "setuid32\0"
+ },
+ [SYSCALL_FILTER_SET_SIGNAL] = {
+ .name = "@signal",
+ .help = "Process signal handling",
+ .value =
+ "rt_sigaction\0"
+ "rt_sigpending\0"
+ "rt_sigprocmask\0"
+ "rt_sigsuspend\0"
+ "rt_sigtimedwait\0"
+ "rt_sigtimedwait_time64\0"
+ "sigaction\0"
+ "sigaltstack\0"
+ "signal\0"
+ "signalfd\0"
+ "signalfd4\0"
+ "sigpending\0"
+ "sigprocmask\0"
+ "sigsuspend\0"
+ },
+ [SYSCALL_FILTER_SET_SWAP] = {
+ .name = "@swap",
+ .help = "Enable/disable swap devices",
+ .value =
+ "swapoff\0"
+ "swapon\0"
+ },
+ [SYSCALL_FILTER_SET_SYNC] = {
+ .name = "@sync",
+ .help = "Synchronize files and memory to storage",
+ .value =
+ "fdatasync\0"
+ "fsync\0"
+ "msync\0"
+ "sync\0"
+ "sync_file_range\0"
+ "sync_file_range2\0"
+ "syncfs\0"
+ },
+ [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = {
+ .name = "@system-service",
+ .help = "General system service operations",
+ .value =
+ "@aio\0"
+ "@basic-io\0"
+ "@chown\0"
+ "@default\0"
+ "@file-system\0"
+ "@io-event\0"
+ "@ipc\0"
+ "@keyring\0"
+ "@memlock\0"
+ "@network-io\0"
+ "@process\0"
+ "@resources\0"
+ "@setuid\0"
+ "@signal\0"
+ "@sync\0"
+ "@timer\0"
+ "capget\0"
+ "capset\0"
+ "copy_file_range\0"
+ "fadvise64\0"
+ "fadvise64_64\0"
+ "flock\0"
+ "get_mempolicy\0"
+ "getcpu\0"
+ "getpriority\0"
+ "getrandom\0"
+ "ioctl\0"
+ "ioprio_get\0"
+ "kcmp\0"
+ "madvise\0"
+ "mprotect\0"
+ "mremap\0"
+ "name_to_handle_at\0"
+ "oldolduname\0"
+ "olduname\0"
+ "personality\0"
+ "readahead\0"
+ "readdir\0"
+ "remap_file_pages\0"
+ "sched_get_priority_max\0"
+ "sched_get_priority_min\0"
+ "sched_getaffinity\0"
+ "sched_getattr\0"
+ "sched_getparam\0"
+ "sched_getscheduler\0"
+ "sched_rr_get_interval\0"
+ "sched_rr_get_interval_time64\0"
+ "sched_yield\0"
+ "sendfile\0"
+ "sendfile64\0"
+ "setfsgid\0"
+ "setfsgid32\0"
+ "setfsuid\0"
+ "setfsuid32\0"
+ "setpgid\0"
+ "setsid\0"
+ "splice\0"
+ "sysinfo\0"
+ "tee\0"
+ "umask\0"
+ "uname\0"
+ "userfaultfd\0"
+ "vmsplice\0"
+ },
+ [SYSCALL_FILTER_SET_TIMER] = {
+ .name = "@timer",
+ .help = "Schedule operations by time",
+ .value =
+ "alarm\0"
+ "getitimer\0"
+ "setitimer\0"
+ "timer_create\0"
+ "timer_delete\0"
+ "timer_getoverrun\0"
+ "timer_gettime\0"
+ "timer_gettime64\0"
+ "timer_settime\0"
+ "timer_settime64\0"
+ "timerfd_create\0"
+ "timerfd_gettime\0"
+ "timerfd_gettime64\0"
+ "timerfd_settime\0"
+ "timerfd_settime64\0"
+ "times\0"
+ },
+ [SYSCALL_FILTER_SET_KNOWN] = {
+ .name = "@known",
+ .help = "All known syscalls declared in the kernel",
+ .value =
+#include "syscall-list.h"
+ },
+};
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name) {
+ if (isempty(name) || name[0] != '@')
+ return NULL;
+
+ for (unsigned i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
+ if (streq(syscall_filter_sets[i].name, name))
+ return syscall_filter_sets + i;
+
+ return NULL;
+}
+
+static int add_syscall_filter_set(
+ scmp_filter_ctx seccomp,
+ const SyscallFilterSet *set,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added);
+
+int seccomp_add_syscall_filter_item(
+ scmp_filter_ctx *seccomp,
+ const char *name,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added) {
+
+ assert(seccomp);
+ assert(name);
+
+ if (strv_contains(exclude, name))
+ return 0;
+
+ /* Any syscalls that are handled are added to the *added strv. The pointer
+ * must be either NULL or point to a valid pre-initialized possibly-empty strv. */
+
+ if (name[0] == '@') {
+ const SyscallFilterSet *other;
+
+ other = syscall_filter_set_find(name);
+ if (!other)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Filter set %s is not known!",
+ name);
+
+ return add_syscall_filter_set(seccomp, other, action, exclude, log_missing, added);
+
+ } else {
+ int id, r;
+
+ id = seccomp_syscall_resolve_name(name);
+ if (id == __NR_SCMP_ERROR) {
+ if (log_missing)
+ log_debug("System call %s is not known, ignoring.", name);
+ return 0;
+ }
+
+ r = seccomp_rule_add_exact(seccomp, action, id, 0);
+ if (r < 0) {
+ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+ bool ignore = r == -EDOM;
+
+ if (!ignore || log_missing)
+ log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+ name, id, ignore ? ", ignoring" : "");
+ if (!ignore)
+ return r;
+ }
+
+ if (added) {
+ r = strv_extend(added, name);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+ }
+}
+
+static int add_syscall_filter_set(
+ scmp_filter_ctx seccomp,
+ const SyscallFilterSet *set,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added) {
+
+ const char *sys;
+ int r;
+
+ /* Any syscalls that are handled are added to the *added strv. It needs to be initialized. */
+
+ assert(seccomp);
+ assert(set);
+
+ NULSTR_FOREACH(sys, set->value) {
+ r = seccomp_add_syscall_filter_item(seccomp, sys, action, exclude, log_missing, added);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing) {
+ uint32_t arch;
+ int r;
+
+ assert(set);
+
+ /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for
+ * each local arch. */
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, default_action);
+ if (r < 0)
+ return r;
+
+ r = add_syscall_filter_set(seccomp, set, action, NULL, log_missing, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add filter set: %m");
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing) {
+ uint32_t arch;
+ int r;
+
+ /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Set* of syscalls, instead of a
+ * SyscallFilterSet* table. */
+
+ if (hashmap_isempty(set) && default_action == SCMP_ACT_ALLOW)
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ void *syscall_id, *val;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, default_action);
+ if (r < 0)
+ return r;
+
+ HASHMAP_FOREACH_KEY(val, syscall_id, set) {
+ uint32_t a = action;
+ int id = PTR_TO_INT(syscall_id) - 1;
+ int error = PTR_TO_INT(val);
+
+ if (error == SECCOMP_ERROR_NUMBER_KILL)
+ a = scmp_act_kill_process();
+#ifdef SCMP_ACT_LOG
+ else if (action == SCMP_ACT_LOG)
+ a = SCMP_ACT_LOG;
+#endif
+ else if (action != SCMP_ACT_ALLOW && error >= 0)
+ a = SCMP_ACT_ERRNO(error);
+
+ r = seccomp_rule_add_exact(seccomp, a, id, 0);
+ if (r < 0) {
+ /* If the system call is not known on this architecture, then that's fine, let's ignore it */
+ _cleanup_free_ char *n = NULL;
+ bool ignore;
+
+ n = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, id);
+ ignore = r == -EDOM;
+ if (!ignore || log_missing)
+ log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m",
+ strna(n), id, ignore ? ", ignoring" : "");
+ if (!ignore)
+ return r;
+ }
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_parse_syscall_filter(
+ const char *name,
+ int errno_num,
+ Hashmap *filter,
+ SeccompParseFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line) {
+
+ int r;
+
+ assert(name);
+ assert(filter);
+
+ if (name[0] == '@') {
+ const SyscallFilterSet *set;
+ const char *i;
+
+ set = syscall_filter_set_find(name);
+ if (!set) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Unknown system call group, ignoring: %s", name);
+ return 0;
+ }
+
+ NULSTR_FOREACH(i, set->value) {
+ /* Call ourselves again, for the group to parse. Note that we downgrade logging here (i.e. take
+ * away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem,
+ * not a problem in user configuration data and we shouldn't pretend otherwise by complaining
+ * about them. */
+ r = seccomp_parse_syscall_filter(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(name);
+ if (id == __NR_SCMP_ERROR) {
+ if (!(flags & SECCOMP_PARSE_PERMISSIVE))
+ return -EINVAL;
+
+ log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "Failed to parse system call, ignoring: %s", name);
+ return 0;
+ }
+
+ /* If we previously wanted to forbid a syscall and now
+ * we want to allow it, then remove it from the list. */
+ if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_ALLOW_LIST)) {
+ r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num));
+ if (r < 0)
+ switch (r) {
+ case -ENOMEM:
+ return flags & SECCOMP_PARSE_LOG ? log_oom() : -ENOMEM;
+ case -EEXIST:
+ assert_se(hashmap_update(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)) == 0);
+ break;
+ default:
+ return r;
+ }
+ } else
+ (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_namespaces(unsigned long retain) {
+ uint32_t arch;
+ int r;
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *s = NULL;
+
+ (void) namespace_flags_to_string(retain, &s);
+ log_debug("Restricting namespace to: %s.", strna(s));
+ }
+
+ /* NOOP? */
+ if (FLAGS_SET(retain, NAMESPACE_FLAGS_ALL))
+ return 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if ((retain & NAMESPACE_FLAGS_ALL) == 0)
+ /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall
+ * altogether. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 0);
+ else
+ /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the
+ * special invocation with a zero flags argument, right here. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, 0));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ for (unsigned i = 0; namespace_flag_map[i].name; i++) {
+ unsigned long f;
+
+ f = namespace_flag_map[i].flag;
+ if (FLAGS_SET(retain, f)) {
+ log_debug("Permitting %s.", namespace_flag_map[i].name);
+ continue;
+ }
+
+ log_debug("Blocking %s.", namespace_flag_map[i].name);
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(unshare),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ /* On s390/s390x the first two parameters to clone are switched */
+ if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
+ else
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(clone),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+
+ if ((retain & NAMESPACE_FLAGS_ALL) != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setns),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ break;
+ }
+ }
+ }
+ if (r < 0)
+ continue;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_protect_sysctl(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ if (IN_SET(arch,
+ SCMP_ARCH_AARCH64,
+#ifdef SCMP_ARCH_RISCV64
+ SCMP_ARCH_RISCV64,
+#endif
+ SCMP_ARCH_X32
+ ))
+ /* No _sysctl syscall */
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(_sysctl),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_protect_syslog(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(syslog),
+ 0);
+
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add syslog() rule for architecture %s, skipping %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install syslog protection rules for architecture %s, skipping %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_address_families(Set *address_families, bool allow_list) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ bool supported;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_ARM:
+ case SCMP_ARCH_AARCH64:
+ case SCMP_ARCH_MIPSEL64N32:
+ case SCMP_ARCH_MIPS64N32:
+ case SCMP_ARCH_MIPSEL64:
+ case SCMP_ARCH_MIPS64:
+#ifdef SCMP_ARCH_RISCV64
+ case SCMP_ARCH_RISCV64:
+#endif
+ /* These we know we support (i.e. are the ones that do not use socketcall()) */
+ supported = true;
+ break;
+
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_MIPSEL:
+ case SCMP_ARCH_MIPS:
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ default:
+ /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
+ * don't know */
+ supported = false;
+ break;
+ }
+
+ if (!supported)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ if (allow_list) {
+ int first = 0, last = 0;
+ void *afp;
+
+ /* If this is an allow list, we first block the address families that are out of
+ * range and then everything that is not in the set. First, we find the lowest and
+ * highest address family in the set. */
+
+ SET_FOREACH(afp, address_families) {
+ int af = PTR_TO_INT(afp);
+
+ if (af <= 0 || af >= af_max())
+ continue;
+
+ if (first == 0 || af < first)
+ first = af;
+
+ if (last == 0 || af > last)
+ last = af;
+ }
+
+ assert((first == 0) == (last == 0));
+
+ if (first == 0) {
+
+ /* No entries in the valid range, block everything */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ } else {
+
+ /* Block everything below the first entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_LT, first));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything above the last entry */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_GT, last));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ /* Block everything between the first and last entry */
+ for (int af = 1; af < af_max(); af++) {
+
+ if (set_contains(address_families, INT_TO_PTR(af)))
+ continue;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, af));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ } else {
+ void *af;
+
+ /* If this is a deny list, then generate one rule for each address family that are
+ * then combined in OR checks. */
+
+ SET_FOREACH(af, address_families) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EAFNOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
+ if (r < 0)
+ break;
+ }
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_restrict_realtime(void) {
+ static const int permitted_policies[] = {
+ SCHED_OTHER,
+ SCHED_BATCH,
+ SCHED_IDLE,
+ };
+
+ int r, max_policy = 0;
+ uint32_t arch;
+ unsigned i;
+
+ /* Determine the highest policy constant we want to allow */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] > max_policy)
+ max_policy = permitted_policies[i];
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int p;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ /* Go through all policies with lower values than that, and block them -- unless they appear in the
+ * allow list. */
+ for (p = 0; p < max_policy; p++) {
+ bool good = false;
+
+ /* Check if this is in the allow list. */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] == p) {
+ good = true;
+ break;
+ }
+
+ if (good)
+ continue;
+
+ /* Deny this policy */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, p));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ /* Deny-list all other policies, i.e. the ones with higher values. Note that all comparisons
+ * are unsigned here, hence no need no check for < 0 values. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_GT, max_policy));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
+ uint32_t arch,
+ int nr,
+ unsigned arg_cnt,
+ const struct scmp_arg_cmp arg) {
+ int r;
+
+ r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, nr);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ }
+
+ return r;
+}
+
+/* For known architectures, check that syscalls are indeed defined or not. */
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || (defined(__riscv) && __riscv_xlen == 64)
+assert_cc(SCMP_SYS(shmget) > 0);
+assert_cc(SCMP_SYS(shmat) > 0);
+assert_cc(SCMP_SYS(shmdt) > 0);
+#endif
+
+int seccomp_memory_deny_write_execute(void) {
+ uint32_t arch;
+ unsigned loaded = 0;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0, r;
+
+ log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+
+ switch (arch) {
+
+ /* Note that on some architectures shmat() isn't available, and the call is multiplexed through ipc().
+ * We ignore that here, which means there's still a way to get writable/executable
+ * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
+
+ case SCMP_ARCH_X86:
+ case SCMP_ARCH_S390:
+ filter_syscall = SCMP_SYS(mmap2);
+ block_syscall = SCMP_SYS(mmap);
+ /* shmat multiplexed, see above */
+ break;
+
+ case SCMP_ARCH_PPC:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ case SCMP_ARCH_S390X:
+ filter_syscall = SCMP_SYS(mmap);
+ /* shmat multiplexed, see above */
+ break;
+
+ case SCMP_ARCH_ARM:
+ filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ case SCMP_ARCH_AARCH64:
+#ifdef SCMP_ARCH_RISCV64
+ case SCMP_ARCH_RISCV64:
+#endif
+ filter_syscall = SCMP_SYS(mmap); /* amd64, x32, arm64 and riscv64 have only mmap */
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__) && !defined(__s390__) && !defined(__s390x__) && !(defined(__riscv) && __riscv_xlen == 64)
+#warning "Consider adding the right mmap() syscall definitions here!"
+#endif
+ }
+
+ /* Can't filter mmap() on this arch, then skip it */
+ if (filter_syscall == 0)
+ continue;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0)
+ continue;
+
+ if (block_syscall != 0) {
+ r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
+ if (r < 0)
+ continue;
+ }
+
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+
+#ifdef __NR_pkey_mprotect
+ r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(pkey_mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ continue;
+#endif
+
+ if (shmat_syscall > 0) {
+ r = add_seccomp_syscall_filter(seccomp, arch, shmat_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+ if (r < 0)
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m",
+ seccomp_arch_to_string(arch));
+ loaded++;
+ }
+
+ if (loaded == 0)
+ log_debug("Failed to install any seccomp rules for MemoryDenyWriteExecute=.");
+
+ return loaded;
+}
+
+int seccomp_restrict_archs(Set *archs) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ void *id;
+ int r;
+
+ /* This installs a filter with no rules, but that restricts the system call architectures to the specified
+ * list.
+ *
+ * There are some qualifications. However the most important use is to stop processes from bypassing
+ * system call restrictions, in case they used a broader (multiplexing) syscall which is only available
+ * in a non-native architecture. There are no holes in this use case, at least so far. */
+
+ /* Note libseccomp includes our "native" (current) architecture in the filter by default.
+ * We do not remove it. For example, our callers expect to be able to call execve() afterwards
+ * to run a program with the restrictions applied. */
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
+
+ SET_FOREACH(id, archs) {
+ r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ /* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32
+ * x32 syscalls should basically match x86-64 for everything except the pointer type.
+ * The important thing is that you can block the old 32-bit x86 syscalls.
+ * https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */
+
+ if (seccomp_arch_native() == SCMP_ARCH_X32 ||
+ set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1))) {
+
+ r = seccomp_arch_add(seccomp, SCMP_ARCH_X86_64);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ return r;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to restrict system call architectures, skipping: %m");
+
+ return 0;
+}
+
+int parse_syscall_archs(char **l, Set **ret_archs) {
+ _cleanup_set_free_ Set *archs = NULL;
+ char **s;
+ int r;
+
+ assert(l);
+ assert(ret_archs);
+
+ STRV_FOREACH(s, l) {
+ uint32_t a;
+
+ r = seccomp_arch_from_string(*s, &a);
+ if (r < 0)
+ return -EINVAL;
+
+ r = set_ensure_put(&archs, NULL, UINT32_TO_PTR(a + 1));
+ if (r < 0)
+ return -ENOMEM;
+ }
+
+ *ret_archs = TAKE_PTR(archs);
+ return 0;
+}
+
+int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) {
+ const char *i;
+ int r;
+
+ assert(set);
+
+ NULSTR_FOREACH(i, set->value) {
+
+ if (i[0] == '@') {
+ const SyscallFilterSet *more;
+
+ more = syscall_filter_set_find(i);
+ if (!more)
+ return -ENXIO;
+
+ r = seccomp_filter_set_add(filter, add, more);
+ if (r < 0)
+ return r;
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(i);
+ if (id == __NR_SCMP_ERROR) {
+ log_debug("Couldn't resolve system call, ignoring: %s", i);
+ continue;
+ }
+
+ if (add) {
+ r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(-1));
+ if (r < 0)
+ return r;
+ } else
+ (void) hashmap_remove(filter, INT_TO_PTR(id + 1));
+ }
+ }
+
+ return 0;
+}
+
+int seccomp_lock_personality(unsigned long personality) {
+ uint32_t arch;
+ int r;
+
+ if (personality >= PERSONALITY_INVALID)
+ return -EINVAL;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(personality),
+ 1,
+ SCMP_A0(SCMP_CMP_NE, personality));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+int seccomp_protect_hostname(void) {
+ uint32_t arch;
+ int r;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sethostname),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(setdomainname),
+ 0);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to apply hostname restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+static int seccomp_restrict_sxid(scmp_filter_ctx seccomp, mode_t m) {
+ /* Checks the mode_t parameter of the following system calls:
+ *
+ * → chmod() + fchmod() + fchmodat()
+ * → open() + creat() + openat()
+ * → mkdir() + mkdirat()
+ * → mknod() + mknodat()
+ *
+ * Returns error if *everything* failed, and 0 otherwise.
+ */
+ int r;
+ bool any = false;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(chmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for chmod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for fchmod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(fchmodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for fchmodat: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdir),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mkdir: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mkdirat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mkdirat: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknod),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mknod: %m");
+ else
+ any = true;
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mknodat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for mknodat: %m");
+ else
+ any = true;
+
+#if SCMP_SYS(open) > 0
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(open),
+ 2,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A2(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for open: %m");
+ else
+ any = true;
+#endif
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(openat),
+ 2,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT),
+ SCMP_A3(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for openat: %m");
+ else
+ any = true;
+
+#if defined(__SNR_openat2)
+ /* The new openat2() system call can't be filtered sensibly, since it moves the flags parameter into
+ * an indirect structure. Let's block it entirely for now. That should be a reasonably OK thing to do
+ * for now, since openat2() is very new and code generally needs fallback logic anyway to be
+ * compatible with kernels that are not absolutely recent. */
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(openat2),
+ 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for openat2: %m");
+ else
+ any = true;
+#endif
+
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(creat),
+ 1,
+ SCMP_A1(SCMP_CMP_MASKED_EQ, m, m));
+ if (r < 0)
+ log_debug_errno(r, "Failed to add filter for creat: %m");
+ else
+ any = true;
+
+ return any ? 0 : r;
+}
+
+int seccomp_restrict_suid_sgid(void) {
+ uint32_t arch;
+ int r, k;
+
+ SECCOMP_FOREACH_LOCAL_ARCH(arch) {
+ _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+
+ r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
+ if (r < 0)
+ return r;
+
+ r = seccomp_restrict_sxid(seccomp, S_ISUID);
+ if (r < 0)
+ log_debug_errno(r, "Failed to add suid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
+
+ k = seccomp_restrict_sxid(seccomp, S_ISGID);
+ if (k < 0)
+ log_debug_errno(r, "Failed to add sgid rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch));
+
+ if (r < 0 && k < 0)
+ continue;
+
+ r = seccomp_load(seccomp);
+ if (ERRNO_IS_SECCOMP_FATAL(r))
+ return r;
+ if (r < 0)
+ log_debug_errno(r, "Failed to apply suid/sgid restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ }
+
+ return 0;
+}
+
+uint32_t scmp_act_kill_process(void) {
+
+ /* Returns SCMP_ACT_KILL_PROCESS if it's supported, and SCMP_ACT_KILL_THREAD otherwise. We never
+ * actually want to use SCMP_ACT_KILL_THREAD as its semantics are nuts (killing arbitrary threads of
+ * a program is just a bad idea), but on old kernels/old libseccomp it is all we have, and at least
+ * for single-threaded apps does the right thing. */
+
+#ifdef SCMP_ACT_KILL_PROCESS
+ if (seccomp_api_get() >= 3)
+ return SCMP_ACT_KILL_PROCESS;
+#endif
+
+ return SCMP_ACT_KILL; /* same as SCMP_ACT_KILL_THREAD */
+}
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
new file mode 100644
index 0000000..6105971
--- /dev/null
+++ b/src/shared/seccomp-util.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <seccomp.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "errno-list.h"
+#include "parse-util.h"
+#include "set.h"
+#include "string-util.h"
+
+const char* seccomp_arch_to_string(uint32_t c);
+int seccomp_arch_from_string(const char *n, uint32_t *ret);
+
+int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action);
+
+bool is_seccomp_available(void);
+
+typedef struct SyscallFilterSet {
+ const char *name;
+ const char *help;
+ const char *value;
+} SyscallFilterSet;
+
+enum {
+ /* Please leave DEFAULT first and KNOWN last, but sort the rest alphabetically */
+ SYSCALL_FILTER_SET_DEFAULT,
+ SYSCALL_FILTER_SET_AIO,
+ SYSCALL_FILTER_SET_BASIC_IO,
+ SYSCALL_FILTER_SET_CHOWN,
+ SYSCALL_FILTER_SET_CLOCK,
+ SYSCALL_FILTER_SET_CPU_EMULATION,
+ SYSCALL_FILTER_SET_DEBUG,
+ SYSCALL_FILTER_SET_FILE_SYSTEM,
+ SYSCALL_FILTER_SET_IO_EVENT,
+ SYSCALL_FILTER_SET_IPC,
+ SYSCALL_FILTER_SET_KEYRING,
+ SYSCALL_FILTER_SET_MEMLOCK,
+ SYSCALL_FILTER_SET_MODULE,
+ SYSCALL_FILTER_SET_MOUNT,
+ SYSCALL_FILTER_SET_NETWORK_IO,
+ SYSCALL_FILTER_SET_OBSOLETE,
+ SYSCALL_FILTER_SET_PKEY,
+ SYSCALL_FILTER_SET_PRIVILEGED,
+ SYSCALL_FILTER_SET_PROCESS,
+ SYSCALL_FILTER_SET_RAW_IO,
+ SYSCALL_FILTER_SET_REBOOT,
+ SYSCALL_FILTER_SET_RESOURCES,
+ SYSCALL_FILTER_SET_SETUID,
+ SYSCALL_FILTER_SET_SIGNAL,
+ SYSCALL_FILTER_SET_SWAP,
+ SYSCALL_FILTER_SET_SYNC,
+ SYSCALL_FILTER_SET_SYSTEM_SERVICE,
+ SYSCALL_FILTER_SET_TIMER,
+ SYSCALL_FILTER_SET_KNOWN,
+ _SYSCALL_FILTER_SET_MAX
+};
+
+extern const SyscallFilterSet syscall_filter_sets[];
+
+const SyscallFilterSet *syscall_filter_set_find(const char *name);
+
+int seccomp_filter_set_add(Hashmap *s, bool b, const SyscallFilterSet *set);
+
+int seccomp_add_syscall_filter_item(
+ scmp_filter_ctx *ctx,
+ const char *name,
+ uint32_t action,
+ char **exclude,
+ bool log_missing,
+ char ***added);
+
+int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing);
+int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing);
+
+typedef enum SeccompParseFlags {
+ SECCOMP_PARSE_INVERT = 1 << 0,
+ SECCOMP_PARSE_ALLOW_LIST = 1 << 1,
+ SECCOMP_PARSE_LOG = 1 << 2,
+ SECCOMP_PARSE_PERMISSIVE = 1 << 3,
+} SeccompParseFlags;
+
+int seccomp_parse_syscall_filter(
+ const char *name,
+ int errno_num,
+ Hashmap *filter,
+ SeccompParseFlags flags,
+ const char *unit,
+ const char *filename, unsigned line);
+
+int seccomp_restrict_archs(Set *archs);
+int seccomp_restrict_namespaces(unsigned long retain);
+int seccomp_protect_sysctl(void);
+int seccomp_protect_syslog(void);
+int seccomp_restrict_address_families(Set *address_families, bool allow_list);
+int seccomp_restrict_realtime(void);
+int seccomp_memory_deny_write_execute(void);
+int seccomp_lock_personality(unsigned long personality);
+int seccomp_protect_hostname(void);
+int seccomp_restrict_suid_sgid(void);
+
+extern const uint32_t seccomp_local_archs[];
+
+#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
+ for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \
+ seccomp_local_archs[_i] != (uint32_t) -1; \
+ (arch) = seccomp_local_archs[++_i])
+
+/* EACCES: does not have the CAP_SYS_ADMIN or no_new_privs == 1
+ * ENOMEM: out of memory, failed to allocate space for a libseccomp structure, or would exceed a defined constant
+ * EFAULT: addresses passed as args (by libseccomp) are invalid */
+#define ERRNO_IS_SECCOMP_FATAL(r) \
+ IN_SET(abs(r), EPERM, EACCES, ENOMEM, EFAULT)
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release);
+
+int parse_syscall_archs(char **l, Set **ret_archs);
+
+uint32_t scmp_act_kill_process(void);
+
+/* This is a special value to be used where syscall filters otherwise expect errno numbers, will be
+ replaced with real seccomp action. */
+enum {
+ SECCOMP_ERROR_NUMBER_KILL = INT_MAX - 1,
+};
+
+static inline bool seccomp_errno_or_action_is_valid(int n) {
+ return n == SECCOMP_ERROR_NUMBER_KILL || errno_is_valid(n);
+}
+
+static inline int seccomp_parse_errno_or_action(const char *p) {
+ if (streq_ptr(p, "kill"))
+ return SECCOMP_ERROR_NUMBER_KILL;
+ return parse_errno(p);
+}
+
+static inline const char *seccomp_errno_or_action_to_string(int num) {
+ if (num == SECCOMP_ERROR_NUMBER_KILL)
+ return "kill";
+ return errno_to_name(num);
+}
diff --git a/src/shared/securebits-util.c b/src/shared/securebits-util.c
new file mode 100644
index 0000000..c867807
--- /dev/null
+++ b/src/shared/securebits-util.c
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "extract-word.h"
+#include "securebits-util.h"
+#include "string-util.h"
+
+int secure_bits_to_string_alloc(int i, char **s) {
+ _cleanup_free_ char *str = NULL;
+ size_t len;
+ int r;
+
+ assert(s);
+
+ r = asprintf(&str, "%s%s%s%s%s%s",
+ (i & (1 << SECURE_KEEP_CAPS)) ? "keep-caps " : "",
+ (i & (1 << SECURE_KEEP_CAPS_LOCKED)) ? "keep-caps-locked " : "",
+ (i & (1 << SECURE_NO_SETUID_FIXUP)) ? "no-setuid-fixup " : "",
+ (i & (1 << SECURE_NO_SETUID_FIXUP_LOCKED)) ? "no-setuid-fixup-locked " : "",
+ (i & (1 << SECURE_NOROOT)) ? "noroot " : "",
+ (i & (1 << SECURE_NOROOT_LOCKED)) ? "noroot-locked " : "");
+ if (r < 0)
+ return -ENOMEM;
+
+ len = strlen(str);
+ if (len != 0)
+ str[len - 1] = '\0';
+
+ *s = TAKE_PTR(str);
+
+ return 0;
+}
+
+int secure_bits_from_string(const char *s) {
+ int secure_bits = 0;
+ const char *p;
+ int r;
+
+ for (p = s;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+ if (r == -ENOMEM)
+ return r;
+ if (r <= 0)
+ break;
+
+ if (streq(word, "keep-caps"))
+ secure_bits |= 1 << SECURE_KEEP_CAPS;
+ else if (streq(word, "keep-caps-locked"))
+ secure_bits |= 1 << SECURE_KEEP_CAPS_LOCKED;
+ else if (streq(word, "no-setuid-fixup"))
+ secure_bits |= 1 << SECURE_NO_SETUID_FIXUP;
+ else if (streq(word, "no-setuid-fixup-locked"))
+ secure_bits |= 1 << SECURE_NO_SETUID_FIXUP_LOCKED;
+ else if (streq(word, "noroot"))
+ secure_bits |= 1 << SECURE_NOROOT;
+ else if (streq(word, "noroot-locked"))
+ secure_bits |= 1 << SECURE_NOROOT_LOCKED;
+ }
+
+ return secure_bits;
+}
diff --git a/src/shared/securebits-util.h b/src/shared/securebits-util.h
new file mode 100644
index 0000000..f2e65cf
--- /dev/null
+++ b/src/shared/securebits-util.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "missing_securebits.h"
+
+int secure_bits_to_string_alloc(int i, char **s);
+int secure_bits_from_string(const char *s);
+
+static inline bool secure_bits_is_valid(int i) {
+ return ((SECURE_ALL_BITS | SECURE_ALL_LOCKS) & i) == i;
+}
+
+static inline int secure_bits_to_string_alloc_with_check(int n, char **s) {
+ if (!secure_bits_is_valid(n))
+ return -EINVAL;
+
+ return secure_bits_to_string_alloc(n, s);
+}
diff --git a/src/shared/serialize.c b/src/shared/serialize.c
new file mode 100644
index 0000000..45f57d6
--- /dev/null
+++ b/src/shared/serialize.c
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fcntl.h>
+
+#include "alloc-util.h"
+#include "env-util.h"
+#include "escape.h"
+#include "fileio.h"
+#include "missing_mman.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "serialize.h"
+#include "strv.h"
+#include "tmpfile-util.h"
+
+int serialize_item(FILE *f, const char *key, const char *value) {
+ assert(f);
+ assert(key);
+
+ if (!value)
+ return 0;
+
+ /* Make sure that anything we serialize we can also read back again with read_line() with a maximum line size
+ * of LONG_LINE_MAX. This is a safety net only. All code calling us should filter this out earlier anyway. */
+ if (strlen(key) + 1 + strlen(value) + 1 > LONG_LINE_MAX) {
+ log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+ return -EINVAL;
+ }
+
+ fputs(key, f);
+ fputc('=', f);
+ fputs(value, f);
+ fputc('\n', f);
+
+ return 1;
+}
+
+int serialize_item_escaped(FILE *f, const char *key, const char *value) {
+ _cleanup_free_ char *c = NULL;
+
+ assert(f);
+ assert(key);
+
+ if (!value)
+ return 0;
+
+ c = cescape(value);
+ if (!c)
+ return log_oom();
+
+ return serialize_item(f, key, c);
+}
+
+int serialize_item_format(FILE *f, const char *key, const char *format, ...) {
+ char buf[LONG_LINE_MAX];
+ va_list ap;
+ int k;
+
+ assert(f);
+ assert(key);
+ assert(format);
+
+ va_start(ap, format);
+ k = vsnprintf(buf, sizeof(buf), format, ap);
+ va_end(ap);
+
+ if (k < 0 || (size_t) k >= sizeof(buf) || strlen(key) + 1 + k + 1 > LONG_LINE_MAX) {
+ log_warning("Attempted to serialize overly long item '%s', refusing.", key);
+ return -EINVAL;
+ }
+
+ fputs(key, f);
+ fputc('=', f);
+ fputs(buf, f);
+ fputc('\n', f);
+
+ return 1;
+}
+
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd) {
+ int copy;
+
+ assert(f);
+ assert(key);
+
+ if (fd < 0)
+ return 0;
+
+ copy = fdset_put_dup(fds, fd);
+ if (copy < 0)
+ return log_error_errno(copy, "Failed to add file descriptor to serialization set: %m");
+
+ return serialize_item_format(f, key, "%i", copy);
+}
+
+int serialize_usec(FILE *f, const char *key, usec_t usec) {
+ assert(f);
+ assert(key);
+
+ if (usec == USEC_INFINITY)
+ return 0;
+
+ return serialize_item_format(f, key, USEC_FMT, usec);
+}
+
+int serialize_dual_timestamp(FILE *f, const char *name, const dual_timestamp *t) {
+ assert(f);
+ assert(name);
+ assert(t);
+
+ if (!dual_timestamp_is_set(t))
+ return 0;
+
+ return serialize_item_format(f, name, USEC_FMT " " USEC_FMT, t->realtime, t->monotonic);
+}
+
+int serialize_strv(FILE *f, const char *key, char **l) {
+ int ret = 0, r;
+ char **i;
+
+ /* Returns the first error, or positive if anything was serialized, 0 otherwise. */
+
+ STRV_FOREACH(i, l) {
+ r = serialize_item_escaped(f, key, *i);
+ if ((ret >= 0 && r < 0) ||
+ (ret == 0 && r > 0))
+ ret = r;
+ }
+
+ return ret;
+}
+
+int deserialize_usec(const char *value, usec_t *ret) {
+ int r;
+
+ assert(value);
+
+ r = safe_atou64(value, ret);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse usec value \"%s\": %m", value);
+
+ return 0;
+}
+
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t) {
+ uint64_t a, b;
+ int r, pos;
+
+ assert(value);
+ assert(t);
+
+ pos = strspn(value, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+ pos += strspn(value + pos, DIGITS);
+ pos += strspn(value + pos, WHITESPACE);
+ if (value[pos] == '-')
+ return -EINVAL;
+
+ r = sscanf(value, "%" PRIu64 "%" PRIu64 "%n", &a, &b, &pos);
+ if (r != 2)
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Failed to parse dual timestamp value \"%s\".",
+ value);
+
+ if (value[pos] != '\0')
+ /* trailing garbage */
+ return -EINVAL;
+
+ t->realtime = a;
+ t->monotonic = b;
+
+ return 0;
+}
+
+int deserialize_environment(const char *value, char ***list) {
+ _cleanup_free_ char *unescaped = NULL;
+ int r;
+
+ assert(value);
+ assert(list);
+
+ /* Changes the *environment strv inline. */
+
+ r = cunescape(value, 0, &unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to unescape: %m");
+
+ r = strv_env_replace(list, unescaped);
+ if (r < 0)
+ return log_error_errno(r, "Failed to append environment variable: %m");
+
+ unescaped = NULL; /* now part of 'list' */
+ return 0;
+}
+
+int open_serialization_fd(const char *ident) {
+ int fd;
+
+ fd = memfd_create(ident, MFD_CLOEXEC);
+ if (fd < 0) {
+ const char *path;
+
+ path = getpid_cached() == 1 ? "/run/systemd" : "/tmp";
+ fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+
+ log_debug("Serializing %s to %s.", ident, path);
+ } else
+ log_debug("Serializing %s to memfd.", ident);
+
+ return fd;
+}
diff --git a/src/shared/serialize.h b/src/shared/serialize.h
new file mode 100644
index 0000000..6d4f1ef
--- /dev/null
+++ b/src/shared/serialize.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+#include "fdset.h"
+#include "macro.h"
+#include "string-util.h"
+#include "time-util.h"
+
+int serialize_item(FILE *f, const char *key, const char *value);
+int serialize_item_escaped(FILE *f, const char *key, const char *value);
+int serialize_item_format(FILE *f, const char *key, const char *value, ...) _printf_(3,4);
+int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd);
+int serialize_usec(FILE *f, const char *key, usec_t usec);
+int serialize_dual_timestamp(FILE *f, const char *key, const dual_timestamp *t);
+int serialize_strv(FILE *f, const char *key, char **l);
+
+static inline int serialize_bool(FILE *f, const char *key, bool b) {
+ return serialize_item(f, key, yes_no(b));
+}
+
+int deserialize_usec(const char *value, usec_t *timestamp);
+int deserialize_dual_timestamp(const char *value, dual_timestamp *t);
+int deserialize_environment(const char *value, char ***environment);
+
+int open_serialization_fd(const char *ident);
diff --git a/src/shared/service-util.c b/src/shared/service-util.c
new file mode 100644
index 0000000..092be6e
--- /dev/null
+++ b/src/shared/service-util.c
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <getopt.h>
+#include <stdio.h>
+
+#include "alloc-util.h"
+#include "pretty-print.h"
+#include "service-util.h"
+#include "terminal-util.h"
+#include "util.h"
+
+static int help(const char *program_path, const char *service, const char *description, bool bus_introspect) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man(service, "8", &link);
+ if (r < 0)
+ return log_oom();
+
+ printf("%s [OPTIONS...]\n\n"
+ "%s%s%s\n\n"
+ "This program takes no positional arguments.\n\n"
+ "%sOptions%s:\n"
+ " -h --help Show this help\n"
+ " --version Show package version\n"
+ " --bus-introspect=PATH Write D-Bus XML introspection data\n"
+ "\nSee the %s for details.\n"
+ , program_path
+ , ansi_highlight(), description, ansi_normal()
+ , ansi_underline(), ansi_normal()
+ , link
+ );
+
+ return 0; /* No further action */
+}
+
+int service_parse_argv(
+ const char *service,
+ const char *description,
+ const BusObjectImplementation* const* bus_objects,
+ int argc, char *argv[]) {
+
+ enum {
+ ARG_VERSION = 0x100,
+ ARG_BUS_INTROSPECT,
+ };
+
+ static const struct option options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, ARG_VERSION },
+ { "bus-introspect", required_argument, NULL, ARG_BUS_INTROSPECT },
+ {}
+ };
+
+ int c;
+
+ assert(argc >= 0);
+ assert(argv);
+
+ while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0)
+ switch(c) {
+
+ case 'h':
+ return help(argv[0], service, description, bus_objects);
+
+ case ARG_VERSION:
+ return version();
+
+ case ARG_BUS_INTROSPECT:
+ return bus_introspect_implementations(
+ stdout,
+ optarg,
+ bus_objects);
+
+ case '?':
+ return -EINVAL;
+
+ default:
+ assert_not_reached("Unknown option code.");
+ }
+
+ if (optind < argc)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "This program takes no arguments.");
+
+ return 1; /* Further action */
+}
diff --git a/src/shared/service-util.h b/src/shared/service-util.h
new file mode 100644
index 0000000..360341f
--- /dev/null
+++ b/src/shared/service-util.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "bus-object.h"
+
+int service_parse_argv(
+ const char *service,
+ const char *description,
+ const BusObjectImplementation* const* bus_objects,
+ int argc, char *argv[]);
diff --git a/src/shared/sleep-config.c b/src/shared/sleep-config.c
new file mode 100644
index 0000000..cea5148
--- /dev/null
+++ b/src/shared/sleep-config.c
@@ -0,0 +1,703 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/***
+ Copyright © 2018 Dell Inc.
+***/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "blockdev-util.h"
+#include "btrfs-util.h"
+#include "conf-parser.h"
+#include "def.h"
+#include "env-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "sleep-config.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+
+int parse_sleep_config(SleepConfig **ret_sleep_config) {
+ _cleanup_(free_sleep_configp) SleepConfig *sc;
+ int allow_suspend = -1, allow_hibernate = -1,
+ allow_s2h = -1, allow_hybrid_sleep = -1;
+
+ sc = new0(SleepConfig, 1);
+ if (!sc)
+ return log_oom();
+
+ const ConfigTableItem items[] = {
+ { "Sleep", "AllowSuspend", config_parse_tristate, 0, &allow_suspend },
+ { "Sleep", "AllowHibernation", config_parse_tristate, 0, &allow_hibernate },
+ { "Sleep", "AllowSuspendThenHibernate", config_parse_tristate, 0, &allow_s2h },
+ { "Sleep", "AllowHybridSleep", config_parse_tristate, 0, &allow_hybrid_sleep },
+
+ { "Sleep", "SuspendMode", config_parse_strv, 0, &sc->suspend_modes },
+ { "Sleep", "SuspendState", config_parse_strv, 0, &sc->suspend_states },
+ { "Sleep", "HibernateMode", config_parse_strv, 0, &sc->hibernate_modes },
+ { "Sleep", "HibernateState", config_parse_strv, 0, &sc->hibernate_states },
+ { "Sleep", "HybridSleepMode", config_parse_strv, 0, &sc->hybrid_modes },
+ { "Sleep", "HybridSleepState", config_parse_strv, 0, &sc->hybrid_states },
+
+ { "Sleep", "HibernateDelaySec", config_parse_sec, 0, &sc->hibernate_delay_sec},
+ {}
+ };
+
+ (void) config_parse_many_nulstr(
+ PKGSYSCONFDIR "/sleep.conf",
+ CONF_PATHS_NULSTR("systemd/sleep.conf.d"),
+ "Sleep\0",
+ config_item_table_lookup, items,
+ CONFIG_PARSE_WARN,
+ NULL,
+ NULL);
+
+ /* use default values unless set */
+ sc->allow_suspend = allow_suspend != 0;
+ sc->allow_hibernate = allow_hibernate != 0;
+ sc->allow_hybrid_sleep = allow_hybrid_sleep >= 0 ? allow_hybrid_sleep
+ : (allow_suspend != 0 && allow_hibernate != 0);
+ sc->allow_s2h = allow_s2h >= 0 ? allow_s2h
+ : (allow_suspend != 0 && allow_hibernate != 0);
+
+ if (!sc->suspend_states)
+ sc->suspend_states = strv_new("mem", "standby", "freeze");
+ if (!sc->hibernate_modes)
+ sc->hibernate_modes = strv_new("platform", "shutdown");
+ if (!sc->hibernate_states)
+ sc->hibernate_states = strv_new("disk");
+ if (!sc->hybrid_modes)
+ sc->hybrid_modes = strv_new("suspend", "platform", "shutdown");
+ if (!sc->hybrid_states)
+ sc->hybrid_states = strv_new("disk");
+ if (sc->hibernate_delay_sec == 0)
+ sc->hibernate_delay_sec = 2 * USEC_PER_HOUR;
+
+ /* ensure values set for all required fields */
+ if (!sc->suspend_states || !sc->hibernate_modes
+ || !sc->hibernate_states || !sc->hybrid_modes || !sc->hybrid_states)
+ return log_oom();
+
+ *ret_sleep_config = TAKE_PTR(sc);
+
+ return 0;
+}
+
+int can_sleep_state(char **types) {
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ if (strv_isempty(types))
+ return true;
+
+ /* If /sys is read-only we cannot sleep */
+ if (access("/sys/power/state", W_OK) < 0) {
+ log_debug_errno(errno, "/sys/power/state is not writable, cannot sleep: %m");
+ return false;
+ }
+
+ r = read_one_line_file("/sys/power/state", &text);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to read /sys/power/state, cannot sleep: %m");
+ return false;
+ }
+
+ const char *found;
+ r = string_contains_word_strv(text, NULL, types, &found);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /sys/power/state: %m");
+ if (r > 0)
+ log_debug("Sleep mode \"%s\" is supported by the kernel.", found);
+ else if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = strv_join(types, "/");
+ log_debug("Sleep mode %s not supported by the kernel, sorry.", strnull(t));
+ }
+ return r;
+}
+
+int can_sleep_disk(char **types) {
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ if (strv_isempty(types))
+ return true;
+
+ /* If /sys is read-only we cannot sleep */
+ if (access("/sys/power/disk", W_OK) < 0) {
+ log_debug_errno(errno, "/sys/power/disk is not writable: %m");
+ return false;
+ }
+
+ r = read_one_line_file("/sys/power/disk", &text);
+ if (r < 0) {
+ log_debug_errno(r, "Couldn't read /sys/power/disk: %m");
+ return false;
+ }
+
+ for (const char *p = text;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /sys/power/disk: %m");
+ if (r == 0)
+ break;
+
+ char *s = word;
+ size_t l = strlen(s);
+ if (s[0] == '[' && s[l-1] == ']') {
+ s[l-1] = '\0';
+ s++;
+ }
+
+ if (strv_contains(types, s)) {
+ log_debug("Disk sleep mode \"%s\" is supported by the kernel.", s);
+ return true;
+ }
+ }
+
+ if (DEBUG_LOGGING) {
+ _cleanup_free_ char *t = strv_join(types, "/");
+ log_debug("Disk sleep mode %s not supported by the kernel, sorry.", strnull(t));
+ }
+ return false;
+}
+
+#define HIBERNATION_SWAP_THRESHOLD 0.98
+
+SwapEntry* swap_entry_free(SwapEntry *se) {
+ if (!se)
+ return NULL;
+
+ free(se->device);
+ free(se->type);
+
+ return mfree(se);
+}
+
+HibernateLocation* hibernate_location_free(HibernateLocation *hl) {
+ if (!hl)
+ return NULL;
+
+ swap_entry_free(hl->swap);
+
+ return mfree(hl);
+}
+
+static int swap_device_to_device_id(const SwapEntry *swap, dev_t *ret_dev) {
+ struct stat sb;
+ int r;
+
+ assert(swap);
+ assert(swap->device);
+ assert(swap->type);
+
+ r = stat(swap->device, &sb);
+ if (r < 0)
+ return -errno;
+
+ if (streq(swap->type, "partition")) {
+ if (!S_ISBLK(sb.st_mode))
+ return -ENOTBLK;
+
+ *ret_dev = sb.st_rdev;
+ return 0;
+ }
+
+ return get_block_device(swap->device, ret_dev);
+}
+
+/*
+ * Attempt to calculate the swap file offset on supported filesystems. On unsupported
+ * filesystems, a debug message is logged and ret_offset is set to UINT64_MAX.
+ */
+static int calculate_swap_file_offset(const SwapEntry *swap, uint64_t *ret_offset) {
+ _cleanup_close_ int fd = -1;
+ _cleanup_free_ struct fiemap *fiemap = NULL;
+ struct stat sb;
+ int r, btrfs;
+
+ assert(swap);
+ assert(swap->device);
+ assert(streq(swap->type, "file"));
+
+ fd = open(swap->device, O_RDONLY|O_CLOEXEC|O_NOCTTY);
+ if (fd < 0)
+ return log_debug_errno(errno, "Failed to open swap file %s to determine on-disk offset: %m", swap->device);
+
+ if (fstat(fd, &sb) < 0)
+ return log_debug_errno(errno, "Failed to stat %s: %m", swap->device);
+
+ btrfs = btrfs_is_filesystem(fd);
+ if (btrfs < 0)
+ return log_debug_errno(btrfs, "Error checking %s for Btrfs filesystem: %m", swap->device);
+ if (btrfs > 0) {
+ log_debug("%s: detection of swap file offset on Btrfs is not supported", swap->device);
+ *ret_offset = UINT64_MAX;
+ return 0;
+ }
+
+ r = read_fiemap(fd, &fiemap);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to read extent map for '%s': %m", swap->device);
+
+ *ret_offset = fiemap->fm_extents[0].fe_physical / page_size();
+ return 0;
+}
+
+static int read_resume_files(dev_t *ret_resume, uint64_t *ret_resume_offset) {
+ _cleanup_free_ char *resume_str = NULL, *resume_offset_str = NULL;
+ uint64_t resume_offset = 0;
+ dev_t resume;
+ int r;
+
+ r = read_one_line_file("/sys/power/resume", &resume_str);
+ if (r < 0)
+ return log_debug_errno(r, "Error reading /sys/power/resume: %m");
+
+ r = parse_dev(resume_str, &resume);
+ if (r < 0)
+ return log_debug_errno(r, "Error parsing /sys/power/resume device: %s: %m", resume_str);
+
+ r = read_one_line_file("/sys/power/resume_offset", &resume_offset_str);
+ if (r == -ENOENT)
+ log_debug_errno(r, "Kernel does not support resume_offset; swap file offset detection will be skipped.");
+ else if (r < 0)
+ return log_debug_errno(r, "Error reading /sys/power/resume_offset: %m");
+ else {
+ r = safe_atou64(resume_offset_str, &resume_offset);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse value in /sys/power/resume_offset \"%s\": %m", resume_offset_str);
+ }
+
+ if (resume_offset > 0 && resume == 0)
+ log_debug("Warning: found /sys/power/resume_offset==%" PRIu64 ", but /sys/power/resume unset. Misconfiguration?",
+ resume_offset);
+
+ *ret_resume = resume;
+ *ret_resume_offset = resume_offset;
+
+ return 0;
+}
+
+/*
+ * Determine if the HibernateLocation matches the resume= (device) and resume_offset= (file).
+ */
+static bool location_is_resume_device(const HibernateLocation *location, dev_t sys_resume, uint64_t sys_offset) {
+ if (!location)
+ return false;
+
+ return sys_resume > 0 &&
+ sys_resume == location->devno &&
+ (sys_offset == location->offset || (sys_offset > 0 && location->offset == UINT64_MAX));
+}
+
+/*
+ * Attempt to find the hibernation location by parsing /proc/swaps, /sys/power/resume, and
+ * /sys/power/resume_offset.
+ *
+ * Returns:
+ * 1 - Values are set in /sys/power/resume and /sys/power/resume_offset.
+ * ret_hibernate_location will represent matching /proc/swap entry if identified or NULL if not.
+ *
+ * 0 - No values are set in /sys/power/resume and /sys/power/resume_offset.
+ ret_hibernate_location will represent the highest priority swap with most remaining space discovered in /proc/swaps.
+ *
+ * Negative value in the case of error.
+ */
+int find_hibernate_location(HibernateLocation **ret_hibernate_location) {
+ _cleanup_fclose_ FILE *f = NULL;
+ _cleanup_(hibernate_location_freep) HibernateLocation *hibernate_location = NULL;
+ dev_t sys_resume;
+ uint64_t sys_offset = 0;
+ bool resume_match = false;
+ int r;
+
+ /* read the /sys/power/resume & /sys/power/resume_offset values */
+ r = read_resume_files(&sys_resume, &sys_offset);
+ if (r < 0)
+ return r;
+
+ f = fopen("/proc/swaps", "re");
+ if (!f) {
+ log_debug_errno(errno, "Failed to open /proc/swaps: %m");
+ return errno == ENOENT ? -EOPNOTSUPP : -errno; /* Convert swap not supported to a recognizable error */
+ }
+
+ (void) fscanf(f, "%*s %*s %*s %*s %*s\n");
+ for (unsigned i = 1;; i++) {
+ _cleanup_(swap_entry_freep) SwapEntry *swap = NULL;
+ uint64_t swap_offset = 0;
+ int k;
+
+ swap = new0(SwapEntry, 1);
+ if (!swap)
+ return -ENOMEM;
+
+ k = fscanf(f,
+ "%ms " /* device/file */
+ "%ms " /* type of swap */
+ "%" PRIu64 /* swap size */
+ "%" PRIu64 /* used */
+ "%i\n", /* priority */
+ &swap->device, &swap->type, &swap->size, &swap->used, &swap->priority);
+ if (k == EOF)
+ break;
+ if (k != 5) {
+ log_debug("Failed to parse /proc/swaps:%u, ignoring", i);
+ continue;
+ }
+
+ if (streq(swap->type, "file")) {
+ if (endswith(swap->device, "\\040(deleted)")) {
+ log_debug("Ignoring deleted swap file '%s'.", swap->device);
+ continue;
+ }
+
+ r = calculate_swap_file_offset(swap, &swap_offset);
+ if (r < 0)
+ return r;
+
+ } else if (streq(swap->type, "partition")) {
+ const char *fn;
+
+ fn = path_startswith(swap->device, "/dev/");
+ if (fn && startswith(fn, "zram")) {
+ log_debug("%s: ignoring zram swap", swap->device);
+ continue;
+ }
+
+ } else {
+ log_debug("%s: swap type %s is unsupported for hibernation, ignoring", swap->device, swap->type);
+ continue;
+ }
+
+ /* prefer resume device or highest priority swap with most remaining space */
+ if (hibernate_location && swap->priority < hibernate_location->swap->priority) {
+ log_debug("%s: ignoring device with lower priority", swap->device);
+ continue;
+ }
+ if (hibernate_location &&
+ (swap->priority == hibernate_location->swap->priority
+ && swap->size - swap->used < hibernate_location->swap->size - hibernate_location->swap->used)) {
+ log_debug("%s: ignoring device with lower usable space", swap->device);
+ continue;
+ }
+
+ dev_t swap_device;
+ r = swap_device_to_device_id(swap, &swap_device);
+ if (r < 0)
+ return log_debug_errno(r, "%s: failed to query device number: %m", swap->device);
+ if (swap_device == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENODEV), "%s: not backed by block device.", swap->device);
+
+ hibernate_location = hibernate_location_free(hibernate_location);
+ hibernate_location = new(HibernateLocation, 1);
+ if (!hibernate_location)
+ return -ENOMEM;
+
+ *hibernate_location = (HibernateLocation) {
+ .devno = swap_device,
+ .offset = swap_offset,
+ .swap = TAKE_PTR(swap),
+ };
+
+ /* if the swap is the resume device, stop the loop */
+ if (location_is_resume_device(hibernate_location, sys_resume, sys_offset)) {
+ log_debug("%s: device matches configured resume settings.", hibernate_location->swap->device);
+ resume_match = true;
+ break;
+ }
+
+ log_debug("%s: is a candidate device.", hibernate_location->swap->device);
+ }
+
+ /* We found nothing at all */
+ if (!hibernate_location)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "No possible swap partitions or files suitable for hibernation were found in /proc/swaps.");
+
+ /* resume= is set but a matching /proc/swaps entry was not found */
+ if (sys_resume != 0 && !resume_match)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS),
+ "No swap partitions or files matching resume config were found in /proc/swaps.");
+
+ if (hibernate_location->offset == UINT64_MAX) {
+ if (sys_offset == 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS), "Offset detection failed and /sys/power/resume_offset is not set.");
+
+ hibernate_location->offset = sys_offset;
+ }
+
+ if (resume_match)
+ log_debug("Hibernation will attempt to use swap entry with path: %s, device: %u:%u, offset: %" PRIu64 ", priority: %i",
+ hibernate_location->swap->device, major(hibernate_location->devno), minor(hibernate_location->devno),
+ hibernate_location->offset, hibernate_location->swap->priority);
+ else
+ log_debug("/sys/power/resume is not configured; attempting to hibernate with path: %s, device: %u:%u, offset: %" PRIu64 ", priority: %i",
+ hibernate_location->swap->device, major(hibernate_location->devno), minor(hibernate_location->devno),
+ hibernate_location->offset, hibernate_location->swap->priority);
+
+ *ret_hibernate_location = TAKE_PTR(hibernate_location);
+
+ if (resume_match)
+ return 1;
+
+ return 0;
+}
+
+static bool enough_swap_for_hibernation(void) {
+ _cleanup_free_ char *active = NULL;
+ _cleanup_(hibernate_location_freep) HibernateLocation *hibernate_location = NULL;
+ unsigned long long act = 0;
+ int r;
+
+ if (getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0)
+ return true;
+
+ r = find_hibernate_location(&hibernate_location);
+ if (r < 0)
+ return false;
+
+ /* If /sys/power/{resume,resume_offset} is configured but a matching entry
+ * could not be identified in /proc/swaps, user is likely using Btrfs with a swapfile;
+ * return true and let the system attempt hibernation.
+ */
+ if (r > 0 && !hibernate_location) {
+ log_debug("Unable to determine remaining swap space; hibernation may fail");
+ return true;
+ }
+
+ if (!hibernate_location)
+ return false;
+
+ r = get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE, &active);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to retrieve Active(anon) from /proc/meminfo: %m");
+ return false;
+ }
+
+ r = safe_atollu(active, &act);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to parse Active(anon) from /proc/meminfo: %s: %m", active);
+ return false;
+ }
+
+ r = act <= (hibernate_location->swap->size - hibernate_location->swap->used) * HIBERNATION_SWAP_THRESHOLD;
+ log_debug("%s swap for hibernation, Active(anon)=%llu kB, size=%" PRIu64 " kB, used=%" PRIu64 " kB, threshold=%.2g%%",
+ r ? "Enough" : "Not enough", act, hibernate_location->swap->size, hibernate_location->swap->used, 100*HIBERNATION_SWAP_THRESHOLD);
+
+ return r;
+}
+
+int read_fiemap(int fd, struct fiemap **ret) {
+ _cleanup_free_ struct fiemap *fiemap = NULL, *result_fiemap = NULL;
+ struct stat statinfo;
+ uint32_t result_extents = 0;
+ uint64_t fiemap_start = 0, fiemap_length;
+ const size_t n_extra = DIV_ROUND_UP(sizeof(struct fiemap), sizeof(struct fiemap_extent));
+ size_t fiemap_allocated = n_extra, result_fiemap_allocated = n_extra;
+
+ if (fstat(fd, &statinfo) < 0)
+ return log_debug_errno(errno, "Cannot determine file size: %m");
+ if (!S_ISREG(statinfo.st_mode))
+ return -ENOTTY;
+ fiemap_length = statinfo.st_size;
+
+ /* Zero this out in case we run on a file with no extents */
+ fiemap = calloc(n_extra, sizeof(struct fiemap_extent));
+ if (!fiemap)
+ return -ENOMEM;
+
+ result_fiemap = malloc_multiply(n_extra, sizeof(struct fiemap_extent));
+ if (!result_fiemap)
+ return -ENOMEM;
+
+ /* XFS filesystem has incorrect implementation of fiemap ioctl and
+ * returns extents for only one block-group at a time, so we need
+ * to handle it manually, starting the next fiemap call from the end
+ * of the last extent
+ */
+ while (fiemap_start < fiemap_length) {
+ *fiemap = (struct fiemap) {
+ .fm_start = fiemap_start,
+ .fm_length = fiemap_length,
+ .fm_flags = FIEMAP_FLAG_SYNC,
+ };
+
+ /* Find out how many extents there are */
+ if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+ return log_debug_errno(errno, "Failed to read extents: %m");
+
+ /* Nothing to process */
+ if (fiemap->fm_mapped_extents == 0)
+ break;
+
+ /* Resize fiemap to allow us to read in the extents, result fiemap has to hold all
+ * the extents for the whole file. Add space for the initial struct fiemap. */
+ if (!greedy_realloc0((void**) &fiemap, &fiemap_allocated,
+ n_extra + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
+ return -ENOMEM;
+
+ fiemap->fm_extent_count = fiemap->fm_mapped_extents;
+ fiemap->fm_mapped_extents = 0;
+
+ if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
+ return log_debug_errno(errno, "Failed to read extents: %m");
+
+ /* Resize result_fiemap to allow us to copy in the extents */
+ if (!greedy_realloc((void**) &result_fiemap, &result_fiemap_allocated,
+ n_extra + result_extents + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
+ return -ENOMEM;
+
+ memcpy(result_fiemap->fm_extents + result_extents,
+ fiemap->fm_extents,
+ sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents);
+
+ result_extents += fiemap->fm_mapped_extents;
+
+ /* Highly unlikely that it is zero */
+ if (_likely_(fiemap->fm_mapped_extents > 0)) {
+ uint32_t i = fiemap->fm_mapped_extents - 1;
+
+ fiemap_start = fiemap->fm_extents[i].fe_logical +
+ fiemap->fm_extents[i].fe_length;
+
+ if (fiemap->fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST)
+ break;
+ }
+ }
+
+ memcpy(result_fiemap, fiemap, sizeof(struct fiemap));
+ result_fiemap->fm_mapped_extents = result_extents;
+ *ret = TAKE_PTR(result_fiemap);
+ return 0;
+}
+
+static int can_sleep_internal(const char *verb, bool check_allowed, const SleepConfig *sleep_config);
+
+static bool can_s2h(const SleepConfig *sleep_config) {
+ const char *p;
+ int r;
+
+ if (!clock_supported(CLOCK_BOOTTIME_ALARM)) {
+ log_debug("CLOCK_BOOTTIME_ALARM is not supported.");
+ return false;
+ }
+
+ FOREACH_STRING(p, "suspend", "hibernate") {
+ r = can_sleep_internal(p, false, sleep_config);
+ if (IN_SET(r, 0, -ENOSPC, -EADV)) {
+ log_debug("Unable to %s system.", p);
+ return false;
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to check if %s is possible: %m", p);
+ }
+
+ return true;
+}
+
+static int can_sleep_internal(const char *verb, bool check_allowed, const SleepConfig *sleep_config) {
+ bool allow;
+ char **modes = NULL, **states = NULL;
+ int r;
+
+ assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"));
+
+ r = sleep_settings(verb, sleep_config, &allow, &modes, &states);
+ if (r < 0)
+ return false;
+
+ if (check_allowed && !allow) {
+ log_debug("Sleep mode \"%s\" is disabled by configuration.", verb);
+ return false;
+ }
+
+ if (streq(verb, "suspend-then-hibernate"))
+ return can_s2h(sleep_config);
+
+ if (!can_sleep_state(states) || !can_sleep_disk(modes))
+ return false;
+
+ if (streq(verb, "suspend"))
+ return true;
+
+ if (!enough_swap_for_hibernation())
+ return -ENOSPC;
+
+ return true;
+}
+
+int can_sleep(const char *verb) {
+ _cleanup_(free_sleep_configp) SleepConfig *sleep_config = NULL;
+ int r;
+
+ r = parse_sleep_config(&sleep_config);
+ if (r < 0)
+ return r;
+
+ return can_sleep_internal(verb, true, sleep_config);
+}
+
+int sleep_settings(const char *verb, const SleepConfig *sleep_config, bool *ret_allow, char ***ret_modes, char ***ret_states) {
+
+ assert(verb);
+ assert(sleep_config);
+ assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate"));
+
+ if (streq(verb, "suspend")) {
+ *ret_allow = sleep_config->allow_suspend;
+ *ret_modes = sleep_config->suspend_modes;
+ *ret_states = sleep_config->suspend_states;
+ } else if (streq(verb, "hibernate")) {
+ *ret_allow = sleep_config->allow_hibernate;
+ *ret_modes = sleep_config->hibernate_modes;
+ *ret_states = sleep_config->hibernate_states;
+ } else if (streq(verb, "hybrid-sleep")) {
+ *ret_allow = sleep_config->allow_hybrid_sleep;
+ *ret_modes = sleep_config->hybrid_modes;
+ *ret_states = sleep_config->hybrid_states;
+ } else if (streq(verb, "suspend-then-hibernate")) {
+ *ret_allow = sleep_config->allow_s2h;
+ *ret_modes = *ret_states = NULL;
+ }
+
+ /* suspend modes empty by default */
+ if ((!ret_modes && !streq(verb, "suspend")) || !ret_states)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No modes or states set for %s; Check sleep.conf", verb);
+
+ return 0;
+}
+
+SleepConfig* free_sleep_config(SleepConfig *sc) {
+ if (!sc)
+ return NULL;
+
+ strv_free(sc->suspend_modes);
+ strv_free(sc->suspend_states);
+
+ strv_free(sc->hibernate_modes);
+ strv_free(sc->hibernate_states);
+
+ strv_free(sc->hybrid_modes);
+ strv_free(sc->hybrid_states);
+
+ return mfree(sc);
+}
diff --git a/src/shared/sleep-config.h b/src/shared/sleep-config.h
new file mode 100644
index 0000000..4b30e6d
--- /dev/null
+++ b/src/shared/sleep-config.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <linux/fiemap.h>
+#include "time-util.h"
+
+typedef struct SleepConfig {
+ bool allow_suspend; /* AllowSuspend */
+ bool allow_hibernate; /* AllowHibernation */
+ bool allow_s2h; /* AllowSuspendThenHibernate */
+ bool allow_hybrid_sleep; /* AllowHybridSleep */
+
+ char **suspend_modes; /* SuspendMode */
+ char **suspend_states; /* SuspendState */
+ char **hibernate_modes; /* HibernateMode */
+ char **hibernate_states; /* HibernateState */
+ char **hybrid_modes; /* HybridSleepMode */
+ char **hybrid_states; /* HybridSleepState */
+
+ usec_t hibernate_delay_sec; /* HibernateDelaySec */
+} SleepConfig;
+
+SleepConfig* free_sleep_config(SleepConfig *sc);
+DEFINE_TRIVIAL_CLEANUP_FUNC(SleepConfig*, free_sleep_config);
+
+/* entry in /proc/swaps */
+typedef struct SwapEntry {
+ char *device;
+ char *type;
+ uint64_t size;
+ uint64_t used;
+ int priority;
+} SwapEntry;
+
+SwapEntry* swap_entry_free(SwapEntry *se);
+DEFINE_TRIVIAL_CLEANUP_FUNC(SwapEntry*, swap_entry_free);
+
+/*
+ * represents values for /sys/power/resume & /sys/power/resume_offset
+ * and the matching /proc/swap entry.
+ */
+typedef struct HibernateLocation {
+ dev_t devno;
+ uint64_t offset;
+ SwapEntry *swap;
+} HibernateLocation;
+
+HibernateLocation* hibernate_location_free(HibernateLocation *hl);
+DEFINE_TRIVIAL_CLEANUP_FUNC(HibernateLocation*, hibernate_location_free);
+
+int sleep_settings(const char *verb, const SleepConfig *sleep_config, bool *ret_allow, char ***ret_modes, char ***ret_states);
+
+int read_fiemap(int fd, struct fiemap **ret);
+int parse_sleep_config(SleepConfig **sleep_config);
+int find_hibernate_location(HibernateLocation **ret_hibernate_location);
+
+int can_sleep(const char *verb);
+int can_sleep_disk(char **types);
+int can_sleep_state(char **types);
diff --git a/src/shared/socket-netlink.c b/src/shared/socket-netlink.c
new file mode 100644
index 0000000..4a7007d
--- /dev/null
+++ b/src/shared/socket-netlink.c
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <net/if.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "extract-word.h"
+#include "log.h"
+#include "memory-util.h"
+#include "netlink-util.h"
+#include "parse-util.h"
+#include "socket-netlink.h"
+#include "socket-util.h"
+#include "string-util.h"
+
+int resolve_ifname(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ /* Like if_nametoindex, but resolves "alternative names" too. */
+
+ assert(name);
+
+ r = if_nametoindex(name);
+ if (r > 0)
+ return r;
+
+ return rtnl_resolve_link_alternative_name(rtnl, name);
+}
+
+int resolve_interface(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ /* Like resolve_ifname, but resolves interface numbers too. */
+
+ assert(name);
+
+ r = parse_ifindex(name);
+ if (r > 0)
+ return r;
+ assert(r < 0);
+
+ return resolve_ifname(rtnl, name);
+}
+
+int resolve_interface_or_warn(sd_netlink **rtnl, const char *name) {
+ int r;
+
+ r = resolve_interface(rtnl, name);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve interface \"%s\": %m", name);
+ return r;
+}
+
+int socket_address_parse(SocketAddress *a, const char *s) {
+ _cleanup_free_ char *n = NULL;
+ char *e;
+ int r;
+
+ assert(a);
+ assert(s);
+
+ if (IN_SET(*s, '/', '@')) {
+ /* AF_UNIX socket */
+ struct sockaddr_un un;
+
+ r = sockaddr_un_set_path(&un, s);
+ if (r < 0)
+ return r;
+
+ *a = (SocketAddress) {
+ .sockaddr.un = un,
+ .size = r,
+ };
+
+ } else if (startswith(s, "vsock:")) {
+ /* AF_VSOCK socket in vsock:cid:port notation */
+ const char *cid_start = s + STRLEN("vsock:");
+ unsigned port, cid;
+
+ e = strchr(cid_start, ':');
+ if (!e)
+ return -EINVAL;
+
+ r = safe_atou(e+1, &port);
+ if (r < 0)
+ return r;
+
+ n = strndup(cid_start, e - cid_start);
+ if (!n)
+ return -ENOMEM;
+
+ if (isempty(n))
+ cid = VMADDR_CID_ANY;
+ else {
+ r = safe_atou(n, &cid);
+ if (r < 0)
+ return r;
+ }
+
+ *a = (SocketAddress) {
+ .sockaddr.vm = {
+ .svm_cid = cid,
+ .svm_family = AF_VSOCK,
+ .svm_port = port,
+ },
+ .size = sizeof(struct sockaddr_vm),
+ };
+
+ } else {
+ uint16_t port;
+
+ r = parse_ip_port(s, &port);
+ if (r == -ERANGE)
+ return r; /* Valid port syntax, but the numerical value is wrong for a port. */
+ if (r >= 0) {
+ /* Just a port */
+ if (socket_ipv6_is_supported())
+ *a = (SocketAddress) {
+ .sockaddr.in6 = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htobe16(port),
+ .sin6_addr = in6addr_any,
+ },
+ .size = sizeof(struct sockaddr_in6),
+ };
+ else
+ *a = (SocketAddress) {
+ .sockaddr.in = {
+ .sin_family = AF_INET,
+ .sin_port = htobe16(port),
+ .sin_addr.s_addr = INADDR_ANY,
+ },
+ .size = sizeof(struct sockaddr_in),
+ };
+
+ } else {
+ union in_addr_union address;
+ int family, ifindex;
+
+ r = in_addr_port_ifindex_name_from_string_auto(s, &family, &address, &port, &ifindex, NULL);
+ if (r < 0)
+ return r;
+
+ if (port == 0) /* No port, no go. */
+ return -EINVAL;
+
+ if (family == AF_INET)
+ *a = (SocketAddress) {
+ .sockaddr.in = {
+ .sin_family = AF_INET,
+ .sin_addr = address.in,
+ .sin_port = htobe16(port),
+ },
+ .size = sizeof(struct sockaddr_in),
+ };
+ else if (family == AF_INET6)
+ *a = (SocketAddress) {
+ .sockaddr.in6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = address.in6,
+ .sin6_port = htobe16(port),
+ .sin6_scope_id = ifindex,
+ },
+ .size = sizeof(struct sockaddr_in6),
+ };
+ else
+ assert_not_reached("Family quarrel");
+ }
+ }
+
+ return 0;
+}
+
+int socket_address_parse_and_warn(SocketAddress *a, const char *s) {
+ SocketAddress b;
+ int r;
+
+ /* Similar to socket_address_parse() but warns for IPv6 sockets when we don't support them. */
+
+ r = socket_address_parse(&b, s);
+ if (r < 0)
+ return r;
+
+ if (!socket_ipv6_is_supported() && b.sockaddr.sa.sa_family == AF_INET6) {
+ log_warning("Binding to IPv6 address not available since kernel does not support IPv6.");
+ return -EAFNOSUPPORT;
+ }
+
+ *a = b;
+ return 0;
+}
+
+int socket_address_parse_netlink(SocketAddress *a, const char *s) {
+ _cleanup_free_ char *word = NULL;
+ unsigned group = 0;
+ int family, r;
+
+ assert(a);
+ assert(s);
+
+ r = extract_first_word(&s, &word, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ family = netlink_family_from_string(word);
+ if (family < 0)
+ return -EINVAL;
+
+ if (!isempty(s)) {
+ r = safe_atou(s, &group);
+ if (r < 0)
+ return r;
+ }
+
+ *a = (SocketAddress) {
+ .type = SOCK_RAW,
+ .sockaddr.nl.nl_family = AF_NETLINK,
+ .sockaddr.nl.nl_groups = group,
+ .protocol = family,
+ .size = sizeof(struct sockaddr_nl),
+ };
+
+ return 0;
+}
+
+bool socket_address_is(const SocketAddress *a, const char *s, int type) {
+ struct SocketAddress b;
+
+ assert(a);
+ assert(s);
+
+ if (socket_address_parse(&b, s) < 0)
+ return false;
+
+ b.type = type;
+
+ return socket_address_equal(a, &b);
+}
+
+bool socket_address_is_netlink(const SocketAddress *a, const char *s) {
+ struct SocketAddress b;
+
+ assert(a);
+ assert(s);
+
+ if (socket_address_parse_netlink(&b, s) < 0)
+ return false;
+
+ return socket_address_equal(a, &b);
+}
+
+int make_socket_fd(int log_level, const char* address, int type, int flags) {
+ SocketAddress a;
+ int fd, r;
+
+ r = socket_address_parse(&a, address);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse socket address \"%s\": %m", address);
+
+ a.type = type;
+
+ fd = socket_address_listen(&a, type | flags, SOMAXCONN, SOCKET_ADDRESS_DEFAULT,
+ NULL, false, false, false, 0755, 0644, NULL);
+ if (fd < 0 || log_get_max_level() >= log_level) {
+ _cleanup_free_ char *p = NULL;
+
+ r = socket_address_print(&a, &p);
+ if (r < 0)
+ return log_error_errno(r, "socket_address_print(): %m");
+
+ if (fd < 0)
+ log_error_errno(fd, "Failed to listen on %s: %m", p);
+ else
+ log_full(log_level, "Listening on %s", p);
+ }
+
+ return fd;
+}
+
+int in_addr_port_ifindex_name_from_string_auto(
+ const char *s,
+ int *ret_family,
+ union in_addr_union *ret_address,
+ uint16_t *ret_port,
+ int *ret_ifindex,
+ char **ret_server_name) {
+
+ _cleanup_free_ char *buf1 = NULL, *buf2 = NULL, *name = NULL;
+ int family, ifindex = 0, r;
+ union in_addr_union a;
+ uint16_t port = 0;
+ const char *m;
+
+ assert(s);
+
+ /* This accepts the following:
+ * 192.168.0.1:53#example.com
+ * [2001:4860:4860::8888]:53%eth0#example.com
+ *
+ * If ret_port is NULL, then the port cannot be specified.
+ * If ret_ifindex is NULL, then the interface index cannot be specified.
+ * If ret_server_name is NULL, then server_name cannot be specified.
+ *
+ * ret_family is always AF_INET or AF_INET6.
+ */
+
+ m = strchr(s, '#');
+ if (m) {
+ if (!ret_server_name)
+ return -EINVAL;
+
+ if (isempty(m + 1))
+ return -EINVAL;
+
+ name = strdup(m + 1);
+ if (!name)
+ return -ENOMEM;
+
+ s = buf1 = strndup(s, m - s);
+ if (!buf1)
+ return -ENOMEM;
+ }
+
+ m = strchr(s, '%');
+ if (m) {
+ if (!ret_ifindex)
+ return -EINVAL;
+
+ if (isempty(m + 1))
+ return -EINVAL;
+
+ if (!ifname_valid_full(m + 1, IFNAME_VALID_ALTERNATIVE | IFNAME_VALID_NUMERIC))
+ return -EINVAL; /* We want to return -EINVAL for syntactically invalid names,
+ * and -ENODEV for valid but nonexistent interfaces. */
+
+ ifindex = resolve_interface(NULL, m + 1);
+ if (ifindex < 0)
+ return ifindex;
+
+ s = buf2 = strndup(s, m - s);
+ if (!buf2)
+ return -ENOMEM;
+ }
+
+ m = strrchr(s, ':');
+ if (m) {
+ if (*s == '[') {
+ _cleanup_free_ char *ip_str = NULL;
+
+ if (!ret_port)
+ return -EINVAL;
+
+ if (*(m - 1) != ']')
+ return -EINVAL;
+
+ family = AF_INET6;
+
+ r = parse_ip_port(m + 1, &port);
+ if (r < 0)
+ return r;
+
+ ip_str = strndup(s + 1, m - s - 2);
+ if (!ip_str)
+ return -ENOMEM;
+
+ r = in_addr_from_string(family, ip_str, &a);
+ if (r < 0)
+ return r;
+ } else {
+ /* First try to parse the string as IPv6 address without port number */
+ r = in_addr_from_string(AF_INET6, s, &a);
+ if (r < 0) {
+ /* Then the input should be IPv4 address with port number */
+ _cleanup_free_ char *ip_str = NULL;
+
+ if (!ret_port)
+ return -EINVAL;
+
+ family = AF_INET;
+
+ ip_str = strndup(s, m - s);
+ if (!ip_str)
+ return -ENOMEM;
+
+ r = in_addr_from_string(family, ip_str, &a);
+ if (r < 0)
+ return r;
+
+ r = parse_ip_port(m + 1, &port);
+ if (r < 0)
+ return r;
+ } else
+ family = AF_INET6;
+ }
+ } else {
+ family = AF_INET;
+ r = in_addr_from_string(family, s, &a);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_family)
+ *ret_family = family;
+ if (ret_address)
+ *ret_address = a;
+ if (ret_port)
+ *ret_port = port;
+ if (ret_ifindex)
+ *ret_ifindex = ifindex;
+ if (ret_server_name)
+ *ret_server_name = TAKE_PTR(name);
+
+ return r;
+}
+
+struct in_addr_full *in_addr_full_free(struct in_addr_full *a) {
+ if (!a)
+ return NULL;
+
+ free(a->server_name);
+ free(a->cached_server_string);
+ return mfree(a);
+}
+
+int in_addr_full_new(
+ int family,
+ const union in_addr_union *a,
+ uint16_t port,
+ int ifindex,
+ const char *server_name,
+ struct in_addr_full **ret) {
+
+ _cleanup_free_ char *name = NULL;
+ struct in_addr_full *x;
+
+ assert(ret);
+
+ if (!isempty(server_name)) {
+ name = strdup(server_name);
+ if (!name)
+ return -ENOMEM;
+ }
+
+ x = new(struct in_addr_full, 1);
+ if (!x)
+ return -ENOMEM;
+
+ *x = (struct in_addr_full) {
+ .family = family,
+ .address = *a,
+ .port = port,
+ .ifindex = ifindex,
+ .server_name = TAKE_PTR(name),
+ };
+
+ *ret = x;
+ return 0;
+}
+
+int in_addr_full_new_from_string(const char *s, struct in_addr_full **ret) {
+ _cleanup_free_ char *server_name = NULL;
+ int family, ifindex, r;
+ union in_addr_union a;
+ uint16_t port;
+
+ assert(s);
+
+ r = in_addr_port_ifindex_name_from_string_auto(s, &family, &a, &port, &ifindex, &server_name);
+ if (r < 0)
+ return r;
+
+ return in_addr_full_new(family, &a, port, ifindex, server_name, ret);
+}
+
+const char *in_addr_full_to_string(struct in_addr_full *a) {
+ assert(a);
+
+ if (!a->cached_server_string)
+ (void) in_addr_port_ifindex_name_to_string(
+ a->family,
+ &a->address,
+ a->port,
+ a->ifindex,
+ a->server_name,
+ &a->cached_server_string);
+
+ return a->cached_server_string;
+}
diff --git a/src/shared/socket-netlink.h b/src/shared/socket-netlink.h
new file mode 100644
index 0000000..eac5991
--- /dev/null
+++ b/src/shared/socket-netlink.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-netlink.h"
+
+#include "in-addr-util.h"
+#include "macro.h"
+#include "socket-util.h"
+
+int resolve_ifname(sd_netlink **rtnl, const char *name);
+int resolve_interface(sd_netlink **rtnl, const char *name);
+int resolve_interface_or_warn(sd_netlink **rtnl, const char *name);
+
+int make_socket_fd(int log_level, const char* address, int type, int flags);
+
+int socket_address_parse(SocketAddress *a, const char *s);
+int socket_address_parse_and_warn(SocketAddress *a, const char *s);
+int socket_address_parse_netlink(SocketAddress *a, const char *s);
+
+bool socket_address_is(const SocketAddress *a, const char *s, int type);
+bool socket_address_is_netlink(const SocketAddress *a, const char *s);
+
+int in_addr_port_ifindex_name_from_string_auto(
+ const char *s,
+ int *ret_family,
+ union in_addr_union *ret_address,
+ uint16_t *ret_port,
+ int *ret_ifindex,
+ char **ret_server_name);
+static inline int in_addr_ifindex_name_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex, char **server_name) {
+ return in_addr_port_ifindex_name_from_string_auto(s, family, ret, NULL, ifindex, server_name);
+}
+static inline int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex) {
+ return in_addr_ifindex_name_from_string_auto(s, family, ret, ifindex, NULL);
+}
+
+struct in_addr_full {
+ int family;
+ union in_addr_union address;
+ uint16_t port;
+ int ifindex;
+ char *server_name;
+ char *cached_server_string; /* Should not be handled directly, but through in_addr_full_to_string(). */
+};
+
+struct in_addr_full *in_addr_full_free(struct in_addr_full *a);
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct in_addr_full*, in_addr_full_free);
+int in_addr_full_new(int family, const union in_addr_union *a, uint16_t port, int ifindex, const char *server_name, struct in_addr_full **ret);
+int in_addr_full_new_from_string(const char *s, struct in_addr_full **ret);
+const char *in_addr_full_to_string(struct in_addr_full *a);
diff --git a/src/shared/spawn-ask-password-agent.c b/src/shared/spawn-ask-password-agent.c
new file mode 100644
index 0000000..1f07b19
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "process-util.h"
+#include "spawn-ask-password-agent.h"
+#include "util.h"
+
+static pid_t agent_pid = 0;
+
+int ask_password_agent_open(void) {
+ int r;
+
+ if (agent_pid > 0)
+ return 0;
+
+ /* We check STDIN here, not STDOUT, since this is about input,
+ * not output */
+ if (!isatty(STDIN_FILENO))
+ return 0;
+
+ if (!is_main_thread())
+ return -EPERM;
+
+ r = fork_agent("(sd-askpwagent)",
+ NULL, 0,
+ &agent_pid,
+ SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH,
+ SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, "--watch", NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to fork TTY ask password agent: %m");
+
+ return 1;
+}
+
+void ask_password_agent_close(void) {
+
+ if (agent_pid <= 0)
+ return;
+
+ /* Inform agent that we are done */
+ (void) kill_and_sigcont(agent_pid, SIGTERM);
+ (void) wait_for_terminate(agent_pid, NULL);
+ agent_pid = 0;
+}
+
+int ask_password_agent_open_if_enabled(BusTransport transport, bool ask_password) {
+
+ /* Open the ask password agent as a child process if necessary */
+
+ if (transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
+ if (!ask_password)
+ return 0;
+
+ return ask_password_agent_open();
+}
diff --git a/src/shared/spawn-ask-password-agent.h b/src/shared/spawn-ask-password-agent.h
new file mode 100644
index 0000000..a76cdb1
--- /dev/null
+++ b/src/shared/spawn-ask-password-agent.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "bus-util.h"
+
+int ask_password_agent_open(void);
+void ask_password_agent_close(void);
+
+int ask_password_agent_open_if_enabled(BusTransport transport, bool ask_password);
diff --git a/src/shared/spawn-polkit-agent.c b/src/shared/spawn-polkit-agent.c
new file mode 100644
index 0000000..a0024eb
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.c
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "io-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "spawn-polkit-agent.h"
+#include "stdio-util.h"
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_POLKIT
+static pid_t agent_pid = 0;
+
+int polkit_agent_open(void) {
+ char notify_fd[DECIMAL_STR_MAX(int) + 1];
+ int pipe_fd[2], r;
+
+ if (agent_pid > 0)
+ return 0;
+
+ /* Clients that run as root don't need to activate/query polkit */
+ if (geteuid() == 0)
+ return 0;
+
+ /* We check STDIN here, not STDOUT, since this is about input, not output */
+ if (!isatty(STDIN_FILENO))
+ return 0;
+
+ if (!is_main_thread())
+ return -EPERM;
+
+ if (pipe2(pipe_fd, 0) < 0)
+ return -errno;
+
+ xsprintf(notify_fd, "%i", pipe_fd[1]);
+
+ r = fork_agent("(polkit-agent)",
+ &pipe_fd[1], 1,
+ &agent_pid,
+ POLKIT_AGENT_BINARY_PATH,
+ POLKIT_AGENT_BINARY_PATH, "--notify-fd", notify_fd, "--fallback", NULL);
+
+ /* Close the writing side, because that's the one for the agent */
+ safe_close(pipe_fd[1]);
+
+ if (r < 0)
+ log_error_errno(r, "Failed to fork TTY ask password agent: %m");
+ else
+ /* Wait until the agent closes the fd */
+ fd_wait_for_event(pipe_fd[0], POLLHUP, USEC_INFINITY);
+
+ safe_close(pipe_fd[0]);
+
+ return r;
+}
+
+void polkit_agent_close(void) {
+
+ if (agent_pid <= 0)
+ return;
+
+ /* Inform agent that we are done */
+ (void) kill_and_sigcont(agent_pid, SIGTERM);
+ (void) wait_for_terminate(agent_pid, NULL);
+ agent_pid = 0;
+}
+
+#else
+
+int polkit_agent_open(void) {
+ return 0;
+}
+
+void polkit_agent_close(void) {
+}
+
+#endif
+
+int polkit_agent_open_if_enabled(BusTransport transport, bool ask_password) {
+
+ /* Open the polkit agent as a child process if necessary */
+
+ if (transport != BUS_TRANSPORT_LOCAL)
+ return 0;
+
+ if (!ask_password)
+ return 0;
+
+ return polkit_agent_open();
+}
diff --git a/src/shared/spawn-polkit-agent.h b/src/shared/spawn-polkit-agent.h
new file mode 100644
index 0000000..325dfdd
--- /dev/null
+++ b/src/shared/spawn-polkit-agent.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "bus-util.h"
+
+int polkit_agent_open(void);
+void polkit_agent_close(void);
+
+int polkit_agent_open_if_enabled(BusTransport transport, bool ask_password);
diff --git a/src/shared/specifier.c b/src/shared/specifier.c
new file mode 100644
index 0000000..86731f8
--- /dev/null
+++ b/src/shared/specifier.c
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/utsname.h>
+
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "architecture.h"
+#include "format-util.h"
+#include "fs-util.h"
+#include "hostname-util.h"
+#include "macro.h"
+#include "os-util.h"
+#include "specifier.h"
+#include "string-util.h"
+#include "strv.h"
+#include "user-util.h"
+
+/*
+ * Generic infrastructure for replacing %x style specifiers in
+ * strings. Will call a callback for each replacement.
+ */
+
+/* Any ASCII character or digit: our pool of potential specifiers,
+ * and "%" used for escaping. */
+#define POSSIBLE_SPECIFIERS ALPHANUMERICAL "%"
+
+int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **_ret) {
+ size_t l, allocated = 0;
+ _cleanup_free_ char *ret = NULL;
+ char *t;
+ const char *f;
+ bool percent = false;
+ int r;
+
+ assert(text);
+ assert(table);
+
+ l = strlen(text);
+ if (!GREEDY_REALLOC(ret, allocated, l + 1))
+ return -ENOMEM;
+ t = ret;
+
+ for (f = text; *f; f++, l--)
+ if (percent) {
+ if (*f == '%')
+ *(t++) = '%';
+ else {
+ const Specifier *i;
+
+ for (i = table; i->specifier; i++)
+ if (i->specifier == *f)
+ break;
+
+ if (i->lookup) {
+ _cleanup_free_ char *w = NULL;
+ size_t k, j;
+
+ r = i->lookup(i->specifier, i->data, userdata, &w);
+ if (r < 0)
+ return r;
+
+ j = t - ret;
+ k = strlen(w);
+
+ if (!GREEDY_REALLOC(ret, allocated, j + k + l + 1))
+ return -ENOMEM;
+ memcpy(ret + j, w, k);
+ t = ret + j + k;
+ } else if (strchr(POSSIBLE_SPECIFIERS, *f))
+ /* Oops, an unknown specifier. */
+ return -EBADSLT;
+ else {
+ *(t++) = '%';
+ *(t++) = *f;
+ }
+ }
+
+ percent = false;
+ } else if (*f == '%')
+ percent = true;
+ else
+ *(t++) = *f;
+
+ /* If string ended with a stray %, also end with % */
+ if (percent)
+ *(t++) = '%';
+ *(t++) = 0;
+
+ /* Try to deallocate unused bytes, but don't sweat it too much */
+ if ((size_t)(t - ret) < allocated) {
+ t = realloc(ret, t - ret);
+ if (t)
+ ret = t;
+ }
+
+ *_ret = TAKE_PTR(ret);
+ return 0;
+}
+
+/* Generic handler for simple string replacements */
+
+int specifier_string(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = strdup(strempty(data));
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret) {
+ sd_id128_t id;
+ char *n;
+ int r;
+
+ r = sd_id128_get_machine(&id);
+ if (r < 0)
+ return r;
+
+ n = new(char, 33);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = sd_id128_to_string(id, n);
+ return 0;
+}
+
+int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret) {
+ sd_id128_t id;
+ char *n;
+ int r;
+
+ r = sd_id128_get_boot(&id);
+ if (r < 0)
+ return r;
+
+ n = new(char, 33);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = sd_id128_to_string(id, n);
+ return 0;
+}
+
+int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = gethostname_malloc();
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_short_host_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *n;
+
+ n = gethostname_short_malloc();
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret) {
+ struct utsname uts;
+ char *n;
+ int r;
+
+ r = uname(&uts);
+ if (r < 0)
+ return -errno;
+
+ n = strdup(uts.release);
+ if (!n)
+ return -ENOMEM;
+
+ *ret = n;
+ return 0;
+}
+
+int specifier_architecture(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ t = strdup(architecture_to_string(uname_architecture()));
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+static int specifier_os_release_common(const char *field, char **ret) {
+ char *t = NULL;
+ int r;
+
+ r = parse_os_release(NULL, field, &t, NULL);
+ if (r < 0)
+ return r;
+ if (!t) {
+ /* fields in /etc/os-release might quite possibly be missing, even if everything is entirely
+ * valid otherwise. Let's hence return "" in that case. */
+ t = strdup("");
+ if (!t)
+ return -ENOMEM;
+ }
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_os_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("ID", ret);
+}
+
+int specifier_os_version_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("VERSION_ID", ret);
+}
+
+int specifier_os_build_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("BUILD_ID", ret);
+}
+
+int specifier_os_variant_id(char specifier, const void *data, const void *userdata, char **ret) {
+ return specifier_os_release_common("VARIANT_ID", ret);
+}
+
+int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ t = gid_to_name(getgid());
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret) {
+ if (asprintf(ret, UID_FMT, getgid()) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret) {
+ char *t;
+
+ /* If we are UID 0 (root), this will not result in NSS, otherwise it might. This is good, as we want to be able
+ * to run this in PID 1, where our user ID is 0, but where NSS lookups are not allowed.
+
+ * We don't use getusername_malloc() here, because we don't want to look at $USER, to remain consistent with
+ * specifer_user_id() below.
+ */
+
+ t = uid_to_name(getuid());
+ if (!t)
+ return -ENOMEM;
+
+ *ret = t;
+ return 0;
+}
+
+int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret) {
+
+ if (asprintf(ret, UID_FMT, getuid()) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret) {
+
+ /* On PID 1 (which runs as root) this will not result in NSS,
+ * which is good. See above */
+
+ return get_home_dir(ret);
+}
+
+int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret) {
+
+ /* On PID 1 (which runs as root) this will not result in NSS,
+ * which is good. See above */
+
+ return get_shell(ret);
+}
+
+int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+ const char *p;
+ char *copy;
+ int r;
+
+ r = tmp_dir(&p);
+ if (r < 0)
+ return r;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+}
+
+int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) {
+ const char *p;
+ char *copy;
+ int r;
+
+ r = var_tmp_dir(&p);
+ if (r < 0)
+ return r;
+
+ copy = strdup(p);
+ if (!copy)
+ return -ENOMEM;
+
+ *ret = copy;
+ return 0;
+}
+
+int specifier_escape_strv(char **l, char ***ret) {
+ char **z, **p, **q;
+
+ assert(ret);
+
+ if (strv_isempty(l)) {
+ *ret = NULL;
+ return 0;
+ }
+
+ z = new(char*, strv_length(l)+1);
+ if (!z)
+ return -ENOMEM;
+
+ for (p = l, q = z; *p; p++, q++) {
+
+ *q = specifier_escape(*p);
+ if (!*q) {
+ strv_free(z);
+ return -ENOMEM;
+ }
+ }
+
+ *q = NULL;
+ *ret = z;
+
+ return 0;
+}
diff --git a/src/shared/specifier.h b/src/shared/specifier.h
new file mode 100644
index 0000000..1323b41
--- /dev/null
+++ b/src/shared/specifier.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "string-util.h"
+
+typedef int (*SpecifierCallback)(char specifier, const void *data, const void *userdata, char **ret);
+
+typedef struct Specifier {
+ const char specifier;
+ const SpecifierCallback lookup;
+ const void *data;
+} Specifier;
+
+int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **ret);
+
+int specifier_string(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_short_host_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_architecture(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_version_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_build_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_os_variant_id(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret);
+
+int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret);
+
+/* Typically, in places where one of the above specifier is to be resolved the other similar ones are to be
+ * resolved, too. Hence let's define common macros for the relevant array entries.
+ *
+ * COMMON_SYSTEM_SPECIFIERS:
+ * %a: the native userspace architecture
+ * %b: the boot ID of the running system
+ * %B: the OS build ID, according to /etc/os-release
+ * %H: the hostname of the running system
+ * %l: the short hostname of the running system
+ * %m: the machine ID of the running system
+ * %o: the OS ID according to /etc/os-release
+ * %v: the kernel version
+ * %w: the OS version ID, according to /etc/os-release
+ * %W: the OS variant ID, according to /etc/os-release
+ *
+ * COMMON_CREDS_SPECIFIERS:
+ * %g: the groupname of the running user
+ * %G: the GID of the running user
+ * %u: the username of the running user
+ * %U: the UID of the running user
+ *
+ * COMMON_TMP_SPECIFIERS:
+ * %T: the temporary directory (e.g. /tmp, or $TMPDIR, $TEMP, $TMP)
+ * %V: the temporary directory for large, persistent stuff (e.g. /var/tmp, or $TMPDIR, $TEMP, $TMP)
+ */
+
+#define COMMON_SYSTEM_SPECIFIERS \
+ { 'a', specifier_architecture, NULL }, \
+ { 'b', specifier_boot_id, NULL }, \
+ { 'B', specifier_os_build_id, NULL }, \
+ { 'H', specifier_host_name, NULL }, \
+ { 'l', specifier_short_host_name, NULL }, \
+ { 'm', specifier_machine_id, NULL }, \
+ { 'o', specifier_os_id, NULL }, \
+ { 'v', specifier_kernel_release, NULL }, \
+ { 'w', specifier_os_version_id, NULL }, \
+ { 'W', specifier_os_variant_id, NULL }
+
+#define COMMON_CREDS_SPECIFIERS \
+ { 'g', specifier_group_name, NULL }, \
+ { 'G', specifier_group_id, NULL }, \
+ { 'u', specifier_user_name, NULL }, \
+ { 'U', specifier_user_id, NULL }
+
+#define COMMON_TMP_SPECIFIERS \
+ { 'T', specifier_tmp_dir, NULL }, \
+ { 'V', specifier_var_tmp_dir, NULL }
+
+static inline char* specifier_escape(const char *string) {
+ return strreplace(string, "%", "%%");
+}
+
+int specifier_escape_strv(char **l, char ***ret);
diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c
new file mode 100644
index 0000000..e0dd17a
--- /dev/null
+++ b/src/shared/switch-root.c
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "base-filesystem.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "missing_syscall.h"
+#include "mkdir.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "path-util.h"
+#include "rm-rf.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "switch-root.h"
+#include "user-util.h"
+#include "util.h"
+
+int switch_root(const char *new_root,
+ const char *old_root_after, /* path below the new root, where to place the old root after the transition */
+ bool unmount_old_root,
+ unsigned long mount_flags) { /* MS_MOVE or MS_BIND */
+
+ _cleanup_free_ char *resolved_old_root_after = NULL;
+ _cleanup_close_ int old_root_fd = -1;
+ bool old_root_remove;
+ const char *i;
+ int r;
+
+ assert(new_root);
+ assert(old_root_after);
+
+ if (path_equal(new_root, "/"))
+ return 0;
+
+ /* Check if we shall remove the contents of the old root */
+ old_root_remove = in_initrd();
+ if (old_root_remove) {
+ old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
+ if (old_root_fd < 0)
+ return log_error_errno(errno, "Failed to open root directory: %m");
+ }
+
+ /* Determine where we shall place the old root after the transition */
+ r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after);
+ if (r == 0) /* Doesn't exist yet. Let's create it */
+ (void) mkdir_p_label(resolved_old_root_after, 0755);
+
+ /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence
+ * remount them MS_PRIVATE here as a work-around.
+ *
+ * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
+ if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
+ return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m");
+
+ FOREACH_STRING(i, "/sys", "/dev", "/run", "/proc") {
+ _cleanup_free_ char *chased = NULL;
+
+ r = chase_symlinks(i, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, i);
+ if (r > 0) {
+ /* Already exists. Let's see if it is a mount point already. */
+ r = path_is_mount_point(chased, NULL, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased);
+ if (r > 0) /* If it is already mounted, then do nothing */
+ continue;
+ } else
+ /* Doesn't exist yet? */
+ (void) mkdir_p_label(chased, 0755);
+
+ if (mount(i, chased, NULL, mount_flags, NULL) < 0)
+ return log_error_errno(errno, "Failed to mount %s to %s: %m", i, chased);
+ }
+
+ /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants
+ * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error
+ * and switch_root() nevertheless. */
+ (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID);
+
+ if (chdir(new_root) < 0)
+ return log_error_errno(errno, "Failed to change directory to %s: %m", new_root);
+
+ /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /),
+ * that's not possible however, and hence we simply overmount root */
+ if (pivot_root(new_root, resolved_old_root_after) >= 0) {
+
+ /* Immediately get rid of the old root, if detach_oldroot is set.
+ * Since we are running off it we need to do this lazily. */
+ if (unmount_old_root) {
+ r = umount_recursive(old_root_after, MNT_DETACH);
+ if (r < 0)
+ log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m");
+ }
+
+ } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0)
+ return log_error_errno(errno, "Failed to move %s to /: %m", new_root);
+
+ if (chroot(".") < 0)
+ return log_error_errno(errno, "Failed to change root: %m");
+
+ if (chdir("/") < 0)
+ return log_error_errno(errno, "Failed to change directory: %m");
+
+ if (old_root_fd >= 0) {
+ struct stat rb;
+
+ if (fstat(old_root_fd, &rb) < 0)
+ log_warning_errno(errno, "Failed to stat old root directory, leaving: %m");
+ else
+ (void) rm_rf_children(TAKE_FD(old_root_fd), 0, &rb); /* takes possession of the dir fd, even on failure */
+ }
+
+ return 0;
+}
diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h
new file mode 100644
index 0000000..4e04283
--- /dev/null
+++ b/src/shared/switch-root.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags);
diff --git a/src/shared/syscall-names.text b/src/shared/syscall-names.text
new file mode 100644
index 0000000..f1b7e29
--- /dev/null
+++ b/src/shared/syscall-names.text
@@ -0,0 +1,598 @@
+_llseek
+_newselect
+_sysctl
+accept
+accept4
+access
+acct
+add_key
+adjtimex
+alarm
+arc_gettls
+arc_settls
+arc_usr_cmpxchg
+arch_prctl
+arm_fadvise64_64
+arm_sync_file_range
+atomic_barrier
+atomic_cmpxchg_32
+bdflush
+bfin_spinlock
+bind
+bpf
+brk
+cache_sync
+cachectl
+cacheflush
+capget
+capset
+chdir
+chmod
+chown
+chown32
+chroot
+clock_adjtime
+clock_adjtime64
+clock_getres
+clock_getres_time64
+clock_gettime
+clock_gettime64
+clock_nanosleep
+clock_nanosleep_time64
+clock_settime
+clock_settime64
+clone
+clone2
+clone3
+close
+close_range
+connect
+copy_file_range
+creat
+create_module
+delete_module
+dipc
+dup
+dup2
+dup3
+epoll_create
+epoll_create1
+epoll_ctl
+epoll_ctl_old
+epoll_pwait
+epoll_wait
+epoll_wait_old
+eventfd
+eventfd2
+exec_with_loader
+execv
+execve
+execveat
+exit
+exit_group
+faccessat
+faccessat2
+fadvise64
+fadvise64_64
+fallocate
+fanotify_init
+fanotify_mark
+fchdir
+fchmod
+fchmodat
+fchown
+fchown32
+fchownat
+fcntl
+fcntl64
+fdatasync
+fgetxattr
+finit_module
+flistxattr
+flock
+fork
+fp_udfiex_crtl
+fremovexattr
+fsconfig
+fsetxattr
+fsmount
+fsopen
+fspick
+fstat
+fstat64
+fstatat64
+fstatfs
+fstatfs64
+fsync
+ftruncate
+ftruncate64
+futex
+futex_time64
+futimesat
+get_kernel_syms
+get_mempolicy
+get_robust_list
+get_thread_area
+getcpu
+getcwd
+getdents
+getdents64
+getdomainname
+getdtablesize
+getegid
+getegid32
+geteuid
+geteuid32
+getgid
+getgid32
+getgroups
+getgroups32
+gethostname
+getitimer
+getpagesize
+getpeername
+getpgid
+getpgrp
+getpid
+getpmsg
+getppid
+getpriority
+getrandom
+getresgid
+getresgid32
+getresuid
+getresuid32
+getrlimit
+getrusage
+getsid
+getsockname
+getsockopt
+gettid
+gettimeofday
+getuid
+getuid32
+getunwind
+getxattr
+getxgid
+getxpid
+getxuid
+idle
+init_module
+inotify_add_watch
+inotify_init
+inotify_init1
+inotify_rm_watch
+io_cancel
+io_destroy
+io_getevents
+io_pgetevents
+io_pgetevents_time64
+io_setup
+io_submit
+io_uring_enter
+io_uring_register
+io_uring_setup
+ioctl
+ioperm
+iopl
+ioprio_get
+ioprio_set
+ipc
+kcmp
+kern_features
+kexec_file_load
+kexec_load
+keyctl
+kill
+lchown
+lchown32
+lgetxattr
+link
+linkat
+listen
+listxattr
+llistxattr
+lookup_dcookie
+lremovexattr
+lseek
+lsetxattr
+lstat
+lstat64
+madvise
+mbind
+membarrier
+memfd_create
+memory_ordering
+migrate_pages
+mincore
+mkdir
+mkdirat
+mknod
+mknodat
+mlock
+mlock2
+mlockall
+mmap
+mmap2
+modify_ldt
+mount
+move_mount
+move_pages
+mprotect
+mq_getsetattr
+mq_notify
+mq_open
+mq_timedreceive
+mq_timedreceive_time64
+mq_timedsend
+mq_timedsend_time64
+mq_unlink
+mremap
+msgctl
+msgget
+msgrcv
+msgsnd
+msync
+multiplexer
+munlock
+munlockall
+munmap
+name_to_handle_at
+nanosleep
+newfstatat
+nfsservctl
+ni_syscall
+nice
+old_adjtimex
+old_getpagesize
+oldfstat
+oldlstat
+oldolduname
+oldstat
+oldumount
+olduname
+open
+open_by_handle_at
+open_tree
+openat
+openat2
+or1k_atomic
+osf_adjtime
+osf_afs_syscall
+osf_alt_plock
+osf_alt_setsid
+osf_alt_sigpending
+osf_asynch_daemon
+osf_audcntl
+osf_audgen
+osf_chflags
+osf_execve
+osf_exportfs
+osf_fchflags
+osf_fdatasync
+osf_fpathconf
+osf_fstat
+osf_fstatfs
+osf_fstatfs64
+osf_fuser
+osf_getaddressconf
+osf_getdirentries
+osf_getdomainname
+osf_getfh
+osf_getfsstat
+osf_gethostid
+osf_getitimer
+osf_getlogin
+osf_getmnt
+osf_getrusage
+osf_getsysinfo
+osf_gettimeofday
+osf_kloadcall
+osf_kmodcall
+osf_lstat
+osf_memcntl
+osf_mincore
+osf_mount
+osf_mremap
+osf_msfs_syscall
+osf_msleep
+osf_mvalid
+osf_mwakeup
+osf_naccept
+osf_nfssvc
+osf_ngetpeername
+osf_ngetsockname
+osf_nrecvfrom
+osf_nrecvmsg
+osf_nsendmsg
+osf_ntp_adjtime
+osf_ntp_gettime
+osf_old_creat
+osf_old_fstat
+osf_old_getpgrp
+osf_old_killpg
+osf_old_lstat
+osf_old_open
+osf_old_sigaction
+osf_old_sigblock
+osf_old_sigreturn
+osf_old_sigsetmask
+osf_old_sigvec
+osf_old_stat
+osf_old_vadvise
+osf_old_vtrace
+osf_old_wait
+osf_oldquota
+osf_pathconf
+osf_pid_block
+osf_pid_unblock
+osf_plock
+osf_priocntlset
+osf_profil
+osf_proplist_syscall
+osf_reboot
+osf_revoke
+osf_sbrk
+osf_security
+osf_select
+osf_set_program_attributes
+osf_set_speculative
+osf_sethostid
+osf_setitimer
+osf_setlogin
+osf_setsysinfo
+osf_settimeofday
+osf_shmat
+osf_signal
+osf_sigprocmask
+osf_sigsendset
+osf_sigstack
+osf_sigwaitprim
+osf_sstk
+osf_stat
+osf_statfs
+osf_statfs64
+osf_subsys_info
+osf_swapctl
+osf_swapon
+osf_syscall
+osf_sysinfo
+osf_table
+osf_uadmin
+osf_usleep_thread
+osf_uswitch
+osf_utc_adjtime
+osf_utc_gettime
+osf_utimes
+osf_utsname
+osf_wait4
+osf_waitid
+pause
+pciconfig_iobase
+pciconfig_read
+pciconfig_write
+perf_event_open
+perfctr
+perfmonctl
+personality
+pidfd_getfd
+pidfd_open
+pidfd_send_signal
+pipe
+pipe2
+pivot_root
+pkey_alloc
+pkey_free
+pkey_mprotect
+poll
+ppoll
+ppoll_time64
+prctl
+pread64
+preadv
+preadv2
+prlimit64
+process_madvise
+process_vm_readv
+process_vm_writev
+pselect6
+pselect6_time64
+ptrace
+pwrite64
+pwritev
+pwritev2
+query_module
+quotactl
+read
+readahead
+readdir
+readlink
+readlinkat
+readv
+reboot
+recv
+recvfrom
+recvmmsg
+recvmmsg_time64
+recvmsg
+remap_file_pages
+removexattr
+rename
+renameat
+renameat2
+request_key
+restart_syscall
+riscv_flush_icache
+rmdir
+rseq
+rt_sigaction
+rt_sigpending
+rt_sigprocmask
+rt_sigqueueinfo
+rt_sigreturn
+rt_sigsuspend
+rt_sigtimedwait
+rt_sigtimedwait_time64
+rt_tgsigqueueinfo
+rtas
+s390_guarded_storage
+s390_pci_mmio_read
+s390_pci_mmio_write
+s390_runtime_instr
+s390_sthyi
+sched_get_affinity
+sched_get_priority_max
+sched_get_priority_min
+sched_getaffinity
+sched_getattr
+sched_getparam
+sched_getscheduler
+sched_rr_get_interval
+sched_rr_get_interval_time64
+sched_set_affinity
+sched_setaffinity
+sched_setattr
+sched_setparam
+sched_setscheduler
+sched_yield
+seccomp
+select
+semctl
+semget
+semop
+semtimedop
+semtimedop_time64
+send
+sendfile
+sendfile64
+sendmmsg
+sendmsg
+sendto
+set_mempolicy
+set_robust_list
+set_thread_area
+set_tid_address
+setdomainname
+setfsgid
+setfsgid32
+setfsuid
+setfsuid32
+setgid
+setgid32
+setgroups
+setgroups32
+sethae
+sethostname
+setitimer
+setns
+setpgid
+setpgrp
+setpriority
+setregid
+setregid32
+setresgid
+setresgid32
+setresuid
+setresuid32
+setreuid
+setreuid32
+setrlimit
+setsid
+setsockopt
+settimeofday
+setuid
+setuid32
+setxattr
+sgetmask
+shmat
+shmctl
+shmdt
+shmget
+shutdown
+sigaction
+sigaltstack
+signal
+signalfd
+signalfd4
+sigpending
+sigprocmask
+sigreturn
+sigsuspend
+socket
+socketcall
+socketpair
+splice
+spu_create
+spu_run
+ssetmask
+stat
+stat64
+statfs
+statfs64
+statx
+stime
+subpage_prot
+swapcontext
+swapoff
+swapon
+switch_endian
+symlink
+symlinkat
+sync
+sync_file_range
+sync_file_range2
+syncfs
+sys_debug_setcontext
+syscall
+sysfs
+sysinfo
+syslog
+sysmips
+tee
+tgkill
+time
+timer_create
+timer_delete
+timer_getoverrun
+timer_gettime
+timer_gettime64
+timer_settime
+timer_settime64
+timerfd
+timerfd_create
+timerfd_gettime
+timerfd_gettime64
+timerfd_settime
+timerfd_settime64
+times
+tkill
+truncate
+truncate64
+udftrap
+ugetrlimit
+umask
+umount
+umount2
+uname
+unlink
+unlinkat
+unshare
+uselib
+userfaultfd
+ustat
+utime
+utimensat
+utimensat_time64
+utimes
+utimesat
+utrap_install
+vfork
+vhangup
+vm86
+vm86old
+vmsplice
+wait4
+waitid
+waitpid
+write
+writev
diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c
new file mode 100644
index 0000000..670c331
--- /dev/null
+++ b/src/shared/sysctl-util.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "fd-util.h"
+#include "fileio.h"
+#include "log.h"
+#include "macro.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "sysctl-util.h"
+
+char *sysctl_normalize(char *s) {
+ char *n;
+
+ n = strpbrk(s, "/.");
+
+ /* If the first separator is a slash, the path is
+ * assumed to be normalized and slashes remain slashes
+ * and dots remains dots. */
+
+ if (n && *n == '.')
+ /* Dots become slashes and slashes become dots. Fun. */
+ do {
+ if (*n == '.')
+ *n = '/';
+ else
+ *n = '.';
+
+ n = strpbrk(n + 1, "/.");
+ } while (n);
+
+ path_simplify(s, true);
+
+ /* Kill the leading slash, but keep the first character of the string in the same place. */
+ if (*s == '/' && *(s+1))
+ memmove(s, s+1, strlen(s));
+
+ return s;
+}
+
+int sysctl_write(const char *property, const char *value) {
+ char *p;
+ _cleanup_close_ int fd = -1;
+
+ assert(property);
+ assert(value);
+
+ log_debug("Setting '%s' to '%.*s'.", property, (int) strcspn(value, NEWLINE), value);
+
+ p = strjoina("/proc/sys/", property);
+ fd = open(p, O_WRONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ if (!endswith(value, "\n"))
+ value = strjoina(value, "\n");
+
+ if (write(fd, value, strlen(value)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int sysctl_writef(const char *property, const char *format, ...) {
+ _cleanup_free_ char *v = NULL;
+ va_list ap;
+ int r;
+
+ va_start(ap, format);
+ r = vasprintf(&v, format, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return -ENOMEM;
+
+ return sysctl_write(property, v);
+}
+
+int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value) {
+ const char *p;
+
+ assert(IN_SET(af, AF_INET, AF_INET6));
+ assert(property);
+ assert(value);
+
+ p = strjoina("/proc/sys/net/ipv", af == AF_INET ? "4" : "6",
+ ifname ? "/conf/" : "", strempty(ifname),
+ property[0] == '/' ? "" : "/", property);
+
+ log_debug("Setting '%s' to '%s'", p, value);
+
+ return write_string_file(p, value, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER);
+}
+
+int sysctl_read(const char *property, char **content) {
+ char *p;
+
+ assert(property);
+ assert(content);
+
+ p = strjoina("/proc/sys/", property);
+ return read_full_file(p, content, NULL);
+}
+
+int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret) {
+ _cleanup_free_ char *value = NULL;
+ const char *p;
+ int r;
+
+ assert(IN_SET(af, AF_INET, AF_INET6));
+ assert(property);
+
+ p = strjoina("/proc/sys/net/ipv", af == AF_INET ? "4" : "6",
+ ifname ? "/conf/" : "", strempty(ifname),
+ property[0] == '/' ? "" : "/", property);
+
+ r = read_one_line_file(p, &value);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ *ret = TAKE_PTR(value);
+
+ return r;
+}
diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h
new file mode 100644
index 0000000..3236419
--- /dev/null
+++ b/src/shared/sysctl-util.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "macro.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+char *sysctl_normalize(char *s);
+int sysctl_read(const char *property, char **value);
+int sysctl_write(const char *property, const char *value);
+int sysctl_writef(const char *property, const char *format, ...) _printf_(2, 3);
+
+int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret);
+int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value);
+static inline int sysctl_write_ip_property_boolean(int af, const char *ifname, const char *property, bool value) {
+ return sysctl_write_ip_property(af, ifname, property, one_zero(value));
+}
+
+#define DEFINE_SYSCTL_WRITE_IP_PROPERTY(name, type, format) \
+ static inline int sysctl_write_ip_property_##name(int af, const char *ifname, const char *property, type value) { \
+ char buf[DECIMAL_STR_MAX(type)]; \
+ xsprintf(buf, format, value); \
+ return sysctl_write_ip_property(af, ifname, property, buf); \
+ }
+
+DEFINE_SYSCTL_WRITE_IP_PROPERTY(int, int, "%i");
+DEFINE_SYSCTL_WRITE_IP_PROPERTY(uint32, uint32_t, "%" PRIu32);
diff --git a/src/shared/test-tables.h b/src/shared/test-tables.h
new file mode 100644
index 0000000..bb8177b
--- /dev/null
+++ b/src/shared/test-tables.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef const char* (*lookup_t)(int);
+typedef int (*reverse_t)(const char*);
+
+static inline void _test_table(const char *name,
+ lookup_t lookup,
+ reverse_t reverse,
+ int size,
+ bool sparse) {
+ int i, boring = 0;
+
+ for (i = -1; i < size + 1; i++) {
+ const char* val = lookup(i);
+ int rev;
+
+ if (val) {
+ rev = reverse(val);
+ boring = 0;
+ } else {
+ rev = reverse("--no-such--value----");
+ boring += i >= 0;
+ }
+
+ if (boring < 1 || i == size)
+ printf("%s: %d → %s → %d\n", name, i, val, rev);
+ else if (boring == 1)
+ printf("%*s ...\n", (int) strlen(name), "");
+
+ assert_se(!(i >= 0 && i < size ?
+ sparse ? rev != i && rev != -1 : val == NULL || rev != i :
+ val != NULL || rev != -1));
+ }
+}
+
+#define test_table(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, false)
+
+#define test_table_sparse(lower, upper) \
+ _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, true)
diff --git a/src/shared/tests.c b/src/shared/tests.c
new file mode 100644
index 0000000..ab7d799
--- /dev/null
+++ b/src/shared/tests.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <util.h>
+
+/* When we include libgen.h because we need dirname() we immediately
+ * undefine basename() since libgen.h defines it as a macro to the POSIX
+ * version which is really broken. We prefer GNU basename(). */
+#include <libgen.h>
+#undef basename
+
+#include "sd-bus.h"
+
+#include "alloc-util.h"
+#include "bus-error.h"
+#include "bus-locator.h"
+#include "bus-util.h"
+#include "bus-wait-for-jobs.h"
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "env-file.h"
+#include "env-util.h"
+#include "fs-util.h"
+#include "log.h"
+#include "namespace-util.h"
+#include "path-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "strv.h"
+#include "tests.h"
+
+char* setup_fake_runtime_dir(void) {
+ char t[] = "/tmp/fake-xdg-runtime-XXXXXX", *p;
+
+ assert_se(mkdtemp(t));
+ assert_se(setenv("XDG_RUNTIME_DIR", t, 1) >= 0);
+ assert_se(p = strdup(t));
+
+ return p;
+}
+
+static void load_testdata_env(void) {
+ static bool called = false;
+ _cleanup_free_ char *s = NULL;
+ _cleanup_free_ char *envpath = NULL;
+ _cleanup_strv_free_ char **pairs = NULL;
+ char **k, **v;
+
+ if (called)
+ return;
+ called = true;
+
+ assert_se(readlink_and_make_absolute("/proc/self/exe", &s) >= 0);
+ dirname(s);
+
+ envpath = path_join(s, "systemd-runtest.env");
+ if (load_env_file_pairs(NULL, envpath, &pairs) < 0)
+ return;
+
+ STRV_FOREACH_PAIR(k, v, pairs)
+ setenv(*k, *v, 0);
+}
+
+int get_testdata_dir(const char *suffix, char **ret) {
+ const char *dir;
+ char *p;
+
+ load_testdata_env();
+
+ /* if the env var is set, use that */
+ dir = getenv("SYSTEMD_TEST_DATA");
+ if (!dir)
+ dir = SYSTEMD_TEST_DATA;
+ if (access(dir, F_OK) < 0)
+ return log_error_errno(errno, "ERROR: $SYSTEMD_TEST_DATA directory [%s] not accessible: %m", dir);
+
+ p = path_join(dir, suffix);
+ if (!p)
+ return log_oom();
+
+ *ret = p;
+ return 0;
+}
+
+const char* get_catalog_dir(void) {
+ const char *env;
+
+ load_testdata_env();
+
+ /* if the env var is set, use that */
+ env = getenv("SYSTEMD_CATALOG_DIR");
+ if (!env)
+ env = SYSTEMD_CATALOG_DIR;
+ if (access(env, F_OK) < 0) {
+ fprintf(stderr, "ERROR: $SYSTEMD_CATALOG_DIR directory [%s] does not exist\n", env);
+ exit(EXIT_FAILURE);
+ }
+ return env;
+}
+
+bool slow_tests_enabled(void) {
+ int r;
+
+ r = getenv_bool("SYSTEMD_SLOW_TESTS");
+ if (r >= 0)
+ return r;
+
+ if (r != -ENXIO)
+ log_warning_errno(r, "Cannot parse $SYSTEMD_SLOW_TESTS, ignoring.");
+ return SYSTEMD_SLOW_TESTS_DEFAULT;
+}
+
+void test_setup_logging(int level) {
+ log_set_max_level(level);
+ log_parse_environment();
+ log_open();
+}
+
+int log_tests_skipped(const char *message) {
+ log_notice("%s: %s, skipping tests.",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+int log_tests_skipped_errno(int r, const char *message) {
+ log_notice_errno(r, "%s: %s, skipping tests: %m",
+ program_invocation_short_name, message);
+ return EXIT_TEST_SKIP;
+}
+
+bool have_namespaces(void) {
+ siginfo_t si = {};
+ pid_t pid;
+
+ /* Checks whether namespaces are available. In some cases they aren't. We do this by calling unshare(), and we
+ * do so in a child process in order not to affect our own process. */
+
+ pid = fork();
+ assert_se(pid >= 0);
+
+ if (pid == 0) {
+ /* child */
+ if (detach_mount_namespace() < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ assert_se(waitid(P_PID, pid, &si, WEXITED) >= 0);
+ assert_se(si.si_code == CLD_EXITED);
+
+ if (si.si_status == EXIT_SUCCESS)
+ return true;
+
+ if (si.si_status == EXIT_FAILURE)
+ return false;
+
+ assert_not_reached("unexpected exit code");
+}
+
+bool can_memlock(void) {
+ /* Let's see if we can mlock() a larger blob of memory. BPF programs are charged against
+ * RLIMIT_MEMLOCK, hence let's first make sure we can lock memory at all, and skip the test if we
+ * cannot. Why not check RLIMIT_MEMLOCK explicitly? Because in container environments the
+ * RLIMIT_MEMLOCK value we see might not match the RLIMIT_MEMLOCK value actually in effect. */
+
+ void *p = mmap(NULL, CAN_MEMLOCK_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+ if (p == MAP_FAILED)
+ return false;
+
+ bool b = mlock(p, CAN_MEMLOCK_SIZE) >= 0;
+ if (b)
+ assert_se(munlock(p, CAN_MEMLOCK_SIZE) >= 0);
+
+ assert_se(munmap(p, CAN_MEMLOCK_SIZE) >= 0);
+ return b;
+}
+
+static int allocate_scope(void) {
+ _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
+ _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *w = NULL;
+ _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
+ _cleanup_free_ char *scope = NULL;
+ const char *object;
+ int r;
+
+ /* Let's try to run this test in a scope of its own, with delegation turned on, so that PID 1 doesn't
+ * interfere with our cgroup management. */
+
+ r = sd_bus_default_system(&bus);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to system bus: %m");
+
+ r = bus_wait_for_jobs_new(bus, &w);
+ if (r < 0)
+ return log_oom();
+
+ if (asprintf(&scope, "%s-%" PRIx64 ".scope", program_invocation_short_name, random_u64()) < 0)
+ return log_oom();
+
+ r = bus_message_new_method_call(bus, &m, bus_systemd_mgr, "StartTransientUnit");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Name and Mode */
+ r = sd_bus_message_append(m, "ss", scope, "fail");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Properties */
+ r = sd_bus_message_open_container(m, 'a', "(sv)");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "PIDs", "au", 1, (uint32_t) getpid_cached());
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "Delegate", "b", 1);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_append(m, "(sv)", "CollectMode", "s", "inactive-or-failed");
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_message_close_container(m);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ /* Auxiliary units */
+ r = sd_bus_message_append(m, "a(sa(sv))", 0);
+ if (r < 0)
+ return bus_log_create_error(r);
+
+ r = sd_bus_call(bus, m, 0, &error, &reply);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start transient scope unit: %s", bus_error_message(&error, r));
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = bus_wait_for_jobs_one(w, object, false);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int enter_cgroup(char **ret_cgroup, bool enter_subroot) {
+ _cleanup_free_ char *cgroup_root = NULL, *cgroup_subroot = NULL;
+ CGroupMask supported;
+ int r;
+
+ r = allocate_scope();
+ if (r < 0)
+ log_warning_errno(r, "Couldn't allocate a scope unit for this test, proceeding without.");
+
+ r = cg_pid_get_path(NULL, 0, &cgroup_root);
+ if (r == -ENOMEDIUM)
+ return log_warning_errno(r, "cg_pid_get_path(NULL, 0, ...) failed: %m");
+ assert(r >= 0);
+
+ if (enter_subroot)
+ assert_se(asprintf(&cgroup_subroot, "%s/%" PRIx64, cgroup_root, random_u64()) >= 0);
+ else {
+ cgroup_subroot = strdup(cgroup_root);
+ assert_se(cgroup_subroot != NULL);
+ }
+
+ assert_se(cg_mask_supported(&supported) >= 0);
+
+ /* If this fails, then we don't mind as the later cgroup operations will fail too, and it's fine if
+ * we handle any errors at that point. */
+
+ r = cg_create_everywhere(supported, _CGROUP_MASK_ALL, cgroup_subroot);
+ if (r < 0)
+ return r;
+
+ r = cg_attach_everywhere(supported, cgroup_subroot, 0, NULL, NULL);
+ if (r < 0)
+ return r;
+
+ if (ret_cgroup)
+ *ret_cgroup = TAKE_PTR(cgroup_subroot);
+
+ return 0;
+}
+
+int enter_cgroup_subroot(char **ret_cgroup) {
+ return enter_cgroup(ret_cgroup, true);
+}
+
+int enter_cgroup_root(char **ret_cgroup) {
+ return enter_cgroup(ret_cgroup, false);
+}
+
+const char *ci_environment(void) {
+ /* We return a string because we might want to provide multiple bits of information later on: not
+ * just the general CI environment type, but also whether we're sanitizing or not, etc. The caller is
+ * expected to use strstr on the returned value. */
+ static const char *ans = POINTER_MAX;
+ const char *p;
+ int r;
+
+ if (ans != POINTER_MAX)
+ return ans;
+
+ /* We allow specifying the environment with $CITYPE. Nobody uses this so far, but we are ready. */
+ p = getenv("CITYPE");
+ if (!isempty(p))
+ return (ans = p);
+
+ if (getenv_bool("TRAVIS") > 0)
+ return (ans = "travis");
+ if (getenv_bool("SEMAPHORE") > 0)
+ return (ans = "semaphore");
+ if (getenv_bool("GITHUB_ACTIONS") > 0)
+ return (ans = "github-actions");
+ if (getenv("AUTOPKGTEST_ARTIFACTS") || getenv("AUTOPKGTEST_TMP"))
+ return (ans = "autopkgtest");
+
+ FOREACH_STRING(p, "CI", "CONTINOUS_INTEGRATION") {
+ /* Those vars are booleans according to Semaphore and Travis docs:
+ * https://docs.travis-ci.com/user/environment-variables/#default-environment-variables
+ * https://docs.semaphoreci.com/ci-cd-environment/environment-variables/#ci
+ */
+ r = getenv_bool(p);
+ if (r > 0)
+ return (ans = "unknown"); /* Some other unknown thing */
+ if (r == 0)
+ return (ans = NULL);
+ }
+
+ return (ans = NULL);
+}
diff --git a/src/shared/tests.h b/src/shared/tests.h
new file mode 100644
index 0000000..c135076
--- /dev/null
+++ b/src/shared/tests.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "sd-daemon.h"
+
+#include "macro.h"
+
+static inline bool manager_errno_skip_test(int r) {
+ return IN_SET(abs(r),
+ EPERM,
+ EACCES,
+ EADDRINUSE,
+ EHOSTDOWN,
+ ENOENT,
+ ENOMEDIUM /* cannot determine cgroup */
+ );
+}
+
+char* setup_fake_runtime_dir(void);
+int enter_cgroup_subroot(char **ret_cgroup);
+int enter_cgroup_root(char **ret_cgroup);
+int get_testdata_dir(const char *suffix, char **ret);
+const char* get_catalog_dir(void);
+bool slow_tests_enabled(void);
+void test_setup_logging(int level);
+int log_tests_skipped(const char *message);
+int log_tests_skipped_errno(int r, const char *message);
+
+bool have_namespaces(void);
+
+/* We use the small but non-trivial limit here */
+#define CAN_MEMLOCK_SIZE (512 * 1024U)
+bool can_memlock(void);
+
+#define TEST_REQ_RUNNING_SYSTEMD(x) \
+ if (sd_booted() > 0) { \
+ x; \
+ } else { \
+ printf("systemd not booted skipping '%s'\n", #x); \
+ }
+
+/* Provide a convenient way to check if we're running in CI. */
+const char *ci_environment(void);
diff --git a/src/shared/tmpfile-util-label.c b/src/shared/tmpfile-util-label.c
new file mode 100644
index 0000000..d37c0b0
--- /dev/null
+++ b/src/shared/tmpfile-util-label.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/stat.h>
+
+#include "selinux-util.h"
+#include "tmpfile-util-label.h"
+#include "tmpfile-util.h"
+
+int fopen_temporary_label(
+ const char *target,
+ const char *path,
+ FILE **f,
+ char **temp_path) {
+
+ int r;
+
+ r = mac_selinux_create_file_prepare(target, S_IFREG);
+ if (r < 0)
+ return r;
+
+ r = fopen_temporary(path, f, temp_path);
+
+ mac_selinux_create_file_clear();
+
+ return r;
+}
diff --git a/src/shared/tmpfile-util-label.h b/src/shared/tmpfile-util-label.h
new file mode 100644
index 0000000..01afc06
--- /dev/null
+++ b/src/shared/tmpfile-util-label.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdio.h>
+
+/* These functions are split out of tmpfile-util.h (and not for example just flags to the functions they wrap) in order
+ * to optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but
+ * not for all */
+
+int fopen_temporary_label(const char *target, const char *path, FILE **f, char **temp_path);
diff --git a/src/shared/tomoyo-util.c b/src/shared/tomoyo-util.c
new file mode 100644
index 0000000..2347179
--- /dev/null
+++ b/src/shared/tomoyo-util.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "tomoyo-util.h"
+
+bool mac_tomoyo_use(void) {
+ static int cached_use = -1;
+
+ if (cached_use < 0)
+ cached_use = (access("/sys/kernel/security/tomoyo/version",
+ F_OK) == 0);
+
+ return cached_use;
+}
diff --git a/src/shared/tomoyo-util.h b/src/shared/tomoyo-util.h
new file mode 100644
index 0000000..a6ee7d4
--- /dev/null
+++ b/src/shared/tomoyo-util.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+bool mac_tomoyo_use(void);
diff --git a/src/shared/udev-util.c b/src/shared/udev-util.c
new file mode 100644
index 0000000..030922e
--- /dev/null
+++ b/src/shared/udev-util.c
@@ -0,0 +1,371 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "device-util.h"
+#include "env-file.h"
+#include "escape.h"
+#include "log.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "signal-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "udev-util.h"
+#include "utf8.h"
+
+static const char* const resolve_name_timing_table[_RESOLVE_NAME_TIMING_MAX] = {
+ [RESOLVE_NAME_NEVER] = "never",
+ [RESOLVE_NAME_LATE] = "late",
+ [RESOLVE_NAME_EARLY] = "early",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(resolve_name_timing, ResolveNameTiming);
+
+int udev_parse_config_full(
+ unsigned *ret_children_max,
+ usec_t *ret_exec_delay_usec,
+ usec_t *ret_event_timeout_usec,
+ ResolveNameTiming *ret_resolve_name_timing,
+ int *ret_timeout_signal) {
+
+ _cleanup_free_ char *log_val = NULL, *children_max = NULL, *exec_delay = NULL, *event_timeout = NULL, *resolve_names = NULL, *timeout_signal = NULL;
+ int r;
+
+ r = parse_env_file(NULL, "/etc/udev/udev.conf",
+ "udev_log", &log_val,
+ "children_max", &children_max,
+ "exec_delay", &exec_delay,
+ "event_timeout", &event_timeout,
+ "resolve_names", &resolve_names,
+ "timeout_signal", &timeout_signal);
+ if (r == -ENOENT)
+ return 0;
+ if (r < 0)
+ return r;
+
+ if (log_val) {
+ const char *log;
+ size_t n;
+
+ /* unquote */
+ n = strlen(log_val);
+ if (n >= 2 &&
+ ((log_val[0] == '"' && log_val[n-1] == '"') ||
+ (log_val[0] == '\'' && log_val[n-1] == '\''))) {
+ log_val[n - 1] = '\0';
+ log = log_val + 1;
+ } else
+ log = log_val;
+
+ /* we set the udev log level here explicitly, this is supposed
+ * to regulate the code in libudev/ and udev/. */
+ r = log_set_max_level_from_string_realm(LOG_REALM_UDEV, log);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to set udev log level '%s', ignoring: %m", log);
+ }
+
+ if (ret_children_max && children_max) {
+ r = safe_atou(children_max, ret_children_max);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse children_max=%s, ignoring: %m", children_max);
+ }
+
+ if (ret_exec_delay_usec && exec_delay) {
+ r = parse_sec(exec_delay, ret_exec_delay_usec);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse exec_delay=%s, ignoring: %m", exec_delay);
+ }
+
+ if (ret_event_timeout_usec && event_timeout) {
+ r = parse_sec(event_timeout, ret_event_timeout_usec);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse event_timeout=%s, ignoring: %m", event_timeout);
+ }
+
+ if (ret_resolve_name_timing && resolve_names) {
+ ResolveNameTiming t;
+
+ t = resolve_name_timing_from_string(resolve_names);
+ if (t < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse resolve_names=%s, ignoring.", resolve_names);
+ else
+ *ret_resolve_name_timing = t;
+ }
+
+ if (ret_timeout_signal && timeout_signal) {
+ r = signal_from_string(timeout_signal);
+ if (r < 0)
+ log_syntax(NULL, LOG_WARNING, "/etc/udev/udev.conf", 0, r,
+ "failed to parse timeout_signal=%s, ignoring: %m", timeout_signal);
+ else
+ *ret_timeout_signal = r;
+ }
+
+ return 0;
+}
+
+/* Note that if -ENOENT is returned, it will be logged at debug level rather than error,
+ * because it's an expected, common occurrence that the caller will handle with a fallback */
+static int device_new_from_dev_path(const char *devlink, sd_device **ret_device) {
+ struct stat st;
+ int r;
+
+ assert(devlink);
+
+ if (stat(devlink, &st) < 0)
+ return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
+ "Failed to stat() %s: %m", devlink);
+
+ if (!S_ISBLK(st.st_mode))
+ return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK),
+ "%s does not point to a block device: %m", devlink);
+
+ r = sd_device_new_from_devnum(ret_device, 'b', st.st_rdev);
+ if (r < 0)
+ return log_error_errno(r, "Failed to initialize device from %s: %m", devlink);
+
+ return 0;
+}
+
+struct DeviceMonitorData {
+ const char *sysname;
+ const char *devlink;
+ sd_device *device;
+};
+
+static void device_monitor_data_free(struct DeviceMonitorData *d) {
+ assert(d);
+
+ sd_device_unref(d->device);
+}
+
+static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
+ struct DeviceMonitorData *data = userdata;
+ const char *sysname;
+
+ assert(device);
+ assert(data);
+ assert(data->sysname || data->devlink);
+ assert(!data->device);
+
+ /* Ignore REMOVE events here. We are waiting for initialization after all, not de-initialization. We
+ * might see a REMOVE event from an earlier use of the device (devices by the same name are recycled
+ * by the kernel after all), which we should not get confused by. After all we cannot distinguish use
+ * cycles of the devices, as the udev queue is entirely asynchronous.
+ *
+ * If we see a REMOVE event here for the use cycle we actually care about then we won't notice of
+ * course, but that should be OK, given the timeout logic used on the wait loop: this will be noticed
+ * by means of -ETIMEDOUT. Thus we won't notice immediately, but eventually, and that should be
+ * sufficient for an error path that should regularly not happen.
+ *
+ * (And yes, we only need to special case REMOVE. It's the only "negative" event type, where a device
+ * ceases to exist. All other event types are "positive": the device exists and is registered in the
+ * udev database, thus whenever we see the event, we can consider it initialized.) */
+ if (device_for_action(device, DEVICE_ACTION_REMOVE))
+ return 0;
+
+ if (data->sysname && sd_device_get_sysname(device, &sysname) >= 0 && streq(sysname, data->sysname))
+ goto found;
+
+ if (data->devlink) {
+ const char *devlink;
+
+ FOREACH_DEVICE_DEVLINK(device, devlink)
+ if (path_equal(devlink, data->devlink))
+ goto found;
+
+ if (sd_device_get_devname(device, &devlink) >= 0 && path_equal(devlink, data->devlink))
+ goto found;
+ }
+
+ return 0;
+
+found:
+ data->device = sd_device_ref(device);
+ return sd_event_exit(sd_device_monitor_get_event(monitor), 0);
+}
+
+static int device_wait_for_initialization_internal(
+ sd_device *_device,
+ const char *devlink,
+ const char *subsystem,
+ usec_t deadline,
+ sd_device **ret) {
+
+ _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
+ _cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL;
+ _cleanup_(sd_event_unrefp) sd_event *event = NULL;
+ /* Ensure that if !_device && devlink, device gets unrefd on errors since it will be new */
+ _cleanup_(sd_device_unrefp) sd_device *device = sd_device_ref(_device);
+ _cleanup_(device_monitor_data_free) struct DeviceMonitorData data = {
+ .devlink = devlink,
+ };
+ int r;
+
+ assert(device || (subsystem && devlink));
+
+ /* Devlink might already exist, if it does get the device to use the sysname filtering */
+ if (!device && devlink) {
+ r = device_new_from_dev_path(devlink, &device);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+
+ if (device) {
+ if (sd_device_get_is_initialized(device) > 0) {
+ if (ret)
+ *ret = sd_device_ref(device);
+ return 0;
+ }
+ /* We need either the sysname or the devlink for filtering */
+ assert_se(sd_device_get_sysname(device, &data.sysname) >= 0 || devlink);
+ }
+
+ /* Wait until the device is initialized, so that we can get access to the ID_PATH property */
+
+ r = sd_event_new(&event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get default event: %m");
+
+ r = sd_device_monitor_new(&monitor);
+ if (r < 0)
+ return log_error_errno(r, "Failed to acquire monitor: %m");
+
+ if (device && !subsystem) {
+ r = sd_device_get_subsystem(device, &subsystem);
+ if (r < 0 && r != -ENOENT)
+ return log_device_error_errno(device, r, "Failed to get subsystem: %m");
+ }
+
+ if (subsystem) {
+ r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add %s subsystem match to monitor: %m", subsystem);
+ }
+
+ r = sd_device_monitor_attach_event(monitor, event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to attach event to device monitor: %m");
+
+ r = sd_device_monitor_start(monitor, device_monitor_handler, &data);
+ if (r < 0)
+ return log_error_errno(r, "Failed to start device monitor: %m");
+
+ if (deadline != USEC_INFINITY) {
+ r = sd_event_add_time(
+ event, &timeout_source,
+ CLOCK_MONOTONIC, deadline, 0,
+ NULL, INT_TO_PTR(-ETIMEDOUT));
+ if (r < 0)
+ return log_error_errno(r, "Failed to add timeout event source: %m");
+ }
+
+ /* Check again, maybe things changed. Udev will re-read the db if the device wasn't initialized
+ * yet. */
+ if (!device && devlink) {
+ r = device_new_from_dev_path(devlink, &device);
+ if (r < 0 && r != -ENOENT)
+ return r;
+ }
+ if (device && sd_device_get_is_initialized(device) > 0) {
+ if (ret)
+ *ret = sd_device_ref(device);
+ return 0;
+ }
+
+ r = sd_event_loop(event);
+ if (r < 0)
+ return log_error_errno(r, "Failed to wait for device to be initialized: %m");
+
+ if (ret)
+ *ret = TAKE_PTR(data.device);
+ return 0;
+}
+
+int device_wait_for_initialization(sd_device *device, const char *subsystem, usec_t deadline, sd_device **ret) {
+ return device_wait_for_initialization_internal(device, NULL, subsystem, deadline, ret);
+}
+
+int device_wait_for_devlink(const char *devlink, const char *subsystem, usec_t deadline, sd_device **ret) {
+ return device_wait_for_initialization_internal(NULL, devlink, subsystem, deadline, ret);
+}
+
+int device_is_renaming(sd_device *dev) {
+ int r;
+
+ assert(dev);
+
+ r = sd_device_get_property_value(dev, "ID_RENAMING", NULL);
+ if (r == -ENOENT)
+ return false;
+ if (r < 0)
+ return r;
+
+ return true;
+}
+
+bool device_for_action(sd_device *dev, DeviceAction action) {
+ DeviceAction a;
+
+ assert(dev);
+
+ if (device_get_action(dev, &a) < 0)
+ return false;
+
+ return a == action;
+}
+
+int udev_rule_parse_value(char *str, char **ret_value, char **ret_endpos) {
+ char *i, *j;
+ int r;
+ bool is_escaped;
+
+ /* value must be double quotated */
+ is_escaped = str[0] == 'e';
+ str += is_escaped;
+ if (str[0] != '"')
+ return -EINVAL;
+ str++;
+
+ if (!is_escaped) {
+ /* unescape double quotation '\"'->'"' */
+ for (i = j = str; *i != '"'; i++, j++) {
+ if (*i == '\0')
+ return -EINVAL;
+ if (i[0] == '\\' && i[1] == '"')
+ i++;
+ *j = *i;
+ }
+ j[0] = '\0';
+ } else {
+ _cleanup_free_ char *unescaped = NULL;
+
+ /* find the end position of value */
+ for (i = str; *i != '"'; i++) {
+ if (i[0] == '\\')
+ i++;
+ if (*i == '\0')
+ return -EINVAL;
+ }
+ i[0] = '\0';
+
+ r = cunescape_length(str, i - str, 0, &unescaped);
+ if (r < 0)
+ return r;
+ assert(r <= i - str);
+ memcpy(str, unescaped, r + 1);
+ }
+
+ *ret_value = str;
+ *ret_endpos = i + 1;
+ return 0;
+}
diff --git a/src/shared/udev-util.h b/src/shared/udev-util.h
new file mode 100644
index 0000000..270861e
--- /dev/null
+++ b/src/shared/udev-util.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-device.h"
+
+#include "device-private.h"
+#include "time-util.h"
+
+typedef enum ResolveNameTiming {
+ RESOLVE_NAME_NEVER,
+ RESOLVE_NAME_LATE,
+ RESOLVE_NAME_EARLY,
+ _RESOLVE_NAME_TIMING_MAX,
+ _RESOLVE_NAME_TIMING_INVALID = -1,
+} ResolveNameTiming;
+
+ResolveNameTiming resolve_name_timing_from_string(const char *s) _pure_;
+const char *resolve_name_timing_to_string(ResolveNameTiming i) _const_;
+
+int udev_parse_config_full(
+ unsigned *ret_children_max,
+ usec_t *ret_exec_delay_usec,
+ usec_t *ret_event_timeout_usec,
+ ResolveNameTiming *ret_resolve_name_timing,
+ int *ret_timeout_signal);
+
+static inline int udev_parse_config(void) {
+ return udev_parse_config_full(NULL, NULL, NULL, NULL, NULL);
+}
+
+int device_wait_for_initialization(sd_device *device, const char *subsystem, usec_t deadline, sd_device **ret);
+int device_wait_for_devlink(const char *path, const char *subsystem, usec_t deadline, sd_device **ret);
+int device_is_renaming(sd_device *dev);
+bool device_for_action(sd_device *dev, DeviceAction action);
+
+int udev_rule_parse_value(char *str, char **ret_value, char **ret_endpos);
diff --git a/src/shared/uid-range.c b/src/shared/uid-range.c
new file mode 100644
index 0000000..5d5bf7f
--- /dev/null
+++ b/src/shared/uid-range.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "sort-util.h"
+#include "uid-range.h"
+#include "user-util.h"
+
+static bool uid_range_intersect(UidRange *range, uid_t start, uid_t nr) {
+ assert(range);
+
+ return range->start <= start + nr &&
+ range->start + range->nr >= start;
+}
+
+static void uid_range_coalesce(UidRange **p, unsigned *n) {
+ assert(p);
+ assert(n);
+
+ for (unsigned i = 0; i < *n; i++) {
+ for (unsigned j = i + 1; j < *n; j++) {
+ UidRange *x = (*p)+i, *y = (*p)+j;
+
+ if (uid_range_intersect(x, y->start, y->nr)) {
+ uid_t begin, end;
+
+ begin = MIN(x->start, y->start);
+ end = MAX(x->start + x->nr, y->start + y->nr);
+
+ x->start = begin;
+ x->nr = end - begin;
+
+ if (*n > j+1)
+ memmove(y, y+1, sizeof(UidRange) * (*n - j -1));
+
+ (*n)--;
+ j--;
+ }
+ }
+ }
+}
+
+static int uid_range_compare(const UidRange *a, const UidRange *b) {
+ int r;
+
+ r = CMP(a->start, b->start);
+ if (r != 0)
+ return r;
+
+ return CMP(a->nr, b->nr);
+}
+
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr) {
+ bool found = false;
+ UidRange *x;
+
+ assert(p);
+ assert(n);
+
+ if (nr <= 0)
+ return 0;
+
+ for (unsigned i = 0; i < *n; i++) {
+ x = (*p) + i;
+ if (uid_range_intersect(x, start, nr)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ uid_t begin, end;
+
+ begin = MIN(x->start, start);
+ end = MAX(x->start + x->nr, start + nr);
+
+ x->start = begin;
+ x->nr = end - begin;
+ } else {
+ UidRange *t;
+
+ t = reallocarray(*p, *n + 1, sizeof(UidRange));
+ if (!t)
+ return -ENOMEM;
+
+ *p = t;
+ x = t + ((*n) ++);
+
+ x->start = start;
+ x->nr = nr;
+ }
+
+ typesafe_qsort(*p, *n, uid_range_compare);
+ uid_range_coalesce(p, n);
+
+ return *n;
+}
+
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s) {
+ uid_t start, nr;
+ const char *t;
+ int r;
+
+ assert(p);
+ assert(n);
+ assert(s);
+
+ t = strchr(s, '-');
+ if (t) {
+ char *b;
+ uid_t end;
+
+ b = strndupa(s, t - s);
+ r = parse_uid(b, &start);
+ if (r < 0)
+ return r;
+
+ r = parse_uid(t+1, &end);
+ if (r < 0)
+ return r;
+
+ if (end < start)
+ return -EINVAL;
+
+ nr = end - start + 1;
+ } else {
+ r = parse_uid(s, &start);
+ if (r < 0)
+ return r;
+
+ nr = 1;
+ }
+
+ return uid_range_add(p, n, start, nr);
+}
+
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid) {
+ uid_t closest = UID_INVALID, candidate;
+
+ assert(p);
+ assert(uid);
+
+ candidate = *uid - 1;
+
+ for (unsigned i = 0; i < n; i++) {
+ uid_t begin, end;
+
+ begin = p[i].start;
+ end = p[i].start + p[i].nr - 1;
+
+ if (candidate >= begin && candidate <= end) {
+ *uid = candidate;
+ return 1;
+ }
+
+ if (end < candidate)
+ closest = end;
+ }
+
+ if (closest == UID_INVALID)
+ return -EBUSY;
+
+ *uid = closest;
+ return 1;
+}
+
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid) {
+ assert(p);
+ assert(uid);
+
+ for (unsigned i = 0; i < n; i++)
+ if (uid >= p[i].start && uid < p[i].start + p[i].nr)
+ return true;
+
+ return false;
+}
diff --git a/src/shared/uid-range.h b/src/shared/uid-range.h
new file mode 100644
index 0000000..ef168cd
--- /dev/null
+++ b/src/shared/uid-range.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+typedef struct UidRange {
+ uid_t start, nr;
+} UidRange;
+
+int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr);
+int uid_range_add_str(UidRange **p, unsigned *n, const char *s);
+
+int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid);
+bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid);
diff --git a/src/shared/unit-file.c b/src/shared/unit-file.c
new file mode 100644
index 0000000..4c30719
--- /dev/null
+++ b/src/shared/unit-file.c
@@ -0,0 +1,601 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "sd-id128.h"
+
+#include "dirent-util.h"
+#include "fd-util.h"
+#include "fs-util.h"
+#include "macro.h"
+#include "path-lookup.h"
+#include "set.h"
+#include "special.h"
+#include "stat-util.h"
+#include "string-util.h"
+#include "strv.h"
+#include "unit-file.h"
+
+bool unit_type_may_alias(UnitType type) {
+ return IN_SET(type,
+ UNIT_SERVICE,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_DEVICE,
+ UNIT_TIMER,
+ UNIT_PATH);
+}
+
+bool unit_type_may_template(UnitType type) {
+ return IN_SET(type,
+ UNIT_SERVICE,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_TIMER,
+ UNIT_PATH);
+}
+
+int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation) {
+ _cleanup_free_ char *template = NULL;
+ int r, un_type1, un_type2;
+
+ un_type1 = unit_name_classify(symlink);
+
+ /* The straightforward case: the symlink name matches the target and we have a valid unit */
+ if (streq(symlink, target) &&
+ (un_type1 & (UNIT_NAME_PLAIN | UNIT_NAME_INSTANCE)))
+ return 1;
+
+ r = unit_name_template(symlink, &template);
+ if (r == -EINVAL)
+ return 0; /* Not a template */
+ if (r < 0)
+ return r;
+
+ un_type2 = unit_name_classify(target);
+
+ /* An instance name points to a target that is just the template name */
+ if (un_type1 == UNIT_NAME_INSTANCE &&
+ un_type2 == UNIT_NAME_TEMPLATE &&
+ streq(template, target))
+ return 1;
+
+ /* foo@.target.requires/bar@.service: instance will be propagated */
+ if (instance_propagation &&
+ un_type1 == UNIT_NAME_TEMPLATE &&
+ un_type2 == UNIT_NAME_TEMPLATE &&
+ streq(template, target))
+ return 1;
+
+ return 0;
+}
+
+int unit_validate_alias_symlink_and_warn(const char *filename, const char *target) {
+ const char *src, *dst;
+ _cleanup_free_ char *src_instance = NULL, *dst_instance = NULL;
+ UnitType src_unit_type, dst_unit_type;
+ int src_name_type, dst_name_type;
+
+ /* Check if the *alias* symlink is valid. This applies to symlinks like
+ * /etc/systemd/system/dbus.service → dbus-broker.service, but not to .wants or .requires symlinks
+ * and such. Neither does this apply to symlinks which *link* units, i.e. symlinks to outside of the
+ * unit lookup path.
+ *
+ * -EINVAL is returned if the something is wrong with the source filename or the source unit type is
+ * not allowed to symlink,
+ * -EXDEV if the target filename is not a valid unit name or doesn't match the source.
+ */
+
+ src = basename(filename);
+ dst = basename(target);
+
+ /* src checks */
+
+ src_name_type = unit_name_to_instance(src, &src_instance);
+ if (src_name_type < 0)
+ return log_notice_errno(src_name_type,
+ "%s: not a valid unit name \"%s\": %m", filename, src);
+
+ src_unit_type = unit_name_to_type(src);
+ assert(src_unit_type >= 0); /* unit_name_to_instance() checked the suffix already */
+
+ if (!unit_type_may_alias(src_unit_type))
+ return log_notice_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: symlinks are not allowed for units of this type, rejecting.",
+ filename);
+
+ if (src_name_type != UNIT_NAME_PLAIN &&
+ !unit_type_may_template(src_unit_type))
+ return log_notice_errno(SYNTHETIC_ERRNO(EINVAL),
+ "%s: templates not allowed for %s units, rejecting.",
+ filename, unit_type_to_string(src_unit_type));
+
+ /* dst checks */
+
+ dst_name_type = unit_name_to_instance(dst, &dst_instance);
+ if (dst_name_type < 0)
+ return log_notice_errno(dst_name_type == -EINVAL ? SYNTHETIC_ERRNO(EXDEV) : dst_name_type,
+ "%s points to \"%s\" which is not a valid unit name: %m",
+ filename, dst);
+
+ if (!(dst_name_type == src_name_type ||
+ (src_name_type == UNIT_NAME_INSTANCE && dst_name_type == UNIT_NAME_TEMPLATE)))
+ return log_notice_errno(SYNTHETIC_ERRNO(EXDEV),
+ "%s: symlink target name type \"%s\" does not match source, rejecting.",
+ filename, dst);
+
+ if (dst_name_type == UNIT_NAME_INSTANCE) {
+ assert(src_instance);
+ assert(dst_instance);
+ if (!streq(src_instance, dst_instance))
+ return log_notice_errno(SYNTHETIC_ERRNO(EXDEV),
+ "%s: unit symlink target \"%s\" instance name doesn't match, rejecting.",
+ filename, dst);
+ }
+
+ dst_unit_type = unit_name_to_type(dst);
+ if (dst_unit_type != src_unit_type)
+ return log_notice_errno(SYNTHETIC_ERRNO(EXDEV),
+ "%s: symlink target \"%s\" has incompatible suffix, rejecting.",
+ filename, dst);
+
+ return 0;
+}
+
+#define FOLLOW_MAX 8
+
+static int unit_ids_map_get(
+ Hashmap *unit_ids_map,
+ const char *unit_name,
+ const char **ret_fragment_path) {
+
+ /* Resolve recursively until we hit an absolute path, i.e. a non-aliased unit.
+ *
+ * We distinguish the case where unit_name was not found in the hashmap at all, and the case where
+ * some symlink was broken.
+ *
+ * If a symlink target points to an instance name, then we also check for the template. */
+
+ const char *id = NULL;
+ int r;
+
+ for (unsigned n = 0; n < FOLLOW_MAX; n++) {
+ const char *t = hashmap_get(unit_ids_map, id ?: unit_name);
+ if (!t) {
+ _cleanup_free_ char *template = NULL;
+
+ if (!id)
+ return -ENOENT;
+
+ r = unit_name_template(id, &template);
+ if (r == -EINVAL)
+ return -ENXIO; /* we failed to find the symlink target */
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine template name for %s: %m", id);
+
+ t = hashmap_get(unit_ids_map, template);
+ if (!t)
+ return -ENXIO;
+
+ /* We successfully switched from instanced name to a template, let's continue */
+ }
+
+ if (path_is_absolute(t)) {
+ if (ret_fragment_path)
+ *ret_fragment_path = t;
+ return 0;
+ }
+
+ id = t;
+ }
+
+ return -ELOOP;
+}
+
+static bool lookup_paths_mtime_exclude(const LookupPaths *lp, const char *path) {
+ /* Paths that are under our exclusive control. Users shall not alter those directly. */
+
+ return streq_ptr(path, lp->generator) ||
+ streq_ptr(path, lp->generator_early) ||
+ streq_ptr(path, lp->generator_late) ||
+ streq_ptr(path, lp->transient) ||
+ streq_ptr(path, lp->persistent_control) ||
+ streq_ptr(path, lp->runtime_control);
+}
+
+#define HASH_KEY SD_ID128_MAKE(4e,86,1b,e3,39,b3,40,46,98,5d,b8,11,34,8f,c3,c1)
+
+bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new) {
+ struct siphash state;
+
+ siphash24_init(&state, HASH_KEY.bytes);
+
+ char **dir;
+ STRV_FOREACH(dir, (char**) lp->search_path) {
+ struct stat st;
+
+ if (lookup_paths_mtime_exclude(lp, *dir))
+ continue;
+
+ /* Determine the latest lookup path modification time */
+ if (stat(*dir, &st) < 0) {
+ if (errno == ENOENT)
+ continue;
+
+ log_debug_errno(errno, "Failed to stat %s, ignoring: %m", *dir);
+ continue;
+ }
+
+ siphash24_compress_usec_t(timespec_load(&st.st_mtim), &state);
+ }
+
+ uint64_t updated = siphash24_finalize(&state);
+ if (ret_new)
+ *ret_new = updated;
+ if (updated != timestamp_hash)
+ log_debug("Modification times have changed, need to update cache.");
+ return updated == timestamp_hash;
+}
+
+int unit_file_build_name_map(
+ const LookupPaths *lp,
+ uint64_t *cache_timestamp_hash,
+ Hashmap **unit_ids_map,
+ Hashmap **unit_names_map,
+ Set **path_cache) {
+
+ /* Build two mappings: any name → main unit (i.e. the end result of symlink resolution), unit name →
+ * all aliases (i.e. the entry for a given key is a a list of all names which point to this key). The
+ * key is included in the value iff we saw a file or symlink with that name. In other words, if we
+ * have a key, but it is not present in the value for itself, there was an alias pointing to it, but
+ * the unit itself is not loadable.
+ *
+ * At the same, build a cache of paths where to find units. The non-const parameters are for input
+ * and output. Existing contents will be freed before the new contents are stored.
+ */
+
+ _cleanup_hashmap_free_ Hashmap *ids = NULL, *names = NULL;
+ _cleanup_set_free_free_ Set *paths = NULL;
+ uint64_t timestamp_hash;
+ char **dir;
+ int r;
+
+ /* Before doing anything, check if the timestamp hash that was passed is still valid.
+ * If yes, do nothing. */
+ if (cache_timestamp_hash &&
+ lookup_paths_timestamp_hash_same(lp, *cache_timestamp_hash, &timestamp_hash))
+ return 0;
+
+ /* The timestamp hash is now set based on the mtimes from before when we start reading files.
+ * If anything is modified concurrently, we'll consider the cache outdated. */
+
+ if (path_cache) {
+ paths = set_new(&path_hash_ops_free);
+ if (!paths)
+ return log_oom();
+ }
+
+ STRV_FOREACH(dir, (char**) lp->search_path) {
+ struct dirent *de;
+ _cleanup_closedir_ DIR *d = NULL;
+
+ d = opendir(*dir);
+ if (!d) {
+ if (errno != ENOENT)
+ log_warning_errno(errno, "Failed to open \"%s\", ignoring: %m", *dir);
+ continue;
+ }
+
+ FOREACH_DIRENT_ALL(de, d, log_warning_errno(errno, "Failed to read \"%s\", ignoring: %m", *dir)) {
+ char *filename;
+ _cleanup_free_ char *_filename_free = NULL, *simplified = NULL;
+ const char *suffix, *dst = NULL;
+ bool valid_unit_name;
+
+ valid_unit_name = unit_name_is_valid(de->d_name, UNIT_NAME_ANY);
+
+ /* We only care about valid units and dirs with certain suffixes, let's ignore the
+ * rest. */
+ if (!valid_unit_name &&
+ !ENDSWITH_SET(de->d_name, ".wants", ".requires", ".d"))
+ continue;
+
+ filename = path_join(*dir, de->d_name);
+ if (!filename)
+ return log_oom();
+
+ if (paths) {
+ r = set_consume(paths, filename);
+ if (r < 0)
+ return log_oom();
+ /* We will still use filename below. This is safe because we know the set
+ * holds a reference. */
+ } else
+ _filename_free = filename; /* Make sure we free the filename. */
+
+ if (!valid_unit_name)
+ continue;
+ assert_se(suffix = strrchr(de->d_name, '.'));
+
+ /* search_path is ordered by priority (highest first). If the name is already mapped
+ * to something (incl. itself), it means that we have already seen it, and we should
+ * ignore it here. */
+ if (hashmap_contains(ids, de->d_name))
+ continue;
+
+ dirent_ensure_type(d, de);
+ if (de->d_type == DT_LNK) {
+ /* We don't explicitly check for alias loops here. unit_ids_map_get() which
+ * limits the number of hops should be used to access the map. */
+
+ _cleanup_free_ char *target = NULL;
+
+ r = readlinkat_malloc(dirfd(d), de->d_name, &target);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to read symlink %s/%s, ignoring: %m",
+ *dir, de->d_name);
+ continue;
+ }
+
+ const bool is_abs = path_is_absolute(target);
+ if (lp->root_dir || !is_abs) {
+ char *target_abs = path_join(is_abs ? lp->root_dir : *dir, target);
+ if (!target_abs)
+ return log_oom();
+
+ free_and_replace(target, target_abs);
+ }
+
+ /* Get rid of "." and ".." components in target path */
+ r = chase_symlinks(target, lp->root_dir, CHASE_NOFOLLOW | CHASE_NONEXISTENT, &simplified, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to resolve symlink %s pointing to %s, ignoring: %m",
+ filename, target);
+ continue;
+ }
+
+ /* Check if the symlink goes outside of our search path.
+ * If yes, it's a linked unit file or mask, and we don't care about the target name.
+ * Let's just store the link destination directly.
+ * If not, let's verify that it's a good symlink. */
+ char *tail = path_startswith_strv(simplified, lp->search_path);
+ if (tail) {
+ bool self_alias;
+
+ dst = basename(simplified);
+ self_alias = streq(dst, de->d_name);
+
+ if (is_path(tail))
+ log_full(self_alias ? LOG_DEBUG : LOG_WARNING,
+ "Suspicious symlink %s→%s, treating as alias.",
+ filename, simplified);
+
+ r = unit_validate_alias_symlink_and_warn(filename, simplified);
+ if (r < 0)
+ continue;
+
+ if (self_alias) {
+ /* A self-alias that has no effect */
+ log_debug("%s: self-alias: %s/%s → %s, ignoring.",
+ __func__, *dir, de->d_name, dst);
+ continue;
+ }
+
+ log_debug("%s: alias: %s/%s → %s", __func__, *dir, de->d_name, dst);
+ } else {
+ dst = simplified;
+
+ log_debug("%s: linked unit file: %s/%s → %s", __func__, *dir, de->d_name, dst);
+ }
+
+ } else {
+ dst = filename;
+ log_debug("%s: normal unit file: %s", __func__, dst);
+ }
+
+ r = hashmap_put_strdup(&ids, de->d_name, dst);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to add entry to hashmap (%s→%s): %m",
+ de->d_name, dst);
+ }
+ }
+
+ /* Let's also put the names in the reverse db. */
+ const char *dummy, *src;
+ HASHMAP_FOREACH_KEY(dummy, src, ids) {
+ const char *dst;
+
+ r = unit_ids_map_get(ids, src, &dst);
+ if (r < 0)
+ continue;
+
+ if (null_or_empty_path(dst) != 0)
+ continue;
+
+ /* Do not treat instance symlinks that point to the template as aliases */
+ if (unit_name_is_valid(basename(dst), UNIT_NAME_TEMPLATE) &&
+ unit_name_is_valid(src, UNIT_NAME_INSTANCE))
+ continue;
+
+ r = string_strv_hashmap_put(&names, basename(dst), src);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to add entry to hashmap (%s→%s): %m",
+ basename(dst), src);
+ }
+
+ if (cache_timestamp_hash)
+ *cache_timestamp_hash = timestamp_hash;
+
+ hashmap_free_and_replace(*unit_ids_map, ids);
+ hashmap_free_and_replace(*unit_names_map, names);
+ if (path_cache)
+ set_free_and_replace(*path_cache, paths);
+
+ return 1;
+}
+
+int unit_file_find_fragment(
+ Hashmap *unit_ids_map,
+ Hashmap *unit_name_map,
+ const char *unit_name,
+ const char **ret_fragment_path,
+ Set **ret_names) {
+
+ const char *fragment = NULL;
+ _cleanup_free_ char *template = NULL, *instance = NULL;
+ _cleanup_set_free_free_ Set *names = NULL;
+ char **t, **nnn;
+ int r, name_type;
+
+ /* Finds a fragment path, and returns the set of names:
+ * if we have …/foo.service and …/foo-alias.service→foo.service,
+ * and …/foo@.service and …/foo-alias@.service→foo@.service,
+ * and …/foo@inst.service,
+ * this should return:
+ * foo.service → …/foo.service, {foo.service, foo-alias.service},
+ * foo-alias.service → …/foo.service, {foo.service, foo-alias.service},
+ * foo@.service → …/foo@.service, {foo@.service, foo-alias@.service},
+ * foo-alias@.service → …/foo@.service, {foo@.service, foo-alias@.service},
+ * foo@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service},
+ * foo-alias@bar.service → …/foo@.service, {foo@bar.service, foo-alias@bar.service},
+ * foo-alias@inst.service → …/foo@inst.service, {foo@inst.service, foo-alias@inst.service}.
+ */
+
+ name_type = unit_name_to_instance(unit_name, &instance);
+ if (name_type < 0)
+ return name_type;
+
+ names = set_new(&string_hash_ops);
+ if (!names)
+ return -ENOMEM;
+
+ /* The unit always has its own name if it's not a template. */
+ if (IN_SET(name_type, UNIT_NAME_PLAIN, UNIT_NAME_INSTANCE)) {
+ r = set_put_strdup(&names, unit_name);
+ if (r < 0)
+ return r;
+ }
+
+ /* First try to load fragment under the original name */
+ r = unit_ids_map_get(unit_ids_map, unit_name, &fragment);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+ return log_debug_errno(r, "Cannot load unit %s: %m", unit_name);
+
+ if (fragment) {
+ /* Add any aliases of the original name to the set of names */
+ nnn = hashmap_get(unit_name_map, basename(fragment));
+ STRV_FOREACH(t, nnn) {
+ if (name_type == UNIT_NAME_INSTANCE && unit_name_is_valid(*t, UNIT_NAME_TEMPLATE)) {
+ char *inst;
+
+ r = unit_name_replace_instance(*t, instance, &inst);
+ if (r < 0)
+ return log_debug_errno(r, "Cannot build instance name %s+%s: %m", *t, instance);
+
+ if (!streq(unit_name, inst))
+ log_debug("%s: %s has alias %s", __func__, unit_name, inst);
+
+ log_info("%s: %s+%s → %s", __func__, *t, instance, inst);
+ r = set_consume(names, inst);
+ } else {
+ if (!streq(unit_name, *t))
+ log_debug("%s: %s has alias %s", __func__, unit_name, *t);
+
+ r = set_put_strdup(&names, *t);
+ }
+ if (r < 0)
+ return r;
+ }
+ }
+
+ if (!fragment && name_type == UNIT_NAME_INSTANCE) {
+ /* Look for a fragment under the template name */
+
+ r = unit_name_template(unit_name, &template);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine template name: %m");
+
+ r = unit_ids_map_get(unit_ids_map, template, &fragment);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+ return log_debug_errno(r, "Cannot load template %s: %m", template);
+
+ if (fragment) {
+ /* Add any aliases of the original name to the set of names */
+ nnn = hashmap_get(unit_name_map, basename(fragment));
+ STRV_FOREACH(t, nnn) {
+ _cleanup_free_ char *inst = NULL;
+ const char *inst_fragment = NULL;
+
+ r = unit_name_replace_instance(*t, instance, &inst);
+ if (r < 0)
+ return log_debug_errno(r, "Cannot build instance name %s+%s: %m", template, instance);
+
+ /* Exclude any aliases that point in some other direction. */
+ r = unit_ids_map_get(unit_ids_map, inst, &inst_fragment);
+ if (r < 0 && !IN_SET(r, -ENOENT, -ENXIO))
+ return log_debug_errno(r, "Cannot find instance fragment %s: %m", inst);
+
+ if (inst_fragment &&
+ !streq(basename(inst_fragment), basename(fragment))) {
+ log_debug("Instance %s has fragment %s and is not an alias of %s.",
+ inst, inst_fragment, unit_name);
+ continue;
+ }
+
+ if (!streq(unit_name, inst))
+ log_debug("%s: %s has alias %s", __func__, unit_name, inst);
+ r = set_consume(names, TAKE_PTR(inst));
+ if (r < 0)
+ return r;
+ }
+ }
+ }
+
+ *ret_fragment_path = fragment;
+ *ret_names = TAKE_PTR(names);
+
+ // FIXME: if instance, consider any unit names with different template name
+ return 0;
+}
+
+static const char * const rlmap[] = {
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "-b", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ "single", SPECIAL_RESCUE_TARGET,
+ "-s", SPECIAL_RESCUE_TARGET,
+ "s", SPECIAL_RESCUE_TARGET,
+ "S", SPECIAL_RESCUE_TARGET,
+ "1", SPECIAL_RESCUE_TARGET,
+ "2", SPECIAL_MULTI_USER_TARGET,
+ "3", SPECIAL_MULTI_USER_TARGET,
+ "4", SPECIAL_MULTI_USER_TARGET,
+ "5", SPECIAL_GRAPHICAL_TARGET,
+ NULL
+};
+
+static const char * const rlmap_initrd[] = {
+ "emergency", SPECIAL_EMERGENCY_TARGET,
+ "rescue", SPECIAL_RESCUE_TARGET,
+ NULL
+};
+
+const char* runlevel_to_target(const char *word) {
+ const char * const *rlmap_ptr;
+ size_t i;
+
+ if (!word)
+ return NULL;
+
+ if (in_initrd()) {
+ word = startswith(word, "rd.");
+ if (!word)
+ return NULL;
+ }
+
+ rlmap_ptr = in_initrd() ? rlmap_initrd : rlmap;
+
+ for (i = 0; rlmap_ptr[i]; i += 2)
+ if (streq(word, rlmap_ptr[i]))
+ return rlmap_ptr[i+1];
+
+ return NULL;
+}
diff --git a/src/shared/unit-file.h b/src/shared/unit-file.h
new file mode 100644
index 0000000..5463b0a
--- /dev/null
+++ b/src/shared/unit-file.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "hashmap.h"
+#include "time-util.h"
+#include "unit-name.h"
+
+typedef enum UnitFileState UnitFileState;
+typedef enum UnitFileScope UnitFileScope;
+typedef struct LookupPaths LookupPaths;
+
+enum UnitFileState {
+ UNIT_FILE_ENABLED,
+ UNIT_FILE_ENABLED_RUNTIME,
+ UNIT_FILE_LINKED,
+ UNIT_FILE_LINKED_RUNTIME,
+ UNIT_FILE_ALIAS,
+ UNIT_FILE_MASKED,
+ UNIT_FILE_MASKED_RUNTIME,
+ UNIT_FILE_STATIC,
+ UNIT_FILE_DISABLED,
+ UNIT_FILE_INDIRECT,
+ UNIT_FILE_GENERATED,
+ UNIT_FILE_TRANSIENT,
+ UNIT_FILE_BAD,
+ _UNIT_FILE_STATE_MAX,
+ _UNIT_FILE_STATE_INVALID = -1
+};
+
+enum UnitFileScope {
+ UNIT_FILE_SYSTEM,
+ UNIT_FILE_GLOBAL,
+ UNIT_FILE_USER,
+ _UNIT_FILE_SCOPE_MAX,
+ _UNIT_FILE_SCOPE_INVALID = -1
+};
+
+bool unit_type_may_alias(UnitType type) _const_;
+bool unit_type_may_template(UnitType type) _const_;
+
+int unit_symlink_name_compatible(const char *symlink, const char *target, bool instance_propagation);
+int unit_validate_alias_symlink_and_warn(const char *filename, const char *target);
+
+bool lookup_paths_timestamp_hash_same(const LookupPaths *lp, uint64_t timestamp_hash, uint64_t *ret_new);
+int unit_file_build_name_map(
+ const LookupPaths *lp,
+ uint64_t *cache_timestamp_hash,
+ Hashmap **unit_ids_map,
+ Hashmap **unit_names_map,
+ Set **path_cache);
+
+int unit_file_find_fragment(
+ Hashmap *unit_ids_map,
+ Hashmap *unit_name_map,
+ const char *unit_name,
+ const char **ret_fragment_path,
+ Set **ret_names);
+
+const char* runlevel_to_target(const char *rl);
diff --git a/src/shared/user-record-nss.c b/src/shared/user-record-nss.c
new file mode 100644
index 0000000..88b8fc2
--- /dev/null
+++ b/src/shared/user-record-nss.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "errno-util.h"
+#include "format-util.h"
+#include "libcrypt-util.h"
+#include "strv.h"
+#include "user-record-nss.h"
+#include "user-util.h"
+#include "utf8.h"
+
+#define SET_IF(field, condition, value, fallback) \
+ field = (condition) ? (value) : (fallback)
+
+static inline const char* utf8_only(const char *s) {
+ return s && utf8_is_valid(s) ? s : NULL;
+}
+
+static inline int strv_extend_strv_utf8_only(char ***dst, char **src, bool filter_duplicates) {
+ _cleanup_free_ char **t = NULL;
+ size_t l, j = 0;
+
+ /* First, do a shallow copy of s, filtering for only valid utf-8 strings */
+ l = strv_length(src);
+ t = new(char*, l + 1);
+ if (!t)
+ return -ENOMEM;
+
+ for (size_t i = 0; i < l; i++)
+ if (utf8_is_valid(src[i]))
+ t[j++] = src[i];
+ if (j == 0)
+ return 0;
+
+ t[j] = NULL;
+ return strv_extend_strv(dst, t, filter_duplicates);
+}
+
+int nss_passwd_to_user_record(
+ const struct passwd *pwd,
+ const struct spwd *spwd,
+ UserRecord **ret) {
+
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+ int r;
+
+ assert(pwd);
+ assert(ret);
+
+ if (isempty(pwd->pw_name))
+ return -EINVAL;
+
+ if (spwd && !streq_ptr(spwd->sp_namp, pwd->pw_name))
+ return -EINVAL;
+
+ hr = user_record_new();
+ if (!hr)
+ return -ENOMEM;
+
+ r = free_and_strdup(&hr->user_name, pwd->pw_name);
+ if (r < 0)
+ return r;
+
+ /* Some bad NSS modules synthesize GECOS fields with embedded ":" or "\n" characters, which are not
+ * something we can output in /etc/passwd compatible format, since these are record separators
+ * there. We normally refuse that, but we need to maintain compatibility with arbitrary NSS modules,
+ * hence let's do what glibc does: mangle the data to fit the format. */
+ if (isempty(pwd->pw_gecos) || streq_ptr(pwd->pw_gecos, hr->user_name))
+ hr->real_name = mfree(hr->real_name);
+ else if (valid_gecos(pwd->pw_gecos)) {
+ r = free_and_strdup(&hr->real_name, pwd->pw_gecos);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *mangled = NULL;
+
+ mangled = mangle_gecos(pwd->pw_gecos);
+ if (!mangled)
+ return -ENOMEM;
+
+ free_and_replace(hr->real_name, mangled);
+ }
+
+ r = free_and_strdup(&hr->home_directory, utf8_only(empty_to_null(pwd->pw_dir)));
+ if (r < 0)
+ return r;
+
+ r = free_and_strdup(&hr->shell, utf8_only(empty_to_null(pwd->pw_shell)));
+ if (r < 0)
+ return r;
+
+ hr->uid = pwd->pw_uid;
+ hr->gid = pwd->pw_gid;
+
+ if (spwd &&
+ looks_like_hashed_password(utf8_only(spwd->sp_pwdp))) { /* Ignore locked, disabled, and mojibake passwords */
+ strv_free_erase(hr->hashed_password);
+ hr->hashed_password = strv_new(spwd->sp_pwdp);
+ if (!hr->hashed_password)
+ return -ENOMEM;
+ } else
+ hr->hashed_password = strv_free_erase(hr->hashed_password);
+
+ /* shadow-utils suggests using "chage -E 0" (or -E 1, depending on which man page you check)
+ * for locking a whole account, hence check for that. Note that it also defines a way to lock
+ * just a password instead of the whole account, but that's mostly pointless in times of
+ * password-less authorization, hence let's not bother. */
+
+ SET_IF(hr->locked,
+ spwd && spwd->sp_expire >= 0,
+ spwd->sp_expire <= 1, -1);
+
+ SET_IF(hr->not_after_usec,
+ spwd && spwd->sp_expire > 1 && (uint64_t) spwd->sp_expire < (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_expire * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_now,
+ spwd && spwd->sp_lstchg >= 0,
+ spwd->sp_lstchg == 0, -1);
+
+ SET_IF(hr->last_password_change_usec,
+ spwd && spwd->sp_lstchg > 0 && (uint64_t) spwd->sp_lstchg <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_lstchg * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_min_usec,
+ spwd && spwd->sp_min > 0 && (uint64_t) spwd->sp_min <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_min * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_max_usec,
+ spwd && spwd->sp_max > 0 && (uint64_t) spwd->sp_max <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_max * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_warn_usec,
+ spwd && spwd->sp_warn > 0 && (uint64_t) spwd->sp_warn <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_warn * USEC_PER_DAY, UINT64_MAX);
+
+ SET_IF(hr->password_change_inactive_usec,
+ spwd && spwd->sp_inact > 0 && (uint64_t) spwd->sp_inact <= (UINT64_MAX-1)/USEC_PER_DAY,
+ spwd->sp_inact * USEC_PER_DAY, UINT64_MAX);
+
+ hr->json = json_variant_unref(hr->json);
+ r = json_build(&hr->json, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(hr->user_name)),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(hr->uid)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(hr->gid)),
+ JSON_BUILD_PAIR_CONDITION(hr->real_name, "realName", JSON_BUILD_STRING(hr->real_name)),
+ JSON_BUILD_PAIR_CONDITION(hr->home_directory, "homeDirectory", JSON_BUILD_STRING(hr->home_directory)),
+ JSON_BUILD_PAIR_CONDITION(hr->shell, "shell", JSON_BUILD_STRING(hr->shell)),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(hr->hashed_password), "privileged", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRV(hr->hashed_password)))),
+ JSON_BUILD_PAIR_CONDITION(hr->locked >= 0, "locked", JSON_BUILD_BOOLEAN(hr->locked)),
+ JSON_BUILD_PAIR_CONDITION(hr->not_after_usec != UINT64_MAX, "notAfterUSec", JSON_BUILD_UNSIGNED(hr->not_after_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_now >= 0, "passwordChangeNow", JSON_BUILD_BOOLEAN(hr->password_change_now)),
+ JSON_BUILD_PAIR_CONDITION(hr->last_password_change_usec != UINT64_MAX, "lastPasswordChangeUSec", JSON_BUILD_UNSIGNED(hr->last_password_change_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_min_usec != UINT64_MAX, "passwordChangeMinUSec", JSON_BUILD_UNSIGNED(hr->password_change_min_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_max_usec != UINT64_MAX, "passwordChangeMaxUSec", JSON_BUILD_UNSIGNED(hr->password_change_max_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_warn_usec != UINT64_MAX, "passwordChangeWarnUSec", JSON_BUILD_UNSIGNED(hr->password_change_warn_usec)),
+ JSON_BUILD_PAIR_CONDITION(hr->password_change_inactive_usec != UINT64_MAX, "passwordChangeInactiveUSec", JSON_BUILD_UNSIGNED(hr->password_change_inactive_usec))));
+
+ if (r < 0)
+ return r;
+
+ hr->mask = USER_RECORD_REGULAR |
+ (!strv_isempty(hr->hashed_password) ? USER_RECORD_PRIVILEGED : 0);
+
+ *ret = TAKE_PTR(hr);
+ return 0;
+}
+
+int nss_spwd_for_passwd(const struct passwd *pwd, struct spwd *ret_spwd, char **ret_buffer) {
+ size_t buflen = 4096;
+ int r;
+
+ assert(pwd);
+ assert(ret_spwd);
+ assert(ret_buffer);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ struct spwd spwd, *result;
+
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getspnam_r(pwd->pw_name, &spwd, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ *ret_spwd = *result;
+ *ret_buffer = TAKE_PTR(buf);
+ return 0;
+ }
+ if (r < 0)
+ return -EIO; /* Weird, this should not return negative! */
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+}
+
+int nss_user_record_by_name(
+ const char *name,
+ bool with_shadow,
+ UserRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct passwd pwd, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct spwd spwd, *sresult = NULL;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getpwnam_r(name, &pwd, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ break;
+ }
+
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getpwnam_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_spwd_for_passwd(result, &spwd, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for user %s, ignoring: %m", name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &spwd;
+ } else
+ incomplete = true;
+
+ r = nss_passwd_to_user_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
+
+int nss_user_record_by_uid(
+ uid_t uid,
+ bool with_shadow,
+ UserRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct passwd pwd, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct spwd spwd, *sresult = NULL;
+ int r;
+
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getpwuid_r(uid, &pwd, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ break;
+ }
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getpwuid_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_spwd_for_passwd(result, &spwd, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for UID " UID_FMT ", ignoring: %m", uid);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &spwd;
+ } else
+ incomplete = true;
+
+ r = nss_passwd_to_user_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
+
+int nss_group_to_group_record(
+ const struct group *grp,
+ const struct sgrp *sgrp,
+ GroupRecord **ret) {
+
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+ int r;
+
+ assert(grp);
+ assert(ret);
+
+ if (isempty(grp->gr_name))
+ return -EINVAL;
+
+ if (sgrp && !streq_ptr(sgrp->sg_namp, grp->gr_name))
+ return -EINVAL;
+
+ g = group_record_new();
+ if (!g)
+ return -ENOMEM;
+
+ g->group_name = strdup(grp->gr_name);
+ if (!g->group_name)
+ return -ENOMEM;
+
+ r = strv_extend_strv_utf8_only(&g->members, grp->gr_mem, false);
+ if (r < 0)
+ return r;
+
+ g->gid = grp->gr_gid;
+
+ if (sgrp) {
+ if (looks_like_hashed_password(utf8_only(sgrp->sg_passwd))) {
+ g->hashed_password = strv_new(sgrp->sg_passwd);
+ if (!g->hashed_password)
+ return -ENOMEM;
+ }
+
+ r = strv_extend_strv_utf8_only(&g->members, sgrp->sg_mem, true);
+ if (r < 0)
+ return r;
+
+ r = strv_extend_strv_utf8_only(&g->administrators, sgrp->sg_adm, false);
+ if (r < 0)
+ return r;
+ }
+
+ r = json_build(&g->json, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(g->group_name)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(g->gid)),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->members), "members", JSON_BUILD_STRV(g->members)),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->hashed_password), "privileged", JSON_BUILD_OBJECT(JSON_BUILD_PAIR("hashedPassword", JSON_BUILD_STRV(g->hashed_password)))),
+ JSON_BUILD_PAIR_CONDITION(!strv_isempty(g->administrators), "administrators", JSON_BUILD_STRV(g->administrators))));
+ if (r < 0)
+ return r;
+
+ g->mask = USER_RECORD_REGULAR |
+ (!strv_isempty(g->hashed_password) ? USER_RECORD_PRIVILEGED : 0);
+
+ *ret = TAKE_PTR(g);
+ return 0;
+}
+
+int nss_sgrp_for_group(const struct group *grp, struct sgrp *ret_sgrp, char **ret_buffer) {
+ size_t buflen = 4096;
+ int r;
+
+ assert(grp);
+ assert(ret_sgrp);
+ assert(ret_buffer);
+
+ for (;;) {
+ _cleanup_free_ char *buf = NULL;
+ struct sgrp sgrp, *result;
+
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getsgnam_r(grp->gr_name, &sgrp, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ *ret_sgrp = *result;
+ *ret_buffer = TAKE_PTR(buf);
+ return 0;
+ }
+ if (r < 0)
+ return -EIO; /* Weird, this should not return negative! */
+ if (r != ERANGE)
+ return -r;
+
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+}
+
+int nss_group_record_by_name(
+ const char *name,
+ bool with_shadow,
+ GroupRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct group grp, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct sgrp sgrp, *sresult = NULL;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getgrnam_r(name, &grp, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+
+ break;
+ }
+
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getgrnam_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_sgrp_for_group(result, &sgrp, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for group %s, ignoring: %m", result->gr_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &sgrp;
+ } else
+ incomplete = true;
+
+ r = nss_group_to_group_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
+
+int nss_group_record_by_gid(
+ gid_t gid,
+ bool with_shadow,
+ GroupRecord **ret) {
+
+ _cleanup_free_ char *buf = NULL, *sbuf = NULL;
+ struct group grp, *result;
+ bool incomplete = false;
+ size_t buflen = 4096;
+ struct sgrp sgrp, *sresult = NULL;
+ int r;
+
+ assert(ret);
+
+ for (;;) {
+ buf = malloc(buflen);
+ if (!buf)
+ return -ENOMEM;
+
+ r = getgrgid_r(gid, &grp, buf, buflen, &result);
+ if (r == 0) {
+ if (!result)
+ return -ESRCH;
+ break;
+ }
+
+ if (r < 0)
+ return log_debug_errno(SYNTHETIC_ERRNO(EIO), "getgrgid_r() returned a negative value");
+ if (r != ERANGE)
+ return -r;
+ if (buflen > SIZE_MAX / 2)
+ return -ERANGE;
+
+ buflen *= 2;
+ buf = mfree(buf);
+ }
+
+ if (with_shadow) {
+ r = nss_sgrp_for_group(result, &sgrp, &sbuf);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to do shadow lookup for group %s, ignoring: %m", result->gr_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ } else
+ sresult = &sgrp;
+ } else
+ incomplete = true;
+
+ r = nss_group_to_group_record(result, sresult, ret);
+ if (r < 0)
+ return r;
+
+ (*ret)->incomplete = incomplete;
+ return 0;
+}
diff --git a/src/shared/user-record-nss.h b/src/shared/user-record-nss.h
new file mode 100644
index 0000000..22ab04d
--- /dev/null
+++ b/src/shared/user-record-nss.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <grp.h>
+#include <gshadow.h>
+#include <pwd.h>
+#include <shadow.h>
+
+#include "group-record.h"
+#include "user-record.h"
+
+/* Synthesize UserRecord and GroupRecord objects from NSS data */
+
+int nss_passwd_to_user_record(const struct passwd *pwd, const struct spwd *spwd, UserRecord **ret);
+int nss_spwd_for_passwd(const struct passwd *pwd, struct spwd *ret_spwd, char **ret_buffer);
+
+int nss_user_record_by_name(const char *name, bool with_shadow, UserRecord **ret);
+int nss_user_record_by_uid(uid_t uid, bool with_shadow, UserRecord **ret);
+
+int nss_group_to_group_record(const struct group *grp, const struct sgrp *sgrp, GroupRecord **ret);
+int nss_sgrp_for_group(const struct group *grp, struct sgrp *ret_sgrp, char **ret_buffer);
+
+int nss_group_record_by_name(const char *name, bool with_shadow, GroupRecord **ret);
+int nss_group_record_by_gid(gid_t gid, bool with_shadow, GroupRecord **ret);
diff --git a/src/shared/user-record-show.c b/src/shared/user-record-show.c
new file mode 100644
index 0000000..2979028
--- /dev/null
+++ b/src/shared/user-record-show.c
@@ -0,0 +1,584 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "format-util.h"
+#include "fs-util.h"
+#include "process-util.h"
+#include "rlimit-util.h"
+#include "strv.h"
+#include "terminal-util.h"
+#include "user-record-show.h"
+#include "user-util.h"
+#include "userdb.h"
+
+const char *user_record_state_color(const char *state) {
+ if (STR_IN_SET(state, "unfixated", "absent"))
+ return ansi_grey();
+ else if (streq(state, "active"))
+ return ansi_highlight_green();
+ else if (STR_IN_SET(state, "locked", "dirty"))
+ return ansi_highlight_yellow();
+
+ return NULL;
+}
+
+void user_record_show(UserRecord *hr, bool show_full_group_info) {
+ const char *hd, *ip, *shell;
+ UserStorage storage;
+ usec_t t;
+ size_t k;
+ int r, b;
+
+ printf(" User name: %s\n",
+ user_record_user_name_and_realm(hr));
+
+ if (hr->state) {
+ const char *color;
+
+ color = user_record_state_color(hr->state);
+
+ printf(" State: %s%s%s\n",
+ strempty(color), hr->state, color ? ansi_normal() : "");
+ }
+
+ printf(" Disposition: %s\n", user_disposition_to_string(user_record_disposition(hr)));
+
+ if (hr->last_change_usec != USEC_INFINITY) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Change: %s\n", format_timestamp(buf, sizeof(buf), hr->last_change_usec));
+
+ if (hr->last_change_usec > now(CLOCK_REALTIME))
+ printf(" %sModification time lies in the future, system clock wrong?%s\n",
+ ansi_highlight_yellow(), ansi_normal());
+ }
+
+ if (hr->last_password_change_usec != USEC_INFINITY &&
+ hr->last_password_change_usec != hr->last_change_usec) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Passw.: %s\n", format_timestamp(buf, sizeof(buf), hr->last_password_change_usec));
+ }
+
+ r = user_record_test_blocked(hr);
+ switch (r) {
+
+ case -ENOLCK:
+ printf(" Login OK: %sno%s (record is locked)\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EL2HLT:
+ printf(" Login OK: %sno%s (record not valid yet))\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EL3HLT:
+ printf(" Login OK: %sno%s (record not valid anymore))\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -ESTALE:
+ default: {
+ usec_t y;
+
+ if (r < 0 && r != -ESTALE) {
+ errno = -r;
+ printf(" Login OK: %sno%s (%m)\n", ansi_highlight_red(), ansi_normal());
+ break;
+ }
+
+ if (is_nologin_shell(user_record_shell(hr))) {
+ printf(" Login OK: %sno%s (nologin shell)\n", ansi_highlight_red(), ansi_normal());
+ break;
+ }
+
+ y = user_record_ratelimit_next_try(hr);
+ if (y != USEC_INFINITY && y > now(CLOCK_REALTIME)) {
+ printf(" Login OK: %sno%s (ratelimit)\n", ansi_highlight_red(), ansi_normal());
+ break;
+ }
+
+ printf(" Login OK: %syes%s\n", ansi_highlight_green(), ansi_normal());
+ break;
+ }}
+
+ r = user_record_test_password_change_required(hr);
+ switch (r) {
+
+ case -EKEYREVOKED:
+ printf(" Password OK: %schange now%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -EOWNERDEAD:
+ printf(" Password OK: %sexpired%s (change now!)\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -EKEYREJECTED:
+ printf(" Password OK: %sexpired%s (for good)\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EKEYEXPIRED:
+ printf(" Password OK: %sexpires soon%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -ENETDOWN:
+ printf(" Password OK: %sno timestamp%s\n", ansi_highlight_red(), ansi_normal());
+ break;
+
+ case -EROFS:
+ printf(" Password OK: %schange not permitted%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ case -ESTALE:
+ printf(" Password OK: %slast password change in future%s\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+
+ default:
+ if (r < 0) {
+ errno = -r;
+ printf(" Password OK: %sno%s (%m)\n", ansi_highlight_yellow(), ansi_normal());
+ break;
+ }
+
+ printf(" Password OK: %syes%s\n", ansi_highlight_green(), ansi_normal());
+ break;
+ }
+
+ if (uid_is_valid(hr->uid))
+ printf(" UID: " UID_FMT "\n", hr->uid);
+ if (gid_is_valid(hr->gid)) {
+ if (show_full_group_info) {
+ _cleanup_(group_record_unrefp) GroupRecord *gr = NULL;
+
+ r = groupdb_by_gid(hr->gid, 0, &gr);
+ if (r < 0) {
+ errno = -r;
+ printf(" GID: " GID_FMT " (unresolvable: %m)\n", hr->gid);
+ } else
+ printf(" GID: " GID_FMT " (%s)\n", hr->gid, gr->group_name);
+ } else
+ printf(" GID: " GID_FMT "\n", hr->gid);
+ } else if (uid_is_valid(hr->uid)) /* Show UID as GID if not separately configured */
+ printf(" GID: " GID_FMT "\n", (gid_t) hr->uid);
+
+ if (show_full_group_info) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ r = membershipdb_by_user(hr->user_name, 0, &iterator);
+ if (r < 0) {
+ errno = -r;
+ printf(" Aux. Groups: (can't acquire: %m)\n");
+ } else {
+ const char *prefix = " Aux. Groups:";
+
+ for (;;) {
+ _cleanup_free_ char *group = NULL;
+
+ r = membershipdb_iterator_get(iterator, NULL, &group);
+ if (r == -ESRCH)
+ break;
+ if (r < 0) {
+ errno = -r;
+ printf("%s (can't iterate: %m)\n", prefix);
+ break;
+ }
+
+ printf("%s %s\n", prefix, group);
+ prefix = " ";
+ }
+ }
+ }
+
+ if (hr->real_name && !streq(hr->real_name, hr->user_name))
+ printf(" Real Name: %s\n", hr->real_name);
+
+ hd = user_record_home_directory(hr);
+ if (hd)
+ printf(" Directory: %s\n", hd);
+
+ storage = user_record_storage(hr);
+ if (storage >= 0) /* Let's be political, and clarify which storage we like, and which we don't. About CIFS we don't complain. */
+ printf(" Storage: %s%s\n", user_storage_to_string(storage),
+ storage == USER_LUKS ? " (strong encryption)" :
+ storage == USER_FSCRYPT ? " (weak encryption)" :
+ IN_SET(storage, USER_DIRECTORY, USER_SUBVOLUME) ? " (no encryption)" : "");
+
+ ip = user_record_image_path(hr);
+ if (ip && !streq_ptr(ip, hd))
+ printf(" Image Path: %s\n", ip);
+
+ b = user_record_removable(hr);
+ if (b >= 0)
+ printf(" Removable: %s\n", yes_no(b));
+
+ shell = user_record_shell(hr);
+ if (shell)
+ printf(" Shell: %s\n", shell);
+
+ if (hr->email_address)
+ printf(" Email: %s\n", hr->email_address);
+ if (hr->location)
+ printf(" Location: %s\n", hr->location);
+ if (hr->password_hint)
+ printf(" Passw. Hint: %s\n", hr->password_hint);
+ if (hr->icon_name)
+ printf(" Icon Name: %s\n", hr->icon_name);
+
+ if (hr->time_zone)
+ printf(" Time Zone: %s\n", hr->time_zone);
+
+ if (hr->preferred_language)
+ printf(" Language: %s\n", hr->preferred_language);
+
+ if (!strv_isempty(hr->environment)) {
+ char **i;
+
+ STRV_FOREACH(i, hr->environment) {
+ printf(i == hr->environment ?
+ " Environment: %s\n" :
+ " %s\n", *i);
+ }
+ }
+
+ if (hr->locked >= 0)
+ printf(" Locked: %s\n", yes_no(hr->locked));
+
+ if (hr->not_before_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Not Before: %s\n", format_timestamp(buf, sizeof(buf), hr->not_before_usec));
+ }
+
+ if (hr->not_after_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Not After: %s\n", format_timestamp(buf, sizeof(buf), hr->not_after_usec));
+ }
+
+ if (hr->umask != MODE_INVALID)
+ printf(" UMask: 0%03o\n", hr->umask);
+
+ if (nice_is_valid(hr->nice_level))
+ printf(" Nice: %i\n", hr->nice_level);
+
+ for (int j = 0; j < _RLIMIT_MAX; j++) {
+ if (hr->rlimits[j])
+ printf(" Limit: RLIMIT_%s=%" PRIu64 ":%" PRIu64 "\n",
+ rlimit_to_string(j), (uint64_t) hr->rlimits[j]->rlim_cur, (uint64_t) hr->rlimits[j]->rlim_max);
+ }
+
+ if (hr->tasks_max != UINT64_MAX)
+ printf(" Tasks Max: %" PRIu64 "\n", hr->tasks_max);
+
+ if (hr->memory_high != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Memory High: %s\n", format_bytes(buf, sizeof(buf), hr->memory_high));
+ }
+
+ if (hr->memory_max != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Memory Max: %s\n", format_bytes(buf, sizeof(buf), hr->memory_max));
+ }
+
+ if (hr->cpu_weight != UINT64_MAX)
+ printf(" CPU Weight: %" PRIu64 "\n", hr->cpu_weight);
+
+ if (hr->io_weight != UINT64_MAX)
+ printf(" IO Weight: %" PRIu64 "\n", hr->io_weight);
+
+ if (hr->access_mode != MODE_INVALID)
+ printf(" Access Mode: 0%03oo\n", user_record_access_mode(hr));
+
+ if (storage == USER_LUKS) {
+ printf("LUKS Discard: online=%s offline=%s\n", yes_no(user_record_luks_discard(hr)), yes_no(user_record_luks_offline_discard(hr)));
+
+ if (!sd_id128_is_null(hr->luks_uuid))
+ printf(" LUKS UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->luks_uuid));
+ if (!sd_id128_is_null(hr->partition_uuid))
+ printf(" Part UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->partition_uuid));
+ if (!sd_id128_is_null(hr->file_system_uuid))
+ printf(" FS UUID: " SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(hr->file_system_uuid));
+
+ if (hr->file_system_type)
+ printf(" File System: %s\n", user_record_file_system_type(hr));
+
+ if (hr->luks_cipher)
+ printf(" LUKS Cipher: %s\n", hr->luks_cipher);
+ if (hr->luks_cipher_mode)
+ printf(" Cipher Mode: %s\n", hr->luks_cipher_mode);
+ if (hr->luks_volume_key_size != UINT64_MAX)
+ printf(" Volume Key: %" PRIu64 "bit\n", hr->luks_volume_key_size * 8);
+
+ if (hr->luks_pbkdf_type)
+ printf(" PBKDF Type: %s\n", hr->luks_pbkdf_type);
+ if (hr->luks_pbkdf_hash_algorithm)
+ printf(" PBKDF Hash: %s\n", hr->luks_pbkdf_hash_algorithm);
+ if (hr->luks_pbkdf_time_cost_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" PBKDF Time: %s\n", format_timespan(buf, sizeof(buf), hr->luks_pbkdf_time_cost_usec, 0));
+ }
+ if (hr->luks_pbkdf_memory_cost != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" PBKDF Bytes: %s\n", format_bytes(buf, sizeof(buf), hr->luks_pbkdf_memory_cost));
+ }
+ if (hr->luks_pbkdf_parallel_threads != UINT64_MAX)
+ printf("PBKDF Thread: %" PRIu64 "\n", hr->luks_pbkdf_parallel_threads);
+
+ } else if (storage == USER_CIFS) {
+
+ if (hr->cifs_service)
+ printf("CIFS Service: %s\n", hr->cifs_service);
+ }
+
+ if (hr->cifs_user_name)
+ printf(" CIFS User: %s\n", user_record_cifs_user_name(hr));
+ if (hr->cifs_domain)
+ printf(" CIFS Domain: %s\n", hr->cifs_domain);
+
+ if (storage != USER_CLASSIC)
+ printf(" Mount Flags: %s %s %s\n",
+ hr->nosuid ? "nosuid" : "suid",
+ hr->nodev ? "nodev" : "dev",
+ hr->noexec ? "noexec" : "exec");
+
+ if (hr->skeleton_directory)
+ printf(" Skel. Dir.: %s\n", user_record_skeleton_directory(hr));
+
+ if (hr->disk_size != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Disk Size: %s\n", format_bytes(buf, sizeof(buf), hr->disk_size));
+ }
+
+ if (hr->disk_usage != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+
+ if (hr->disk_size != UINT64_MAX) {
+ unsigned permille;
+
+ permille = (unsigned) DIV_ROUND_UP(hr->disk_usage * 1000U, hr->disk_size); /* Round up! */
+ printf(" Disk Usage: %s (= %u.%01u%%)\n",
+ format_bytes(buf, sizeof(buf), hr->disk_usage),
+ permille / 10, permille % 10);
+ } else
+ printf(" Disk Usage: %s\n", format_bytes(buf, sizeof(buf), hr->disk_usage));
+ }
+
+ if (hr->disk_free != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+
+ if (hr->disk_size != UINT64_MAX) {
+ const char *color_on, *color_off;
+ unsigned permille;
+
+ permille = (unsigned) ((hr->disk_free * 1000U) / hr->disk_size); /* Round down! */
+
+ /* Color the output red or yellow if we are below 10% resp. 25% free. Because 10% and
+ * 25% can be a lot of space still, let's additionally make some absolute
+ * restrictions: 1G and 2G */
+ if (permille <= 100U &&
+ hr->disk_free < 1024U*1024U*1024U /* 1G */) {
+ color_on = ansi_highlight_red();
+ color_off = ansi_normal();
+ } else if (permille <= 250U &&
+ hr->disk_free < 2U*1024U*1024U*1024U /* 2G */) {
+ color_on = ansi_highlight_yellow();
+ color_off = ansi_normal();
+ } else
+ color_on = color_off = "";
+
+ printf(" Disk Free: %s%s (= %u.%01u%%)%s\n",
+ color_on,
+ format_bytes(buf, sizeof(buf), hr->disk_free),
+ permille / 10, permille % 10,
+ color_off);
+ } else
+ printf(" Disk Free: %s\n", format_bytes(buf, sizeof(buf), hr->disk_free));
+ }
+
+ if (hr->disk_floor != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf(" Disk Floor: %s\n", format_bytes(buf, sizeof(buf), hr->disk_floor));
+ }
+
+ if (hr->disk_ceiling != UINT64_MAX) {
+ char buf[FORMAT_BYTES_MAX];
+ printf("Disk Ceiling: %s\n", format_bytes(buf, sizeof(buf), hr->disk_ceiling));
+ }
+
+ if (hr->good_authentication_counter != UINT64_MAX)
+ printf(" Good Auth.: %" PRIu64 "\n", hr->good_authentication_counter);
+
+ if (hr->last_good_authentication_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Good: %s\n", format_timestamp(buf, sizeof(buf), hr->last_good_authentication_usec));
+ }
+
+ if (hr->bad_authentication_counter != UINT64_MAX)
+ printf(" Bad Auth.: %" PRIu64 "\n", hr->bad_authentication_counter);
+
+ if (hr->last_bad_authentication_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Bad: %s\n", format_timestamp(buf, sizeof(buf), hr->last_bad_authentication_usec));
+ }
+
+ t = user_record_ratelimit_next_try(hr);
+ if (t != USEC_INFINITY) {
+ usec_t n = now(CLOCK_REALTIME);
+
+ if (t <= n)
+ printf(" Next Try: anytime\n");
+ else {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Next Try: %sin %s%s\n",
+ ansi_highlight_red(),
+ format_timespan(buf, sizeof(buf), t - n, USEC_PER_SEC),
+ ansi_normal());
+ }
+ }
+
+ if (storage != USER_CLASSIC) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Auth. Limit: %" PRIu64 " attempts per %s\n", user_record_ratelimit_burst(hr),
+ format_timespan(buf, sizeof(buf), user_record_ratelimit_interval_usec(hr), 0));
+ }
+
+ if (hr->enforce_password_policy >= 0)
+ printf(" Passwd Pol.: %s\n", yes_no(hr->enforce_password_policy));
+
+ if (hr->password_change_min_usec != UINT64_MAX ||
+ hr->password_change_max_usec != UINT64_MAX ||
+ hr->password_change_warn_usec != UINT64_MAX ||
+ hr->password_change_inactive_usec != UINT64_MAX) {
+
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Passwd Chg.:");
+
+ if (hr->password_change_min_usec != UINT64_MAX) {
+ printf(" min %s", format_timespan(buf, sizeof(buf), hr->password_change_min_usec, 0));
+
+ if (hr->password_change_max_usec != UINT64_MAX)
+ printf(" …");
+ }
+
+ if (hr->password_change_max_usec != UINT64_MAX)
+ printf(" max %s", format_timespan(buf, sizeof(buf), hr->password_change_max_usec, 0));
+
+ if (hr->password_change_warn_usec != UINT64_MAX)
+ printf("/warn %s", format_timespan(buf, sizeof(buf), hr->password_change_warn_usec, 0));
+
+ if (hr->password_change_inactive_usec != UINT64_MAX)
+ printf("/inactive %s", format_timespan(buf, sizeof(buf), hr->password_change_inactive_usec, 0));
+
+ printf("\n");
+ }
+
+ if (hr->password_change_now >= 0)
+ printf("Pas. Ch. Now: %s\n", yes_no(hr->password_change_now));
+
+ if (!strv_isempty(hr->ssh_authorized_keys))
+ printf("SSH Pub. Key: %zu\n", strv_length(hr->ssh_authorized_keys));
+
+ if (!strv_isempty(hr->pkcs11_token_uri)) {
+ char **i;
+
+ STRV_FOREACH(i, hr->pkcs11_token_uri)
+ printf(i == hr->pkcs11_token_uri ?
+ "PKCS11 Token: %s\n" :
+ " %s\n", *i);
+ }
+
+ if (hr->n_fido2_hmac_credential > 0)
+ printf(" FIDO2 Token: %zu\n", hr->n_fido2_hmac_credential);
+
+ if (!strv_isempty(hr->recovery_key_type))
+ printf("Recovery Key: %zu\n", strv_length(hr->recovery_key_type));
+
+ k = strv_length(hr->hashed_password);
+ if (k == 0)
+ printf(" Passwords: %snone%s\n",
+ user_record_disposition(hr) == USER_REGULAR ? ansi_highlight_yellow() : ansi_normal(), ansi_normal());
+ else
+ printf(" Passwords: %zu\n", k);
+
+ if (hr->signed_locally >= 0)
+ printf(" Local Sig.: %s\n", yes_no(hr->signed_locally));
+
+ if (hr->stop_delay_usec != UINT64_MAX) {
+ char buf[FORMAT_TIMESPAN_MAX];
+ printf(" Stop Delay: %s\n", format_timespan(buf, sizeof(buf), hr->stop_delay_usec, 0));
+ }
+
+ if (hr->auto_login >= 0)
+ printf("Autom. Login: %s\n", yes_no(hr->auto_login));
+
+ if (hr->kill_processes >= 0)
+ printf(" Kill Proc.: %s\n", yes_no(hr->kill_processes));
+
+ if (hr->service)
+ printf(" Service: %s\n", hr->service);
+}
+
+void group_record_show(GroupRecord *gr, bool show_full_user_info) {
+ int r;
+
+ printf(" Group name: %s\n",
+ group_record_group_name_and_realm(gr));
+
+ printf(" Disposition: %s\n", user_disposition_to_string(group_record_disposition(gr)));
+
+ if (gr->last_change_usec != USEC_INFINITY) {
+ char buf[FORMAT_TIMESTAMP_MAX];
+ printf(" Last Change: %s\n", format_timestamp(buf, sizeof(buf), gr->last_change_usec));
+ }
+
+ if (gid_is_valid(gr->gid))
+ printf(" GID: " GID_FMT "\n", gr->gid);
+
+ if (show_full_user_info) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+
+ r = membershipdb_by_group(gr->group_name, 0, &iterator);
+ if (r < 0) {
+ errno = -r;
+ printf(" Members: (can't acquire: %m)");
+ } else {
+ const char *prefix = " Members:";
+
+ for (;;) {
+ _cleanup_free_ char *user = NULL;
+
+ r = membershipdb_iterator_get(iterator, &user, NULL);
+ if (r == -ESRCH)
+ break;
+ if (r < 0) {
+ errno = -r;
+ printf("%s (can't iterate: %m\n", prefix);
+ break;
+ }
+
+ printf("%s %s\n", prefix, user);
+ prefix = " ";
+ }
+ }
+ } else {
+ const char *prefix = " Members:";
+ char **i;
+
+ STRV_FOREACH(i, gr->members) {
+ printf("%s %s\n", prefix, *i);
+ prefix = " ";
+ }
+ }
+
+ if (!strv_isempty(gr->administrators)) {
+ const char *prefix = " Admins:";
+ char **i;
+
+ STRV_FOREACH(i, gr->administrators) {
+ printf("%s %s\n", prefix, *i);
+ prefix = " ";
+ }
+ }
+
+ if (gr->description && !streq(gr->description, gr->group_name))
+ printf(" Description: %s\n", gr->description);
+
+ if (!strv_isempty(gr->hashed_password))
+ printf(" Passwords: %zu\n", strv_length(gr->hashed_password));
+
+ if (gr->service)
+ printf(" Service: %s\n", gr->service);
+}
diff --git a/src/shared/user-record-show.h b/src/shared/user-record-show.h
new file mode 100644
index 0000000..dcef065
--- /dev/null
+++ b/src/shared/user-record-show.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "user-record.h"
+#include "group-record.h"
+
+const char *user_record_state_color(const char *state);
+
+void user_record_show(UserRecord *hr, bool show_full_group_info);
+void group_record_show(GroupRecord *gr, bool show_full_user_info);
diff --git a/src/shared/user-record.c b/src/shared/user-record.c
new file mode 100644
index 0000000..6c48c56
--- /dev/null
+++ b/src/shared/user-record.c
@@ -0,0 +1,2272 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "cgroup-util.h"
+#include "dns-domain.h"
+#include "env-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "hexdecoct.h"
+#include "hostname-util.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "pkcs11-util.h"
+#include "rlimit-util.h"
+#include "stat-util.h"
+#include "string-table.h"
+#include "strv.h"
+#include "user-record.h"
+#include "user-util.h"
+
+#define DEFAULT_RATELIMIT_BURST 30
+#define DEFAULT_RATELIMIT_INTERVAL_USEC (1*USEC_PER_MINUTE)
+
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+static int parse_alloc_uid(const char *path, const char *name, const char *t, uid_t *ret_uid) {
+ uid_t uid;
+ int r;
+
+ r = parse_uid(t, &uid);
+ if (r < 0)
+ return log_debug_errno(r, "%s: failed to parse %s %s, ignoring: %m", path, name, t);
+ if (uid == 0)
+ uid = 1;
+
+ *ret_uid = uid;
+ return 0;
+}
+#endif
+
+int read_login_defs(UGIDAllocationRange *ret_defs, const char *path, const char *root) {
+ UGIDAllocationRange defs = {
+ .system_alloc_uid_min = SYSTEM_ALLOC_UID_MIN,
+ .system_uid_max = SYSTEM_UID_MAX,
+ .system_alloc_gid_min = SYSTEM_ALLOC_GID_MIN,
+ .system_gid_max = SYSTEM_GID_MAX,
+ };
+
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+ _cleanup_fclose_ FILE *f = NULL;
+ int r;
+
+ if (!path)
+ path = "/etc/login.defs";
+
+ r = chase_symlinks_and_fopen_unlocked(path, root, CHASE_PREFIX_ROOT, "re", &f, NULL);
+ if (r == -ENOENT)
+ goto assign;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to open %s: %m", path);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *t;
+
+ r = read_line(f, LINE_MAX, &line);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read %s: %m", path);
+ if (r == 0)
+ break;
+
+ if ((t = first_word(line, "SYS_UID_MIN")))
+ (void) parse_alloc_uid(path, "SYS_UID_MIN", t, &defs.system_alloc_uid_min);
+ else if ((t = first_word(line, "SYS_UID_MAX")))
+ (void) parse_alloc_uid(path, "SYS_UID_MAX", t, &defs.system_uid_max);
+ else if ((t = first_word(line, "SYS_GID_MIN")))
+ (void) parse_alloc_uid(path, "SYS_GID_MIN", t, &defs.system_alloc_gid_min);
+ else if ((t = first_word(line, "SYS_GID_MAX")))
+ (void) parse_alloc_uid(path, "SYS_GID_MAX", t, &defs.system_gid_max);
+ }
+
+ assign:
+ if (defs.system_alloc_uid_min > defs.system_uid_max) {
+ log_debug("%s: SYS_UID_MIN > SYS_UID_MAX, resetting.", path);
+ defs.system_alloc_uid_min = MIN(defs.system_uid_max - 1, (uid_t) SYSTEM_ALLOC_UID_MIN);
+ /* Look at sys_uid_max to make sure sys_uid_min..sys_uid_max remains a valid range. */
+ }
+ if (defs.system_alloc_gid_min > defs.system_gid_max) {
+ log_debug("%s: SYS_GID_MIN > SYS_GID_MAX, resetting.", path);
+ defs.system_alloc_gid_min = MIN(defs.system_gid_max - 1, (gid_t) SYSTEM_ALLOC_GID_MIN);
+ /* Look at sys_gid_max to make sure sys_gid_min..sys_gid_max remains a valid range. */
+ }
+#endif
+
+ *ret_defs = defs;
+ return 0;
+}
+
+const UGIDAllocationRange *acquire_ugid_allocation_range(void) {
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+ static thread_local UGIDAllocationRange defs = {
+#else
+ static const UGIDAllocationRange defs = {
+#endif
+ .system_alloc_uid_min = SYSTEM_ALLOC_UID_MIN,
+ .system_uid_max = SYSTEM_UID_MAX,
+ .system_alloc_gid_min = SYSTEM_ALLOC_GID_MIN,
+ .system_gid_max = SYSTEM_GID_MAX,
+ };
+
+#if ENABLE_COMPAT_MUTABLE_UID_BOUNDARIES
+ /* This function will ignore failure to read the file, so it should only be called from places where
+ * we don't crucially depend on the answer. In other words, it's appropriate for journald, but
+ * probably not for sysusers. */
+
+ static thread_local bool initialized = false;
+
+ if (!initialized) {
+ (void) read_login_defs(&defs, NULL, NULL);
+ initialized = true;
+ }
+#endif
+
+ return &defs;
+}
+
+bool uid_is_system(uid_t uid) {
+ const UGIDAllocationRange *defs;
+ assert_se(defs = acquire_ugid_allocation_range());
+
+ return uid <= defs->system_uid_max;
+}
+
+bool gid_is_system(gid_t gid) {
+ const UGIDAllocationRange *defs;
+ assert_se(defs = acquire_ugid_allocation_range());
+
+ return gid <= defs->system_gid_max;
+}
+
+UserRecord* user_record_new(void) {
+ UserRecord *h;
+
+ h = new(UserRecord, 1);
+ if (!h)
+ return NULL;
+
+ *h = (UserRecord) {
+ .n_ref = 1,
+ .disposition = _USER_DISPOSITION_INVALID,
+ .last_change_usec = UINT64_MAX,
+ .last_password_change_usec = UINT64_MAX,
+ .umask = MODE_INVALID,
+ .nice_level = INT_MAX,
+ .not_before_usec = UINT64_MAX,
+ .not_after_usec = UINT64_MAX,
+ .locked = -1,
+ .storage = _USER_STORAGE_INVALID,
+ .access_mode = MODE_INVALID,
+ .disk_size = UINT64_MAX,
+ .disk_size_relative = UINT64_MAX,
+ .tasks_max = UINT64_MAX,
+ .memory_high = UINT64_MAX,
+ .memory_max = UINT64_MAX,
+ .cpu_weight = UINT64_MAX,
+ .io_weight = UINT64_MAX,
+ .uid = UID_INVALID,
+ .gid = GID_INVALID,
+ .nodev = true,
+ .nosuid = true,
+ .luks_discard = -1,
+ .luks_offline_discard = -1,
+ .luks_volume_key_size = UINT64_MAX,
+ .luks_pbkdf_time_cost_usec = UINT64_MAX,
+ .luks_pbkdf_memory_cost = UINT64_MAX,
+ .luks_pbkdf_parallel_threads = UINT64_MAX,
+ .disk_usage = UINT64_MAX,
+ .disk_free = UINT64_MAX,
+ .disk_ceiling = UINT64_MAX,
+ .disk_floor = UINT64_MAX,
+ .signed_locally = -1,
+ .good_authentication_counter = UINT64_MAX,
+ .bad_authentication_counter = UINT64_MAX,
+ .last_good_authentication_usec = UINT64_MAX,
+ .last_bad_authentication_usec = UINT64_MAX,
+ .ratelimit_begin_usec = UINT64_MAX,
+ .ratelimit_count = UINT64_MAX,
+ .ratelimit_interval_usec = UINT64_MAX,
+ .ratelimit_burst = UINT64_MAX,
+ .removable = -1,
+ .enforce_password_policy = -1,
+ .auto_login = -1,
+ .stop_delay_usec = UINT64_MAX,
+ .kill_processes = -1,
+ .password_change_min_usec = UINT64_MAX,
+ .password_change_max_usec = UINT64_MAX,
+ .password_change_warn_usec = UINT64_MAX,
+ .password_change_inactive_usec = UINT64_MAX,
+ .password_change_now = -1,
+ .pkcs11_protected_authentication_path_permitted = -1,
+ .fido2_user_presence_permitted = -1,
+ };
+
+ return h;
+}
+
+static void pkcs11_encrypted_key_done(Pkcs11EncryptedKey *k) {
+ if (!k)
+ return;
+
+ free(k->uri);
+ erase_and_free(k->data);
+ erase_and_free(k->hashed_password);
+}
+
+static void fido2_hmac_credential_done(Fido2HmacCredential *c) {
+ if (!c)
+ return;
+
+ free(c->id);
+}
+
+static void fido2_hmac_salt_done(Fido2HmacSalt *s) {
+ if (!s)
+ return;
+
+ fido2_hmac_credential_done(&s->credential);
+ erase_and_free(s->salt);
+ erase_and_free(s->hashed_password);
+}
+
+static void recovery_key_done(RecoveryKey *k) {
+ if (!k)
+ return;
+
+ free(k->type);
+ erase_and_free(k->hashed_password);
+}
+
+static UserRecord* user_record_free(UserRecord *h) {
+ if (!h)
+ return NULL;
+
+ free(h->user_name);
+ free(h->realm);
+ free(h->user_name_and_realm_auto);
+ free(h->real_name);
+ free(h->email_address);
+ erase_and_free(h->password_hint);
+ free(h->location);
+ free(h->icon_name);
+
+ free(h->shell);
+
+ strv_free(h->environment);
+ free(h->time_zone);
+ free(h->preferred_language);
+ rlimit_free_all(h->rlimits);
+
+ free(h->skeleton_directory);
+
+ strv_free_erase(h->hashed_password);
+ strv_free_erase(h->ssh_authorized_keys);
+ strv_free_erase(h->password);
+ strv_free_erase(h->token_pin);
+
+ free(h->cifs_service);
+ free(h->cifs_user_name);
+ free(h->cifs_domain);
+
+ free(h->image_path);
+ free(h->image_path_auto);
+ free(h->home_directory);
+ free(h->home_directory_auto);
+
+ strv_free(h->member_of);
+
+ free(h->file_system_type);
+ free(h->luks_cipher);
+ free(h->luks_cipher_mode);
+ free(h->luks_pbkdf_hash_algorithm);
+ free(h->luks_pbkdf_type);
+
+ free(h->state);
+ free(h->service);
+
+ strv_free(h->pkcs11_token_uri);
+ for (size_t i = 0; i < h->n_pkcs11_encrypted_key; i++)
+ pkcs11_encrypted_key_done(h->pkcs11_encrypted_key + i);
+ free(h->pkcs11_encrypted_key);
+
+ for (size_t i = 0; i < h->n_fido2_hmac_credential; i++)
+ fido2_hmac_credential_done(h->fido2_hmac_credential + i);
+ for (size_t i = 0; i < h->n_fido2_hmac_salt; i++)
+ fido2_hmac_salt_done(h->fido2_hmac_salt + i);
+
+ strv_free(h->recovery_key_type);
+ for (size_t i = 0; i < h->n_recovery_key; i++)
+ recovery_key_done(h->recovery_key + i);
+
+ json_variant_unref(h->json);
+
+ return mfree(h);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(UserRecord, user_record, user_record_free);
+
+int json_dispatch_realm(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ r = dns_name_is_valid(n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to check if JSON field '%s' is a valid DNS domain.", strna(name));
+ if (r == 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid DNS domain.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+int json_dispatch_gecos(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (valid_gecos(n)) {
+ if (free_and_strdup(s, n) < 0)
+ return json_log_oom(variant, flags);
+ } else {
+ _cleanup_free_ char *m = NULL;
+
+ json_log(variant, flags|JSON_DEBUG, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid GECOS compatible string, mangling.", strna(name));
+
+ m = mangle_gecos(n);
+ if (!m)
+ return json_log_oom(variant, flags);
+
+ free_and_replace(*s, m);
+ }
+
+ return 0;
+}
+
+static int json_dispatch_nice(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ int *nl = userdata;
+ intmax_t m;
+
+ if (json_variant_is_null(variant)) {
+ *nl = INT_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_integer(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ m = json_variant_integer(variant);
+ if (m < PRIO_MIN || m >= PRIO_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not a valid nice level.", strna(name));
+
+ *nl = m;
+ return 0;
+}
+
+static int json_dispatch_rlimit_value(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ rlim_t *ret = userdata;
+
+ if (json_variant_is_null(variant))
+ *ret = RLIM_INFINITY;
+ else if (json_variant_is_unsigned(variant)) {
+ uintmax_t w;
+
+ w = json_variant_unsigned(variant);
+ if (w == RLIM_INFINITY || (uintmax_t) w != json_variant_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "Resource limit value '%s' is out of range.", name);
+
+ *ret = (rlim_t) w;
+ } else
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit value '%s' is not an unsigned integer.", name);
+
+ return 0;
+}
+
+static int json_dispatch_rlimits(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ struct rlimit** limits = userdata;
+ JsonVariant *value;
+ const char *key;
+ int r;
+
+ assert_se(limits);
+
+ if (json_variant_is_null(variant)) {
+ rlimit_free_all(limits);
+ return 0;
+ }
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ JSON_VARIANT_OBJECT_FOREACH(key, value, variant) {
+ JsonVariant *jcur, *jmax;
+ struct rlimit rl;
+ const char *p;
+ int l;
+
+ p = startswith(key, "RLIMIT_");
+ if (!p)
+ l = -1;
+ else
+ l = rlimit_from_string(p);
+ if (l < 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' not known.", key);
+
+ if (!json_variant_is_object(value))
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' has invalid value.", key);
+
+ if (json_variant_elements(value) != 4)
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' value is does not have two fields as expected.", key);
+
+ jcur = json_variant_by_key(value, "cur");
+ if (!jcur)
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' lacks 'cur' field.", key);
+ r = json_dispatch_rlimit_value("cur", jcur, flags, &rl.rlim_cur);
+ if (r < 0)
+ return r;
+
+ jmax = json_variant_by_key(value, "max");
+ if (!jmax)
+ return json_log(value, flags, SYNTHETIC_ERRNO(EINVAL), "Resource limit '%s' lacks 'max' field.", key);
+ r = json_dispatch_rlimit_value("max", jmax, flags, &rl.rlim_max);
+ if (r < 0)
+ return r;
+
+ if (limits[l])
+ *(limits[l]) = rl;
+ else {
+ limits[l] = newdup(struct rlimit, &rl, 1);
+ if (!limits[l])
+ return log_oom();
+ }
+ }
+
+ return 0;
+}
+
+static int json_dispatch_filename_or_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ assert(s);
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!filename_is_valid(n) && !path_is_normalized(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid file name or normalized path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!path_is_normalized(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a normalized file system path.", strna(name));
+ if (!path_is_absolute(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an absolute file system path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_home_directory(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!valid_home(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid home directory path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_image_path(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (empty_or_root(n) || !path_is_valid(n) || !path_is_absolute(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid image path.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int json_dispatch_umask(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ mode_t *m = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *m = (mode_t) -1;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a number.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k > 0777)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' outside of valid range 0…0777.", strna(name));
+
+ *m = (mode_t) k;
+ return 0;
+}
+
+static int json_dispatch_access_mode(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ mode_t *m = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *m = (mode_t) -1;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a number.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k > 07777)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' outside of valid range 0…07777.", strna(name));
+
+ *m = (mode_t) k;
+ return 0;
+}
+
+static int json_dispatch_environment(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **n = NULL;
+ char ***l = userdata;
+ size_t i;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ for (i = 0; i < json_variant_elements(variant); i++) {
+ _cleanup_free_ char *c = NULL;
+ JsonVariant *e;
+ const char *a;
+
+ e = json_variant_by_index(variant, i);
+ if (!json_variant_is_string(e))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name));
+
+ assert_se(a = json_variant_string(e));
+
+ if (!env_assignment_is_valid(a))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of environment variables.", strna(name));
+
+ c = strdup(a);
+ if (!c)
+ return json_log_oom(variant, flags);
+
+ r = strv_env_replace(&n, c);
+ if (r < 0)
+ return json_log_oom(variant, flags);
+
+ c = NULL;
+ }
+
+ strv_free_and_replace(*l, n);
+ return 0;
+}
+
+int json_dispatch_user_disposition(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserDisposition *disposition = userdata, k;
+
+ if (json_variant_is_null(variant)) {
+ *disposition = _USER_DISPOSITION_INVALID;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ k = user_disposition_from_string(json_variant_string(variant));
+ if (k < 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Disposition type '%s' not known.", json_variant_string(variant));
+
+ *disposition = k;
+ return 0;
+}
+
+static int json_dispatch_storage(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserStorage *storage = userdata, k;
+
+ if (json_variant_is_null(variant)) {
+ *storage = _USER_STORAGE_INVALID;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ k = user_storage_from_string(json_variant_string(variant));
+ if (k < 0)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "Storage type '%s' not known.", json_variant_string(variant));
+
+ *storage = k;
+ return 0;
+}
+
+static int json_dispatch_disk_size(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *size = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *size = UINT64_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k < USER_DISK_SIZE_MIN || k > USER_DISK_SIZE_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX);
+
+ *size = k;
+ return 0;
+}
+
+static int json_dispatch_tasks_or_memory_max(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *limit = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *limit = UINT64_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k <= 0 || k >= UINT64_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), (uint64_t) 1, UINT64_MAX-1);
+
+ *limit = k;
+ return 0;
+}
+
+static int json_dispatch_weight(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ uint64_t *weight = userdata;
+ uintmax_t k;
+
+ if (json_variant_is_null(variant)) {
+ *weight = UINT64_MAX;
+ return 0;
+ }
+
+ if (!json_variant_is_unsigned(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a integer.", strna(name));
+
+ k = json_variant_unsigned(variant);
+ if (k <= CGROUP_WEIGHT_MIN || k >= CGROUP_WEIGHT_MAX)
+ return json_log(variant, flags, SYNTHETIC_ERRNO(ERANGE), "JSON field '%s' is not in valid range %" PRIu64 "…%" PRIu64 ".", strna(name), (uint64_t) CGROUP_WEIGHT_MIN, (uint64_t) CGROUP_WEIGHT_MAX);
+
+ *weight = k;
+ return 0;
+}
+
+int json_dispatch_user_group_list(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **l = NULL;
+ char ***list = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ if (!valid_user_group_name(json_variant_string(e), FLAGS_SET(flags, JSON_RELAX) ? VALID_USER_RELAX : 0))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a valid user/group name: %s", json_variant_string(e));
+
+ r = strv_extend(&l, json_variant_string(e));
+ if (r < 0)
+ return json_log(e, flags, r, "Failed to append array element: %m");
+ }
+
+ r = strv_extend_strv(list, l, true);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to merge user/group arrays: %m");
+
+ return 0;
+}
+
+static int dispatch_secret(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch secret_dispatch_table[] = {
+ { "password", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, password), 0 },
+ { "tokenPin", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, token_pin), 0 },
+ { "pkcs11Pin", /* legacy alias */ _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, token_pin), 0 },
+ { "pkcs11ProtectedAuthenticationPathPermitted", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, pkcs11_protected_authentication_path_permitted), 0 },
+ { "fido2UserPresencePermitted", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, fido2_user_presence_permitted), 0 },
+ {},
+ };
+
+ return json_dispatch(variant, secret_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_pkcs11_uri(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ char **s = userdata;
+ const char *n;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *s = mfree(*s);
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ n = json_variant_string(variant);
+ if (!pkcs11_uri_valid(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid RFC7512 PKCS#11 URI.", strna(name));
+
+ r = free_and_strdup(s, n);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to allocate string: %m");
+
+ return 0;
+}
+
+static int dispatch_pkcs11_uri_array(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ _cleanup_strv_free_ char **z = NULL;
+ char ***l = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ *l = strv_free(*l);
+ return 0;
+ }
+
+ if (json_variant_is_string(variant)) {
+ const char *n;
+
+ n = json_variant_string(variant);
+ if (!pkcs11_uri_valid(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a valid RFC7512 PKCS#11 URI.", strna(name));
+
+ z = strv_new(n);
+ if (!z)
+ return log_oom();
+
+ } else {
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string or array of strings.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ const char *n;
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ n = json_variant_string(e);
+ if (!pkcs11_uri_valid(n))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element in '%s' is not a valid RFC7512 PKCS#11 URI: %s", strna(name), n);
+
+ r = strv_extend(&z, n);
+ if (r < 0)
+ return log_oom();
+ }
+ }
+
+ strv_free_and_replace(*l, z);
+ return 0;
+}
+
+static int dispatch_pkcs11_key_data(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ Pkcs11EncryptedKey *k = userdata;
+ size_t l;
+ void *b;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ k->data = erase_and_free(k->data);
+ k->size = 0;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode encrypted PKCS#11 key: %m");
+
+ erase_and_free(k->data);
+ k->data = b;
+ k->size = l;
+
+ return 0;
+}
+
+static int dispatch_pkcs11_key(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ Pkcs11EncryptedKey *array, *k;
+
+ static const JsonDispatch pkcs11_key_dispatch_table[] = {
+ { "uri", JSON_VARIANT_STRING, dispatch_pkcs11_uri, offsetof(Pkcs11EncryptedKey, uri), JSON_MANDATORY },
+ { "data", JSON_VARIANT_STRING, dispatch_pkcs11_key_data, 0, JSON_MANDATORY },
+ { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(Pkcs11EncryptedKey, hashed_password), JSON_MANDATORY },
+ {},
+ };
+
+ if (!json_variant_is_object(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object.");
+
+ array = reallocarray(h->pkcs11_encrypted_key, h->n_pkcs11_encrypted_key + 1, sizeof(Pkcs11EncryptedKey));
+ if (!array)
+ return log_oom();
+
+ h->pkcs11_encrypted_key = array;
+ k = h->pkcs11_encrypted_key + h->n_pkcs11_encrypted_key;
+ *k = (Pkcs11EncryptedKey) {};
+
+ r = json_dispatch(e, pkcs11_key_dispatch_table, NULL, flags, k);
+ if (r < 0) {
+ pkcs11_encrypted_key_done(k);
+ return r;
+ }
+
+ h->n_pkcs11_encrypted_key++;
+ }
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_credential(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ Fido2HmacCredential *k = userdata;
+ size_t l;
+ void *b;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ k->id = mfree(k->id);
+ k->size = 0;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode FIDO2 credential ID: %m");
+
+ free_and_replace(k->id, b);
+ k->size = l;
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_credential_array(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of strings.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ Fido2HmacCredential *array;
+ size_t l;
+ void *b;
+
+ if (!json_variant_is_string(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not a string.");
+
+ array = reallocarray(h->fido2_hmac_credential, h->n_fido2_hmac_credential + 1, sizeof(Fido2HmacCredential));
+ if (!array)
+ return log_oom();
+
+ r = unbase64mem(json_variant_string(e), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode FIDO2 credential ID: %m");
+
+ h->fido2_hmac_credential = array;
+
+ h->fido2_hmac_credential[h->n_fido2_hmac_credential++] = (Fido2HmacCredential) {
+ .id = b,
+ .size = l,
+ };
+ }
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_salt_value(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ Fido2HmacSalt *k = userdata;
+ size_t l;
+ void *b;
+ int r;
+
+ if (json_variant_is_null(variant)) {
+ k->salt = erase_and_free(k->salt);
+ k->salt_size = 0;
+ return 0;
+ }
+
+ if (!json_variant_is_string(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not a string.", strna(name));
+
+ r = unbase64mem(json_variant_string(variant), (size_t) -1, &b, &l);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to decode FIDO2 salt: %m");
+
+ erase_and_free(k->salt);
+ k->salt = b;
+ k->salt_size = l;
+
+ return 0;
+}
+
+static int dispatch_fido2_hmac_salt(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ Fido2HmacSalt *array, *k;
+
+ static const JsonDispatch fido2_hmac_salt_dispatch_table[] = {
+ { "credential", JSON_VARIANT_STRING, dispatch_fido2_hmac_credential, offsetof(Fido2HmacSalt, credential), JSON_MANDATORY },
+ { "salt", JSON_VARIANT_STRING, dispatch_fido2_hmac_salt_value, 0, JSON_MANDATORY },
+ { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(Fido2HmacSalt, hashed_password), JSON_MANDATORY },
+ {},
+ };
+
+ if (!json_variant_is_object(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object.");
+
+ array = reallocarray(h->fido2_hmac_salt, h->n_fido2_hmac_salt + 1, sizeof(Fido2HmacSalt));
+ if (!array)
+ return log_oom();
+
+ h->fido2_hmac_salt = array;
+ k = h->fido2_hmac_salt + h->n_fido2_hmac_salt;
+ *k = (Fido2HmacSalt) {};
+
+ r = json_dispatch(e, fido2_hmac_salt_dispatch_table, NULL, flags, k);
+ if (r < 0) {
+ fido2_hmac_salt_done(k);
+ return r;
+ }
+
+ h->n_fido2_hmac_salt++;
+ }
+
+ return 0;
+}
+
+static int dispatch_recovery_key(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+ UserRecord *h = userdata;
+ JsonVariant *e;
+ int r;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ RecoveryKey *array, *k;
+
+ static const JsonDispatch recovery_key_dispatch_table[] = {
+ { "type", JSON_VARIANT_STRING, json_dispatch_string, 0, JSON_MANDATORY },
+ { "hashedPassword", JSON_VARIANT_STRING, json_dispatch_string, offsetof(RecoveryKey, hashed_password), JSON_MANDATORY },
+ {},
+ };
+
+ if (!json_variant_is_object(e))
+ return json_log(e, flags, SYNTHETIC_ERRNO(EINVAL), "JSON array element is not an object.");
+
+ array = reallocarray(h->recovery_key, h->n_recovery_key + 1, sizeof(RecoveryKey));
+ if (!array)
+ return log_oom();
+
+ h->recovery_key = array;
+ k = h->recovery_key + h->n_recovery_key;
+ *k = (RecoveryKey) {};
+
+ r = json_dispatch(e, recovery_key_dispatch_table, NULL, flags, k);
+ if (r < 0) {
+ recovery_key_done(k);
+ return r;
+ }
+
+ h->n_recovery_key++;
+ }
+
+ return 0;
+}
+
+static int dispatch_privileged(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch privileged_dispatch_table[] = {
+ { "passwordHint", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, password_hint), 0 },
+ { "hashedPassword", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, hashed_password), JSON_SAFE },
+ { "sshAuthorizedKeys", _JSON_VARIANT_TYPE_INVALID, json_dispatch_strv, offsetof(UserRecord, ssh_authorized_keys), 0 },
+ { "pkcs11EncryptedKey", JSON_VARIANT_ARRAY, dispatch_pkcs11_key, 0, 0 },
+ { "fido2HmacSalt", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_salt, 0, 0 },
+ { "recoveryKey", JSON_VARIANT_ARRAY, dispatch_recovery_key, 0, 0 },
+ {},
+ };
+
+ return json_dispatch(variant, privileged_dispatch_table, NULL, flags, userdata);
+}
+
+static int dispatch_binding(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch binding_dispatch_table[] = {
+ { "imagePath", JSON_VARIANT_STRING, json_dispatch_image_path, offsetof(UserRecord, image_path), 0 },
+ { "homeDirectory", JSON_VARIANT_STRING, json_dispatch_home_directory, offsetof(UserRecord, home_directory), 0 },
+ { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 },
+ { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 },
+ { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 },
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 },
+ { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 },
+ { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE },
+ { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE },
+ { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE },
+ { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, binding_dispatch_table, NULL, flags, userdata);
+}
+
+int per_machine_id_match(JsonVariant *ids, JsonDispatchFlags flags) {
+ sd_id128_t mid;
+ int r;
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(ids, flags, r, "Failed to acquire machine ID: %m");
+
+ if (json_variant_is_string(ids)) {
+ sd_id128_t k;
+
+ r = sd_id128_from_string(json_variant_string(ids), &k);
+ if (r < 0) {
+ json_log(ids, flags, r, "%s is not a valid machine ID, ignoring: %m", json_variant_string(ids));
+ return 0;
+ }
+
+ return sd_id128_equal(mid, k);
+ }
+
+ if (json_variant_is_array(ids)) {
+ JsonVariant *e;
+
+ JSON_VARIANT_ARRAY_FOREACH(e, ids) {
+ sd_id128_t k;
+
+ if (!json_variant_is_string(e)) {
+ json_log(e, flags, 0, "Machine ID is not a string, ignoring: %m");
+ continue;
+ }
+
+ r = sd_id128_from_string(json_variant_string(e), &k);
+ if (r < 0) {
+ json_log(e, flags, r, "%s is not a valid machine ID, ignoring: %m", json_variant_string(e));
+ continue;
+ }
+
+ if (sd_id128_equal(mid, k))
+ return true;
+ }
+
+ return false;
+ }
+
+ json_log(ids, flags, 0, "Machine ID is not a string or array of strings, ignoring: %m");
+ return false;
+}
+
+int per_machine_hostname_match(JsonVariant *hns, JsonDispatchFlags flags) {
+ _cleanup_free_ char *hn = NULL;
+ int r;
+
+ r = gethostname_strict(&hn);
+ if (r == -ENXIO) {
+ json_log(hns, flags, r, "No hostname set, not matching perMachine hostname record: %m");
+ return false;
+ }
+ if (r < 0)
+ return json_log(hns, flags, r, "Failed to acquire hostname: %m");
+
+ if (json_variant_is_string(hns))
+ return streq(json_variant_string(hns), hn);
+
+ if (json_variant_is_array(hns)) {
+ JsonVariant *e;
+
+ JSON_VARIANT_ARRAY_FOREACH(e, hns) {
+
+ if (!json_variant_is_string(e)) {
+ json_log(e, flags, 0, "Hostname is not a string, ignoring: %m");
+ continue;
+ }
+
+ if (streq(json_variant_string(hns), hn))
+ return true;
+ }
+
+ return false;
+ }
+
+ json_log(hns, flags, 0, "Hostname is not a string or array of strings, ignoring: %m");
+ return false;
+}
+
+static int dispatch_per_machine(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch per_machine_dispatch_table[] = {
+ { "matchMachineId", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "matchHostname", _JSON_VARIANT_TYPE_INVALID, NULL, 0, 0 },
+ { "iconName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, icon_name), JSON_SAFE },
+ { "location", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, location), 0 },
+ { "shell", JSON_VARIANT_STRING, json_dispatch_filename_or_path, offsetof(UserRecord, shell), 0 },
+ { "umask", JSON_VARIANT_UNSIGNED, json_dispatch_umask, offsetof(UserRecord, umask), 0 },
+ { "environment", JSON_VARIANT_ARRAY, json_dispatch_environment, offsetof(UserRecord, environment), 0 },
+ { "timeZone", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, time_zone), JSON_SAFE },
+ { "preferredLanguage", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, preferred_language), JSON_SAFE },
+ { "niceLevel", _JSON_VARIANT_TYPE_INVALID, json_dispatch_nice, offsetof(UserRecord, nice_level), 0 },
+ { "resourceLimits", _JSON_VARIANT_TYPE_INVALID, json_dispatch_rlimits, offsetof(UserRecord, rlimits), 0 },
+ { "locked", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, locked), 0 },
+ { "notBeforeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_before_usec), 0 },
+ { "notAfterUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_after_usec), 0 },
+ { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 },
+ { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_disk_size, offsetof(UserRecord, disk_size), 0 },
+ { "diskSizeRelative", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size_relative), 0 },
+ { "skeletonDirectory", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, skeleton_directory), 0 },
+ { "accessMode", JSON_VARIANT_UNSIGNED, json_dispatch_access_mode, offsetof(UserRecord, access_mode), 0 },
+ { "tasksMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, tasks_max), 0 },
+ { "memoryHigh", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_high), 0 },
+ { "memoryMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_max), 0 },
+ { "cpuWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, cpu_weight), 0 },
+ { "ioWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, io_weight), 0 },
+ { "mountNoDevices", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nodev), 0 },
+ { "mountNoSuid", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nosuid), 0 },
+ { "mountNoExecute", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, noexec), 0 },
+ { "cifsDomain", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_domain), JSON_SAFE },
+ { "cifsUserName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_user_name), JSON_SAFE },
+ { "cifsService", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_service), JSON_SAFE },
+ { "imagePath", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, image_path), 0 },
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 },
+ { "memberOf", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(UserRecord, member_of), JSON_RELAX},
+ { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE },
+ { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 },
+ { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 },
+ { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 },
+ { "luksDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_discard), 0, },
+ { "luksOfflineDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_offline_discard), 0, },
+ { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE },
+ { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE },
+ { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 },
+ { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE },
+ { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), JSON_SAFE },
+ { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 },
+ { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 },
+ { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 },
+ { "rateLimitIntervalUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_interval_usec), 0 },
+ { "rateLimitBurst", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_burst), 0 },
+ { "enforcePasswordPolicy", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, enforce_password_policy), 0 },
+ { "autoLogin", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, auto_login), 0 },
+ { "stopDelayUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, stop_delay_usec), 0 },
+ { "killProcesses", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, kill_processes), 0 },
+ { "passwordChangeMinUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_min_usec), 0 },
+ { "passwordChangeMaxUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_max_usec), 0 },
+ { "passwordChangeWarnUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_warn_usec), 0 },
+ { "passwordChangeInactiveUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_inactive_usec), 0 },
+ { "passwordChangeNow", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, password_change_now), 0 },
+ { "pkcs11TokenUri", JSON_VARIANT_ARRAY, dispatch_pkcs11_uri_array, offsetof(UserRecord, pkcs11_token_uri), 0 },
+ { "fido2HmacCredential", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_credential_array, 0, 0 },
+ {},
+ };
+
+ JsonVariant *e;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_array(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array.", strna(name));
+
+ JSON_VARIANT_ARRAY_FOREACH(e, variant) {
+ bool matching = false;
+ JsonVariant *m;
+
+ if (!json_variant_is_object(e))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an array of objects.", strna(name));
+
+ m = json_variant_by_key(e, "matchMachineId");
+ if (m) {
+ r = per_machine_id_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+
+ if (!matching) {
+ m = json_variant_by_key(e, "matchHostname");
+ if (m) {
+ r = per_machine_hostname_match(m, flags);
+ if (r < 0)
+ return r;
+
+ matching = r > 0;
+ }
+ }
+
+ if (!matching)
+ continue;
+
+ r = json_dispatch(e, per_machine_dispatch_table, NULL, flags, userdata);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+static int dispatch_status(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) {
+
+ static const JsonDispatch status_dispatch_table[] = {
+ { "diskUsage", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_usage), 0 },
+ { "diskFree", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_free), 0 },
+ { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size), 0 },
+ { "diskCeiling", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_ceiling), 0 },
+ { "diskFloor", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_floor), 0 },
+ { "state", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, state), JSON_SAFE },
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, service), JSON_SAFE },
+ { "signedLocally", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, signed_locally), 0 },
+ { "goodAuthenticationCounter", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, good_authentication_counter), 0 },
+ { "badAuthenticationCounter", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, bad_authentication_counter), 0 },
+ { "lastGoodAuthenticationUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_good_authentication_usec), 0 },
+ { "lastBadAuthenticationUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_bad_authentication_usec), 0 },
+ { "rateLimitBeginUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_begin_usec), 0 },
+ { "rateLimitCount", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_count), 0 },
+ { "removable", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, removable), 0 },
+ {},
+ };
+
+ char smid[SD_ID128_STRING_MAX];
+ JsonVariant *m;
+ sd_id128_t mid;
+ int r;
+
+ if (!variant)
+ return 0;
+
+ if (!json_variant_is_object(variant))
+ return json_log(variant, flags, SYNTHETIC_ERRNO(EINVAL), "JSON field '%s' is not an object.", strna(name));
+
+ r = sd_id128_get_machine(&mid);
+ if (r < 0)
+ return json_log(variant, flags, r, "Failed to determine machine ID: %m");
+
+ m = json_variant_by_key(variant, sd_id128_to_string(mid, smid));
+ if (!m)
+ return 0;
+
+ return json_dispatch(m, status_dispatch_table, NULL, flags, userdata);
+}
+
+int user_record_build_image_path(UserStorage storage, const char *user_name_and_realm, char **ret) {
+ const char *suffix;
+ char *z;
+
+ assert(storage >= 0);
+ assert(user_name_and_realm);
+ assert(ret);
+
+ if (storage == USER_LUKS)
+ suffix = ".home";
+ else if (IN_SET(storage, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT))
+ suffix = ".homedir";
+ else {
+ *ret = NULL;
+ return 0;
+ }
+
+ z = strjoin("/home/", user_name_and_realm, suffix);
+ if (!z)
+ return -ENOMEM;
+
+ *ret = z;
+ return 1;
+}
+
+static int user_record_augment(UserRecord *h, JsonDispatchFlags json_flags) {
+ int r;
+
+ assert(h);
+
+ if (!FLAGS_SET(h->mask, USER_RECORD_REGULAR))
+ return 0;
+
+ assert(h->user_name);
+
+ if (!h->user_name_and_realm_auto && h->realm) {
+ h->user_name_and_realm_auto = strjoin(h->user_name, "@", h->realm);
+ if (!h->user_name_and_realm_auto)
+ return json_log_oom(h->json, json_flags);
+ }
+
+ /* Let's add in the following automatisms only for regular users, they don't make sense for any others */
+ if (user_record_disposition(h) != USER_REGULAR)
+ return 0;
+
+ if (!h->home_directory && !h->home_directory_auto) {
+ h->home_directory_auto = path_join("/home/", h->user_name);
+ if (!h->home_directory_auto)
+ return json_log_oom(h->json, json_flags);
+ }
+
+ if (!h->image_path && !h->image_path_auto) {
+ r = user_record_build_image_path(user_record_storage(h), user_record_user_name_and_realm(h), &h->image_path_auto);
+ if (r < 0)
+ return json_log(h->json, json_flags, r, "Failed to determine default image path: %m");
+ }
+
+ return 0;
+}
+
+int user_group_record_mangle(
+ JsonVariant *v,
+ UserRecordLoadFlags load_flags,
+ JsonVariant **ret_variant,
+ UserRecordMask *ret_mask) {
+
+ static const struct {
+ UserRecordMask mask;
+ const char *name;
+ } mask_field[] = {
+ { USER_RECORD_PRIVILEGED, "privileged" },
+ { USER_RECORD_SECRET, "secret" },
+ { USER_RECORD_BINDING, "binding" },
+ { USER_RECORD_PER_MACHINE, "perMachine" },
+ { USER_RECORD_STATUS, "status" },
+ { USER_RECORD_SIGNATURE, "signature" },
+ };
+
+ JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags);
+ _cleanup_(json_variant_unrefp) JsonVariant *w = NULL;
+ JsonVariant *array[ELEMENTSOF(mask_field) * 2];
+ size_t n_retain = 0, i;
+ UserRecordMask m = 0;
+ int r;
+
+ assert((load_flags & _USER_RECORD_MASK_MAX) == 0); /* detect mistakes when accidentally passing
+ * UserRecordMask bit masks as UserRecordLoadFlags
+ * value */
+
+ assert(v);
+ assert(ret_variant);
+ assert(ret_mask);
+
+ /* Note that this function is shared with the group record parser, hence we try to be generic in our
+ * log message wording here, to cover both cases. */
+
+ if (!json_variant_is_object(v))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is not a JSON object, refusing.");
+
+ if (USER_RECORD_ALLOW_MASK(load_flags) == 0) /* allow nothing? */
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Nothing allowed in record, refusing.");
+
+ if (USER_RECORD_STRIP_MASK(load_flags) == _USER_RECORD_MASK_MAX) /* strip everything? */
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EINVAL), "Stripping everything from record, refusing.");
+
+ /* Check if we have the special sections and if they match our flags set */
+ for (i = 0; i < ELEMENTSOF(mask_field); i++) {
+ JsonVariant *e, *k;
+
+ if (FLAGS_SET(USER_RECORD_STRIP_MASK(load_flags), mask_field[i].mask)) {
+ if (!w)
+ w = json_variant_ref(v);
+
+ r = json_variant_filter(&w, STRV_MAKE(mask_field[i].name));
+ if (r < 0)
+ return json_log(w, json_flags, r, "Failed to remove field from variant: %m");
+
+ continue;
+ }
+
+ e = json_variant_by_key_full(v, mask_field[i].name, &k);
+ if (e) {
+ if (!FLAGS_SET(USER_RECORD_ALLOW_MASK(load_flags), mask_field[i].mask))
+ return json_log(e, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record contains '%s' field, which is not allowed.", mask_field[i].name);
+
+ if (FLAGS_SET(load_flags, USER_RECORD_STRIP_REGULAR)) {
+ array[n_retain++] = k;
+ array[n_retain++] = e;
+ }
+
+ m |= mask_field[i].mask;
+ } else {
+ if (FLAGS_SET(USER_RECORD_REQUIRE_MASK(load_flags), mask_field[i].mask))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks '%s' field, which is required.", mask_field[i].name);
+ }
+ }
+
+ if (FLAGS_SET(load_flags, USER_RECORD_STRIP_REGULAR)) {
+ /* If we are supposed to strip regular items, then let's instead just allocate a new object
+ * with just the stuff we need. */
+
+ w = json_variant_unref(w);
+ r = json_variant_new_object(&w, array, n_retain);
+ if (r < 0)
+ return json_log(v, json_flags, r, "Failed to allocate new object: %m");
+ } else {
+ /* And now check if there's anything else in the record */
+ for (i = 0; i < json_variant_elements(v); i += 2) {
+ const char *f;
+ bool special = false;
+ size_t j;
+
+ assert_se(f = json_variant_string(json_variant_by_index(v, i)));
+
+ for (j = 0; j < ELEMENTSOF(mask_field); j++)
+ if (streq(f, mask_field[j].name)) { /* already covered in the loop above */
+ special = true;
+ continue;
+ }
+
+ if (!special) {
+ if ((load_flags & (USER_RECORD_ALLOW_REGULAR|USER_RECORD_REQUIRE_REGULAR)) == 0)
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record contains '%s' field, which is not allowed.", f);
+
+ m |= USER_RECORD_REGULAR;
+ break;
+ }
+ }
+ }
+
+ if (FLAGS_SET(load_flags, USER_RECORD_REQUIRE_REGULAR) && !FLAGS_SET(m, USER_RECORD_REGULAR))
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record lacks basic identity fields, which are required.");
+
+ if (m == 0)
+ return json_log(v, json_flags, SYNTHETIC_ERRNO(EBADMSG), "Record is empty.");
+
+ if (w)
+ *ret_variant = TAKE_PTR(w);
+ else
+ *ret_variant = json_variant_ref(v);
+
+ *ret_mask = m;
+ return 0;
+}
+
+int user_record_load(UserRecord *h, JsonVariant *v, UserRecordLoadFlags load_flags) {
+
+ static const JsonDispatch user_dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_user_group_name, offsetof(UserRecord, user_name), JSON_RELAX},
+ { "realm", JSON_VARIANT_STRING, json_dispatch_realm, offsetof(UserRecord, realm), 0 },
+ { "realName", JSON_VARIANT_STRING, json_dispatch_gecos, offsetof(UserRecord, real_name), 0 },
+ { "emailAddress", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, email_address), JSON_SAFE },
+ { "iconName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, icon_name), JSON_SAFE },
+ { "location", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, location), 0 },
+ { "disposition", JSON_VARIANT_STRING, json_dispatch_user_disposition, offsetof(UserRecord, disposition), 0 },
+ { "lastChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_change_usec), 0 },
+ { "lastPasswordChangeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, last_password_change_usec), 0 },
+ { "shell", JSON_VARIANT_STRING, json_dispatch_filename_or_path, offsetof(UserRecord, shell), 0 },
+ { "umask", JSON_VARIANT_UNSIGNED, json_dispatch_umask, offsetof(UserRecord, umask), 0 },
+ { "environment", JSON_VARIANT_ARRAY, json_dispatch_environment, offsetof(UserRecord, environment), 0 },
+ { "timeZone", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, time_zone), JSON_SAFE },
+ { "preferredLanguage", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, preferred_language), JSON_SAFE },
+ { "niceLevel", _JSON_VARIANT_TYPE_INVALID, json_dispatch_nice, offsetof(UserRecord, nice_level), 0 },
+ { "resourceLimits", _JSON_VARIANT_TYPE_INVALID, json_dispatch_rlimits, offsetof(UserRecord, rlimits), 0 },
+ { "locked", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, locked), 0 },
+ { "notBeforeUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_before_usec), 0 },
+ { "notAfterUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, not_after_usec), 0 },
+ { "storage", JSON_VARIANT_STRING, json_dispatch_storage, offsetof(UserRecord, storage), 0 },
+ { "diskSize", JSON_VARIANT_UNSIGNED, json_dispatch_disk_size, offsetof(UserRecord, disk_size), 0 },
+ { "diskSizeRelative", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, disk_size_relative), 0 },
+ { "skeletonDirectory", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, skeleton_directory), 0 },
+ { "accessMode", JSON_VARIANT_UNSIGNED, json_dispatch_access_mode, offsetof(UserRecord, access_mode), 0 },
+ { "tasksMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, tasks_max), 0 },
+ { "memoryHigh", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_high), 0 },
+ { "memoryMax", JSON_VARIANT_UNSIGNED, json_dispatch_tasks_or_memory_max, offsetof(UserRecord, memory_max), 0 },
+ { "cpuWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, cpu_weight), 0 },
+ { "ioWeight", JSON_VARIANT_UNSIGNED, json_dispatch_weight, offsetof(UserRecord, io_weight), 0 },
+ { "mountNoDevices", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nodev), 0 },
+ { "mountNoSuid", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, nosuid), 0 },
+ { "mountNoExecute", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(UserRecord, noexec), 0 },
+ { "cifsDomain", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_domain), JSON_SAFE },
+ { "cifsUserName", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_user_name), JSON_SAFE },
+ { "cifsService", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, cifs_service), JSON_SAFE },
+ { "imagePath", JSON_VARIANT_STRING, json_dispatch_path, offsetof(UserRecord, image_path), 0 },
+ { "homeDirectory", JSON_VARIANT_STRING, json_dispatch_home_directory, offsetof(UserRecord, home_directory), 0 },
+ { "uid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, uid), 0 },
+ { "gid", JSON_VARIANT_UNSIGNED, json_dispatch_uid_gid, offsetof(UserRecord, gid), 0 },
+ { "memberOf", JSON_VARIANT_ARRAY, json_dispatch_user_group_list, offsetof(UserRecord, member_of), JSON_RELAX},
+ { "fileSystemType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, file_system_type), JSON_SAFE },
+ { "partitionUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, partition_uuid), 0 },
+ { "luksUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, luks_uuid), 0 },
+ { "fileSystemUuid", JSON_VARIANT_STRING, json_dispatch_id128, offsetof(UserRecord, file_system_uuid), 0 },
+ { "luksDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_discard), 0 },
+ { "luksOfflineDiscard", _JSON_VARIANT_TYPE_INVALID, json_dispatch_tristate, offsetof(UserRecord, luks_offline_discard), 0 },
+ { "luksCipher", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher), JSON_SAFE },
+ { "luksCipherMode", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_cipher_mode), JSON_SAFE },
+ { "luksVolumeKeySize", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_volume_key_size), 0 },
+ { "luksPbkdfHashAlgorithm", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_hash_algorithm), JSON_SAFE },
+ { "luksPbkdfType", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, luks_pbkdf_type), JSON_SAFE },
+ { "luksPbkdfTimeCostUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_time_cost_usec), 0 },
+ { "luksPbkdfMemoryCost", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_memory_cost), 0 },
+ { "luksPbkdfParallelThreads", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, luks_pbkdf_parallel_threads), 0 },
+ { "service", JSON_VARIANT_STRING, json_dispatch_string, offsetof(UserRecord, service), JSON_SAFE },
+ { "rateLimitIntervalUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_interval_usec), 0 },
+ { "rateLimitBurst", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, ratelimit_burst), 0 },
+ { "enforcePasswordPolicy", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, enforce_password_policy), 0 },
+ { "autoLogin", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, auto_login), 0 },
+ { "stopDelayUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, stop_delay_usec), 0 },
+ { "killProcesses", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, kill_processes), 0 },
+ { "passwordChangeMinUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_min_usec), 0 },
+ { "passwordChangeMaxUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_max_usec), 0 },
+ { "passwordChangeWarnUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_warn_usec), 0 },
+ { "passwordChangeInactiveUSec", JSON_VARIANT_UNSIGNED, json_dispatch_uint64, offsetof(UserRecord, password_change_inactive_usec), 0 },
+ { "passwordChangeNow", JSON_VARIANT_BOOLEAN, json_dispatch_tristate, offsetof(UserRecord, password_change_now), 0 },
+ { "pkcs11TokenUri", JSON_VARIANT_ARRAY, dispatch_pkcs11_uri_array, offsetof(UserRecord, pkcs11_token_uri), 0 },
+ { "fido2HmacCredential", JSON_VARIANT_ARRAY, dispatch_fido2_hmac_credential_array, 0, 0 },
+ { "recoveryKeyType", JSON_VARIANT_ARRAY, json_dispatch_strv, offsetof(UserRecord, recovery_key_type), 0 },
+
+ { "secret", JSON_VARIANT_OBJECT, dispatch_secret, 0, 0 },
+ { "privileged", JSON_VARIANT_OBJECT, dispatch_privileged, 0, 0 },
+
+ /* Ignore the perMachine, binding, status stuff here, and process it later, so that it overrides whatever is set above */
+ { "perMachine", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ { "binding", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+ { "status", JSON_VARIANT_OBJECT, NULL, 0, 0 },
+
+ /* Ignore 'signature', we check it with explicit accessors instead */
+ { "signature", JSON_VARIANT_ARRAY, NULL, 0, 0 },
+ {},
+ };
+
+ JsonDispatchFlags json_flags = USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(load_flags);
+ int r;
+
+ assert(h);
+ assert(!h->json);
+
+ /* Note that this call will leave a half-initialized record around on failure! */
+
+ r = user_group_record_mangle(v, load_flags, &h->json, &h->mask);
+ if (r < 0)
+ return r;
+
+ r = json_dispatch(h->json, user_dispatch_table, NULL, json_flags, h);
+ if (r < 0)
+ return r;
+
+ /* During the parsing operation above we ignored the 'perMachine', 'binding' and 'status' fields,
+ * since we want them to override the global options. Let's process them now. */
+
+ r = dispatch_per_machine("perMachine", json_variant_by_key(h->json, "perMachine"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_binding("binding", json_variant_by_key(h->json, "binding"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ r = dispatch_status("status", json_variant_by_key(h->json, "status"), json_flags, h);
+ if (r < 0)
+ return r;
+
+ if (FLAGS_SET(h->mask, USER_RECORD_REGULAR) && !h->user_name)
+ return json_log(h->json, json_flags, SYNTHETIC_ERRNO(EINVAL), "User name field missing, refusing.");
+
+ r = user_record_augment(h, json_flags);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int user_record_build(UserRecord **ret, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
+ _cleanup_(user_record_unrefp) UserRecord *u = NULL;
+ va_list ap;
+ int r;
+
+ assert(ret);
+
+ va_start(ap, ret);
+ r = json_buildv(&v, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ u = user_record_new();
+ if (!u)
+ return -ENOMEM;
+
+ r = user_record_load(u, v, USER_RECORD_LOAD_FULL);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(u);
+ return 0;
+}
+
+const char *user_record_user_name_and_realm(UserRecord *h) {
+ assert(h);
+
+ /* Return the pre-initialized joined string if it is defined */
+ if (h->user_name_and_realm_auto)
+ return h->user_name_and_realm_auto;
+
+ /* If it's not defined then we cannot have a realm */
+ assert(!h->realm);
+ return h->user_name;
+}
+
+UserStorage user_record_storage(UserRecord *h) {
+ assert(h);
+
+ if (h->storage >= 0)
+ return h->storage;
+
+ return USER_CLASSIC;
+}
+
+const char *user_record_file_system_type(UserRecord *h) {
+ assert(h);
+
+ return h->file_system_type ?: "btrfs";
+}
+
+const char *user_record_skeleton_directory(UserRecord *h) {
+ assert(h);
+
+ return h->skeleton_directory ?: "/etc/skel";
+}
+
+mode_t user_record_access_mode(UserRecord *h) {
+ assert(h);
+
+ return h->access_mode != (mode_t) -1 ? h->access_mode : 0700;
+}
+
+const char* user_record_home_directory(UserRecord *h) {
+ assert(h);
+
+ if (h->home_directory)
+ return h->home_directory;
+ if (h->home_directory_auto)
+ return h->home_directory_auto;
+
+ /* The root user is special, hence be special about it */
+ if (streq_ptr(h->user_name, "root"))
+ return "/root";
+
+ return "/";
+}
+
+const char *user_record_image_path(UserRecord *h) {
+ assert(h);
+
+ if (h->image_path)
+ return h->image_path;
+ if (h->image_path_auto)
+ return h->image_path_auto;
+
+ return IN_SET(user_record_storage(h), USER_CLASSIC, USER_DIRECTORY, USER_SUBVOLUME, USER_FSCRYPT) ? user_record_home_directory(h) : NULL;
+}
+
+const char *user_record_cifs_user_name(UserRecord *h) {
+ assert(h);
+
+ return h->cifs_user_name ?: h->user_name;
+}
+
+unsigned long user_record_mount_flags(UserRecord *h) {
+ assert(h);
+
+ return (h->nosuid ? MS_NOSUID : 0) |
+ (h->noexec ? MS_NOEXEC : 0) |
+ (h->nodev ? MS_NODEV : 0);
+}
+
+const char *user_record_shell(UserRecord *h) {
+ assert(h);
+
+ if (h->shell)
+ return h->shell;
+
+ if (streq_ptr(h->user_name, "root"))
+ return "/bin/sh";
+
+ if (user_record_disposition(h) == USER_REGULAR)
+ return "/bin/bash";
+
+ return NOLOGIN;
+}
+
+const char *user_record_real_name(UserRecord *h) {
+ assert(h);
+
+ return h->real_name ?: h->user_name;
+}
+
+bool user_record_luks_discard(UserRecord *h) {
+ const char *ip;
+
+ assert(h);
+
+ if (h->luks_discard >= 0)
+ return h->luks_discard;
+
+ ip = user_record_image_path(h);
+ if (!ip)
+ return false;
+
+ /* Use discard by default if we are referring to a real block device, but not when operating on a
+ * loopback device. We want to optimize for SSD and flash storage after all, but we should be careful
+ * when storing stuff on top of regular file systems in loopback files as doing discard then would
+ * mean thin provisioning and we should not do that willy-nilly since it means we'll risk EIO later
+ * on should the disk space to back our file systems not be available. */
+
+ return path_startswith(ip, "/dev/");
+}
+
+bool user_record_luks_offline_discard(UserRecord *h) {
+ const char *ip;
+
+ assert(h);
+
+ if (h->luks_offline_discard >= 0)
+ return h->luks_offline_discard;
+
+ /* Discard while we are logged out should generally be a good idea, except when operating directly on
+ * physical media, where we should just bind it to the online discard mode. */
+
+ ip = user_record_image_path(h);
+ if (!ip)
+ return false;
+
+ if (path_startswith(ip, "/dev/"))
+ return user_record_luks_discard(h);
+
+ return true;
+}
+
+const char *user_record_luks_cipher(UserRecord *h) {
+ assert(h);
+
+ return h->luks_cipher ?: "aes";
+}
+
+const char *user_record_luks_cipher_mode(UserRecord *h) {
+ assert(h);
+
+ return h->luks_cipher_mode ?: "xts-plain64";
+}
+
+uint64_t user_record_luks_volume_key_size(UserRecord *h) {
+ assert(h);
+
+ /* We return a value here that can be cast without loss into size_t which is what libcrypsetup expects */
+
+ if (h->luks_volume_key_size == UINT64_MAX)
+ return 256 / 8;
+
+ return MIN(h->luks_volume_key_size, SIZE_MAX);
+}
+
+const char* user_record_luks_pbkdf_type(UserRecord *h) {
+ assert(h);
+
+ return h->luks_pbkdf_type ?: "argon2i";
+}
+
+uint64_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h) {
+ assert(h);
+
+ /* Returns a value with ms granularity, since that's what libcryptsetup expects */
+
+ if (h->luks_pbkdf_time_cost_usec == UINT64_MAX)
+ return 500 * USEC_PER_MSEC; /* We default to 500ms, in contrast to libcryptsetup's 2s, which is just awfully slow on every login */
+
+ return MIN(DIV_ROUND_UP(h->luks_pbkdf_time_cost_usec, USEC_PER_MSEC), UINT32_MAX) * USEC_PER_MSEC;
+}
+
+uint64_t user_record_luks_pbkdf_memory_cost(UserRecord *h) {
+ assert(h);
+
+ /* Returns a value with kb granularity, since that's what libcryptsetup expects */
+
+ if (h->luks_pbkdf_memory_cost == UINT64_MAX)
+ return 64*1024*1024; /* We default to 64M, since this should work on smaller systems too */
+
+ return MIN(DIV_ROUND_UP(h->luks_pbkdf_memory_cost, 1024), UINT32_MAX) * 1024;
+}
+
+uint64_t user_record_luks_pbkdf_parallel_threads(UserRecord *h) {
+ assert(h);
+
+ if (h->luks_pbkdf_memory_cost == UINT64_MAX)
+ return 1; /* We default to 1, since this should work on smaller systems too */
+
+ return MIN(h->luks_pbkdf_parallel_threads, UINT32_MAX);
+}
+
+const char *user_record_luks_pbkdf_hash_algorithm(UserRecord *h) {
+ assert(h);
+
+ return h->luks_pbkdf_hash_algorithm ?: "sha512";
+}
+
+gid_t user_record_gid(UserRecord *h) {
+ assert(h);
+
+ if (gid_is_valid(h->gid))
+ return h->gid;
+
+ return (gid_t) h->uid;
+}
+
+UserDisposition user_record_disposition(UserRecord *h) {
+ assert(h);
+
+ if (h->disposition >= 0)
+ return h->disposition;
+
+ /* If not declared, derive from UID */
+
+ if (!uid_is_valid(h->uid))
+ return _USER_DISPOSITION_INVALID;
+
+ if (h->uid == 0 || h->uid == UID_NOBODY)
+ return USER_INTRINSIC;
+
+ if (uid_is_system(h->uid))
+ return USER_SYSTEM;
+
+ if (uid_is_dynamic(h->uid))
+ return USER_DYNAMIC;
+
+ if (uid_is_container(h->uid))
+ return USER_CONTAINER;
+
+ if (h->uid > INT32_MAX)
+ return USER_RESERVED;
+
+ return USER_REGULAR;
+}
+
+int user_record_removable(UserRecord *h) {
+ UserStorage storage;
+ assert(h);
+
+ if (h->removable >= 0)
+ return h->removable;
+
+ /* Refuse to decide for classic records */
+ storage = user_record_storage(h);
+ if (h->storage < 0 || h->storage == USER_CLASSIC)
+ return -1;
+
+ /* For now consider only LUKS home directories with a reference by path as removable */
+ return storage == USER_LUKS && path_startswith(user_record_image_path(h), "/dev/");
+}
+
+uint64_t user_record_ratelimit_interval_usec(UserRecord *h) {
+ assert(h);
+
+ if (h->ratelimit_interval_usec == UINT64_MAX)
+ return DEFAULT_RATELIMIT_INTERVAL_USEC;
+
+ return h->ratelimit_interval_usec;
+}
+
+uint64_t user_record_ratelimit_burst(UserRecord *h) {
+ assert(h);
+
+ if (h->ratelimit_burst == UINT64_MAX)
+ return DEFAULT_RATELIMIT_BURST;
+
+ return h->ratelimit_burst;
+}
+
+bool user_record_can_authenticate(UserRecord *h) {
+ assert(h);
+
+ /* Returns true if there's some form of property configured that the user can authenticate against */
+
+ if (h->n_pkcs11_encrypted_key > 0)
+ return true;
+
+ if (h->n_fido2_hmac_salt > 0)
+ return true;
+
+ return !strv_isempty(h->hashed_password);
+}
+
+uint64_t user_record_ratelimit_next_try(UserRecord *h) {
+ assert(h);
+
+ /* Calculates when the it's possible to login next. Returns:
+ *
+ * UINT64_MAX → Nothing known
+ * 0 → Right away
+ * Any other → Next time in CLOCK_REALTIME in usec (which could be in the past)
+ */
+
+ if (h->ratelimit_begin_usec == UINT64_MAX ||
+ h->ratelimit_count == UINT64_MAX)
+ return UINT64_MAX;
+
+ if (h->ratelimit_begin_usec > now(CLOCK_REALTIME)) /* If the ratelimit time is in the future, then
+ * the local clock is probably incorrect. Let's
+ * not refuse login then. */
+ return UINT64_MAX;
+
+ if (h->ratelimit_count < user_record_ratelimit_burst(h))
+ return 0;
+
+ return usec_add(h->ratelimit_begin_usec, user_record_ratelimit_interval_usec(h));
+}
+
+bool user_record_equal(UserRecord *a, UserRecord *b) {
+ assert(a);
+ assert(b);
+
+ /* We assume that when a record is modified its JSON data is updated at the same time, hence it's
+ * sufficient to compare the JSON data. */
+
+ return json_variant_equal(a->json, b->json);
+}
+
+bool user_record_compatible(UserRecord *a, UserRecord *b) {
+ assert(a);
+ assert(b);
+
+ /* If either lacks the regular section, we can't really decide, let's hence say they are
+ * incompatible. */
+ if (!(a->mask & b->mask & USER_RECORD_REGULAR))
+ return false;
+
+ return streq_ptr(a->user_name, b->user_name) &&
+ streq_ptr(a->realm, b->realm);
+}
+
+int user_record_compare_last_change(UserRecord *a, UserRecord *b) {
+ assert(a);
+ assert(b);
+
+ if (a->last_change_usec == b->last_change_usec)
+ return 0;
+
+ /* Always consider a record with a timestamp newer than one without */
+ if (a->last_change_usec == UINT64_MAX)
+ return -1;
+ if (b->last_change_usec == UINT64_MAX)
+ return 1;
+
+ return CMP(a->last_change_usec, b->last_change_usec);
+}
+
+int user_record_clone(UserRecord *h, UserRecordLoadFlags flags, UserRecord **ret) {
+ _cleanup_(user_record_unrefp) UserRecord *c = NULL;
+ int r;
+
+ assert(h);
+ assert(ret);
+
+ c = user_record_new();
+ if (!c)
+ return -ENOMEM;
+
+ r = user_record_load(c, h->json, flags);
+ if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(c);
+ return 0;
+}
+
+int user_record_masked_equal(UserRecord *a, UserRecord *b, UserRecordMask mask) {
+ _cleanup_(user_record_unrefp) UserRecord *x = NULL, *y = NULL;
+ int r;
+
+ assert(a);
+ assert(b);
+
+ /* Compares the two records, but ignores anything not listed in the specified mask */
+
+ if ((a->mask & ~mask) != 0) {
+ r = user_record_clone(a, USER_RECORD_ALLOW(mask) | USER_RECORD_STRIP(~mask & _USER_RECORD_MASK_MAX), &x);
+ if (r < 0)
+ return r;
+
+ a = x;
+ }
+
+ if ((b->mask & ~mask) != 0) {
+ r = user_record_clone(b, USER_RECORD_ALLOW(mask) | USER_RECORD_STRIP(~mask & _USER_RECORD_MASK_MAX), &y);
+ if (r < 0)
+ return r;
+
+ b = y;
+ }
+
+ return user_record_equal(a, b);
+}
+
+int user_record_test_blocked(UserRecord *h) {
+ usec_t n;
+
+ /* Checks whether access to the specified user shall be allowed at the moment. Returns:
+ *
+ * -ESTALE: Record is from the future
+ * -ENOLCK: Record is blocked
+ * -EL2HLT: Record is not valid yet
+ * -EL3HLT: Record is not valid anymore
+ *
+ */
+
+ assert(h);
+
+ if (h->locked > 0)
+ return -ENOLCK;
+
+ n = now(CLOCK_REALTIME);
+
+ if (h->not_before_usec != UINT64_MAX && n < h->not_before_usec)
+ return -EL2HLT;
+ if (h->not_after_usec != UINT64_MAX && n > h->not_after_usec)
+ return -EL3HLT;
+
+ if (h->last_change_usec != UINT64_MAX &&
+ h->last_change_usec > n) /* Complain during log-ins when the record is from the future */
+ return -ESTALE;
+
+ return 0;
+}
+
+int user_record_test_password_change_required(UserRecord *h) {
+ bool change_permitted;
+ usec_t n;
+
+ assert(h);
+
+ /* Checks whether the user must change the password when logging in
+
+ -EKEYREVOKED: Change password now because admin said so
+ -EOWNERDEAD: Change password now because it expired
+ -EKEYREJECTED: Password is expired, no changing is allowed
+ -EKEYEXPIRED: Password is about to expire, warn user
+ -ENETDOWN: Record has expiration info but no password change timestamp
+ -EROFS: No password change required nor permitted
+ -ESTALE: RTC likely incorrect, last password change is in the future
+ 0: No password change required, but permitted
+ */
+
+ /* If a password change request has been set explicitly, it overrides everything */
+ if (h->password_change_now > 0)
+ return -EKEYREVOKED;
+
+ n = now(CLOCK_REALTIME);
+
+ /* Password change in the future? Then our RTC is likely incorrect */
+ if (h->last_password_change_usec != UINT64_MAX &&
+ h->last_password_change_usec > n &&
+ (h->password_change_min_usec != UINT64_MAX ||
+ h->password_change_max_usec != UINT64_MAX ||
+ h->password_change_inactive_usec != UINT64_MAX))
+ return -ESTALE;
+
+ /* Then, let's check if password changing is currently allowed at all */
+ if (h->password_change_min_usec != UINT64_MAX) {
+
+ /* Expiry configured but no password change timestamp known? */
+ if (h->last_password_change_usec == UINT64_MAX)
+ return -ENETDOWN;
+
+ if (h->password_change_min_usec >= UINT64_MAX - h->last_password_change_usec)
+ change_permitted = false;
+ else
+ change_permitted = n >= h->last_password_change_usec + h->password_change_min_usec;
+
+ } else
+ change_permitted = true;
+
+ /* Let's check whether the password has expired. */
+ if (!(h->password_change_max_usec == UINT64_MAX ||
+ h->password_change_max_usec >= UINT64_MAX - h->last_password_change_usec)) {
+
+ uint64_t change_before;
+
+ /* Expiry configured but no password change timestamp known? */
+ if (h->last_password_change_usec == UINT64_MAX)
+ return -ENETDOWN;
+
+ /* Password is in inactive phase? */
+ if (h->password_change_inactive_usec != UINT64_MAX &&
+ h->password_change_inactive_usec < UINT64_MAX - h->password_change_max_usec) {
+ usec_t added;
+
+ added = h->password_change_inactive_usec + h->password_change_max_usec;
+ if (added < UINT64_MAX - h->last_password_change_usec &&
+ n >= h->last_password_change_usec + added)
+ return -EKEYREJECTED;
+ }
+
+ /* Password needs to be changed now? */
+ change_before = h->last_password_change_usec + h->password_change_max_usec;
+ if (n >= change_before)
+ return change_permitted ? -EOWNERDEAD : -EKEYREJECTED;
+
+ /* Warn user? */
+ if (h->password_change_warn_usec != UINT64_MAX &&
+ (change_before < h->password_change_warn_usec ||
+ n >= change_before - h->password_change_warn_usec))
+ return change_permitted ? -EKEYEXPIRED : -EROFS;
+ }
+
+ /* No password changing necessary */
+ return change_permitted ? 0 : -EROFS;
+}
+
+static const char* const user_storage_table[_USER_STORAGE_MAX] = {
+ [USER_CLASSIC] = "classic",
+ [USER_LUKS] = "luks",
+ [USER_DIRECTORY] = "directory",
+ [USER_SUBVOLUME] = "subvolume",
+ [USER_FSCRYPT] = "fscrypt",
+ [USER_CIFS] = "cifs",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(user_storage, UserStorage);
+
+static const char* const user_disposition_table[_USER_DISPOSITION_MAX] = {
+ [USER_INTRINSIC] = "intrinsic",
+ [USER_SYSTEM] = "system",
+ [USER_DYNAMIC] = "dynamic",
+ [USER_REGULAR] = "regular",
+ [USER_CONTAINER] = "container",
+ [USER_RESERVED] = "reserved",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(user_disposition, UserDisposition);
diff --git a/src/shared/user-record.h b/src/shared/user-record.h
new file mode 100644
index 0000000..542a0dc
--- /dev/null
+++ b/src/shared/user-record.h
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include "sd-id128.h"
+
+#include "json.h"
+#include "missing_resource.h"
+#include "time-util.h"
+
+/* But some limits on disk sizes: not less than 5M, not more than 5T */
+#define USER_DISK_SIZE_MIN (UINT64_C(5)*1024*1024)
+#define USER_DISK_SIZE_MAX (UINT64_C(5)*1024*1024*1024*1024)
+
+/* The default disk size to use when nothing else is specified, relative to free disk space */
+#define USER_DISK_SIZE_DEFAULT_PERCENT 85
+
+bool uid_is_system(uid_t uid);
+bool gid_is_system(gid_t gid);
+
+static inline bool uid_is_dynamic(uid_t uid) {
+ return DYNAMIC_UID_MIN <= uid && uid <= DYNAMIC_UID_MAX;
+}
+
+static inline bool gid_is_dynamic(gid_t gid) {
+ return uid_is_dynamic((uid_t) gid);
+}
+
+static inline bool uid_is_container(uid_t uid) {
+ return CONTAINER_UID_BASE_MIN <= uid && uid <= CONTAINER_UID_BASE_MAX;
+}
+
+static inline bool gid_is_container(gid_t gid) {
+ return uid_is_container((uid_t) gid);
+}
+
+typedef struct UGIDAllocationRange {
+ uid_t system_alloc_uid_min;
+ uid_t system_uid_max;
+ gid_t system_alloc_gid_min;
+ gid_t system_gid_max;
+} UGIDAllocationRange;
+
+int read_login_defs(UGIDAllocationRange *ret_defs, const char *path, const char *root);
+const UGIDAllocationRange *acquire_ugid_allocation_range(void);
+
+typedef enum UserDisposition {
+ USER_INTRINSIC, /* root and nobody */
+ USER_SYSTEM, /* statically allocated users for system services */
+ USER_DYNAMIC, /* dynamically allocated users for system services */
+ USER_REGULAR, /* regular (typically human users) */
+ USER_CONTAINER, /* UID ranges allocated for container uses */
+ USER_RESERVED, /* Range above 2^31 */
+ _USER_DISPOSITION_MAX,
+ _USER_DISPOSITION_INVALID = -1,
+} UserDisposition;
+
+typedef enum UserHomeStorage {
+ USER_CLASSIC,
+ USER_LUKS,
+ USER_DIRECTORY, /* A directory, and a .identity file in it, which USER_CLASSIC lacks */
+ USER_SUBVOLUME,
+ USER_FSCRYPT,
+ USER_CIFS,
+ _USER_STORAGE_MAX,
+ _USER_STORAGE_INVALID = -1
+} UserStorage;
+
+typedef enum UserRecordMask {
+ /* The various sections an identity record may have, as bit mask */
+ USER_RECORD_REGULAR = 1U << 0,
+ USER_RECORD_SECRET = 1U << 1,
+ USER_RECORD_PRIVILEGED = 1U << 2,
+ USER_RECORD_PER_MACHINE = 1U << 3,
+ USER_RECORD_BINDING = 1U << 4,
+ USER_RECORD_STATUS = 1U << 5,
+ USER_RECORD_SIGNATURE = 1U << 6,
+ _USER_RECORD_MASK_MAX = (1U << 7)-1
+} UserRecordMask;
+
+typedef enum UserRecordLoadFlags {
+ /* A set of flags used while loading a user record from JSON data. We leave the lower 6 bits free,
+ * just as a safety precaution so that we can detect borked conversions between UserRecordMask and
+ * UserRecordLoadFlags. */
+
+ /* What to require */
+ USER_RECORD_REQUIRE_REGULAR = USER_RECORD_REGULAR << 7,
+ USER_RECORD_REQUIRE_SECRET = USER_RECORD_SECRET << 7,
+ USER_RECORD_REQUIRE_PRIVILEGED = USER_RECORD_PRIVILEGED << 7,
+ USER_RECORD_REQUIRE_PER_MACHINE = USER_RECORD_PER_MACHINE << 7,
+ USER_RECORD_REQUIRE_BINDING = USER_RECORD_BINDING << 7,
+ USER_RECORD_REQUIRE_STATUS = USER_RECORD_STATUS << 7,
+ USER_RECORD_REQUIRE_SIGNATURE = USER_RECORD_SIGNATURE << 7,
+
+ /* What to allow */
+ USER_RECORD_ALLOW_REGULAR = USER_RECORD_REGULAR << 14,
+ USER_RECORD_ALLOW_SECRET = USER_RECORD_SECRET << 14,
+ USER_RECORD_ALLOW_PRIVILEGED = USER_RECORD_PRIVILEGED << 14,
+ USER_RECORD_ALLOW_PER_MACHINE = USER_RECORD_PER_MACHINE << 14,
+ USER_RECORD_ALLOW_BINDING = USER_RECORD_BINDING << 14,
+ USER_RECORD_ALLOW_STATUS = USER_RECORD_STATUS << 14,
+ USER_RECORD_ALLOW_SIGNATURE = USER_RECORD_SIGNATURE << 14,
+
+ /* What to strip */
+ USER_RECORD_STRIP_REGULAR = USER_RECORD_REGULAR << 21,
+ USER_RECORD_STRIP_SECRET = USER_RECORD_SECRET << 21,
+ USER_RECORD_STRIP_PRIVILEGED = USER_RECORD_PRIVILEGED << 21,
+ USER_RECORD_STRIP_PER_MACHINE = USER_RECORD_PER_MACHINE << 21,
+ USER_RECORD_STRIP_BINDING = USER_RECORD_BINDING << 21,
+ USER_RECORD_STRIP_STATUS = USER_RECORD_STATUS << 21,
+ USER_RECORD_STRIP_SIGNATURE = USER_RECORD_SIGNATURE << 21,
+
+ /* Some special combinations that deserve explicit names */
+ USER_RECORD_LOAD_FULL = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_SECRET |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_BINDING |
+ USER_RECORD_ALLOW_STATUS |
+ USER_RECORD_ALLOW_SIGNATURE,
+
+ USER_RECORD_LOAD_REFUSE_SECRET = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_BINDING |
+ USER_RECORD_ALLOW_STATUS |
+ USER_RECORD_ALLOW_SIGNATURE,
+
+ USER_RECORD_LOAD_MASK_SECRET = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_BINDING |
+ USER_RECORD_ALLOW_STATUS |
+ USER_RECORD_ALLOW_SIGNATURE |
+ USER_RECORD_STRIP_SECRET,
+
+ USER_RECORD_EXTRACT_SECRET = USER_RECORD_REQUIRE_SECRET |
+ USER_RECORD_STRIP_REGULAR |
+ USER_RECORD_STRIP_PRIVILEGED |
+ USER_RECORD_STRIP_PER_MACHINE |
+ USER_RECORD_STRIP_BINDING |
+ USER_RECORD_STRIP_STATUS |
+ USER_RECORD_STRIP_SIGNATURE,
+
+ USER_RECORD_LOAD_SIGNABLE = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE,
+
+ USER_RECORD_EXTRACT_SIGNABLE = USER_RECORD_LOAD_SIGNABLE |
+ USER_RECORD_STRIP_SECRET |
+ USER_RECORD_STRIP_BINDING |
+ USER_RECORD_STRIP_STATUS |
+ USER_RECORD_STRIP_SIGNATURE,
+
+ USER_RECORD_LOAD_EMBEDDED = USER_RECORD_REQUIRE_REGULAR |
+ USER_RECORD_ALLOW_PRIVILEGED |
+ USER_RECORD_ALLOW_PER_MACHINE |
+ USER_RECORD_ALLOW_SIGNATURE,
+
+ USER_RECORD_EXTRACT_EMBEDDED = USER_RECORD_LOAD_EMBEDDED |
+ USER_RECORD_STRIP_SECRET |
+ USER_RECORD_STRIP_BINDING |
+ USER_RECORD_STRIP_STATUS,
+
+ /* Whether to log about loader errors beyond LOG_DEBUG */
+ USER_RECORD_LOG = 1U << 28,
+
+ /* Whether to ignore errors and load what we can */
+ USER_RECORD_PERMISSIVE = 1U << 29,
+} UserRecordLoadFlags;
+
+static inline UserRecordLoadFlags USER_RECORD_REQUIRE(UserRecordMask m) {
+ assert((m & ~_USER_RECORD_MASK_MAX) == 0);
+ return m << 7;
+}
+
+static inline UserRecordLoadFlags USER_RECORD_ALLOW(UserRecordMask m) {
+ assert((m & ~_USER_RECORD_MASK_MAX) == 0);
+ return m << 14;
+}
+
+static inline UserRecordLoadFlags USER_RECORD_STRIP(UserRecordMask m) {
+ assert((m & ~_USER_RECORD_MASK_MAX) == 0);
+ return m << 21;
+}
+
+static inline UserRecordMask USER_RECORD_REQUIRE_MASK(UserRecordLoadFlags f) {
+ return (f >> 7) & _USER_RECORD_MASK_MAX;
+}
+
+static inline UserRecordMask USER_RECORD_ALLOW_MASK(UserRecordLoadFlags f) {
+ return ((f >> 14) & _USER_RECORD_MASK_MAX) | USER_RECORD_REQUIRE_MASK(f);
+}
+
+static inline UserRecordMask USER_RECORD_STRIP_MASK(UserRecordLoadFlags f) {
+ return (f >> 21) & _USER_RECORD_MASK_MAX;
+}
+
+static inline JsonDispatchFlags USER_RECORD_LOAD_FLAGS_TO_JSON_DISPATCH_FLAGS(UserRecordLoadFlags flags) {
+ return (FLAGS_SET(flags, USER_RECORD_LOG) ? JSON_LOG : 0) |
+ (FLAGS_SET(flags, USER_RECORD_PERMISSIVE) ? JSON_PERMISSIVE : 0);
+}
+
+typedef struct Pkcs11EncryptedKey {
+ /* The encrypted passphrase, which can be decrypted with the private key indicated below */
+ void *data;
+ size_t size;
+
+ /* Where to find the private key to decrypt the encrypted passphrase above */
+ char *uri;
+
+ /* What to test the decrypted passphrase against to allow access (classic UNIX password hash). Note
+ * that the decrypted passphrase is also used for unlocking LUKS and fscrypt, and if the account is
+ * backed by LUKS or fscrypt the hashed password is only an additional layer of authentication, not
+ * the only. */
+ char *hashed_password;
+} Pkcs11EncryptedKey;
+
+typedef struct Fido2HmacCredential {
+ void *id;
+ size_t size;
+} Fido2HmacCredential;
+
+typedef struct Fido2HmacSalt {
+ /* The FIDO2 Cridential ID to use */
+ Fido2HmacCredential credential;
+
+ /* The FIDO2 salt value */
+ void *salt;
+ size_t salt_size;
+
+ /* What to test the hashed salt value against, usually UNIX password hash here. */
+ char *hashed_password;
+} Fido2HmacSalt;
+
+typedef struct RecoveryKey {
+ /* The type of recovery key, must be "modhex64" right now */
+ char *type;
+
+ /* A UNIX password hash of the normalized form of modhex64 */
+ char *hashed_password;
+} RecoveryKey;
+
+typedef struct UserRecord {
+ /* The following three fields are not part of the JSON record */
+ unsigned n_ref;
+ UserRecordMask mask;
+ bool incomplete; /* incomplete due to security restrictions. */
+
+ char *user_name;
+ char *realm;
+ char *user_name_and_realm_auto; /* the user_name field concatenated with '@' and the realm, if the latter is defined */
+ char *real_name;
+ char *email_address;
+ char *password_hint;
+ char *icon_name;
+ char *location;
+
+ UserDisposition disposition;
+ uint64_t last_change_usec;
+ uint64_t last_password_change_usec;
+
+ char *shell;
+ mode_t umask;
+ char **environment;
+ char *time_zone;
+ char *preferred_language;
+ int nice_level;
+ struct rlimit *rlimits[_RLIMIT_MAX];
+
+ int locked; /* prohibit activation in general */
+ uint64_t not_before_usec; /* prohibit activation before this unix time */
+ uint64_t not_after_usec; /* prohibit activation after this unix time */
+
+ UserStorage storage;
+ uint64_t disk_size;
+ uint64_t disk_size_relative; /* Disk size, relative to the free bytes of the medium, normalized to UINT32_MAX = 100% */
+ char *skeleton_directory;
+ mode_t access_mode;
+
+ uint64_t tasks_max;
+ uint64_t memory_high;
+ uint64_t memory_max;
+ uint64_t cpu_weight;
+ uint64_t io_weight;
+
+ bool nosuid;
+ bool nodev;
+ bool noexec;
+
+ char **hashed_password;
+ char **ssh_authorized_keys;
+ char **password;
+ char **token_pin;
+
+ char *cifs_domain;
+ char *cifs_user_name;
+ char *cifs_service;
+
+ char *image_path;
+ char *image_path_auto; /* when none is configured explicitly, this is where we place the implicit image */
+ char *home_directory;
+ char *home_directory_auto; /* when none is set explicitly, this is where we place the implicit home directory */
+
+ uid_t uid;
+ gid_t gid;
+
+ char **member_of;
+
+ char *file_system_type;
+ sd_id128_t partition_uuid;
+ sd_id128_t luks_uuid;
+ sd_id128_t file_system_uuid;
+
+ int luks_discard;
+ int luks_offline_discard;
+ char *luks_cipher;
+ char *luks_cipher_mode;
+ uint64_t luks_volume_key_size;
+ char *luks_pbkdf_hash_algorithm;
+ char *luks_pbkdf_type;
+ uint64_t luks_pbkdf_time_cost_usec;
+ uint64_t luks_pbkdf_memory_cost;
+ uint64_t luks_pbkdf_parallel_threads;
+
+ uint64_t disk_usage;
+ uint64_t disk_free;
+ uint64_t disk_ceiling;
+ uint64_t disk_floor;
+
+ char *state;
+ char *service;
+ int signed_locally;
+
+ uint64_t good_authentication_counter;
+ uint64_t bad_authentication_counter;
+ uint64_t last_good_authentication_usec;
+ uint64_t last_bad_authentication_usec;
+
+ uint64_t ratelimit_begin_usec;
+ uint64_t ratelimit_count;
+ uint64_t ratelimit_interval_usec;
+ uint64_t ratelimit_burst;
+
+ int removable;
+ int enforce_password_policy;
+ int auto_login;
+
+ uint64_t stop_delay_usec; /* How long to leave systemd --user around on log-out */
+ int kill_processes; /* Whether to kill user processes forcibly on log-out */
+
+ /* The following exist mostly so that we can cover the full /etc/shadow set of fields */
+ uint64_t password_change_min_usec; /* maps to .sp_min */
+ uint64_t password_change_max_usec; /* maps to .sp_max */
+ uint64_t password_change_warn_usec; /* maps to .sp_warn */
+ uint64_t password_change_inactive_usec; /* maps to .sp_inact */
+ int password_change_now; /* Require a password change immediately on next login (.sp_lstchg = 0) */
+
+ char **pkcs11_token_uri;
+ Pkcs11EncryptedKey *pkcs11_encrypted_key;
+ size_t n_pkcs11_encrypted_key;
+ int pkcs11_protected_authentication_path_permitted;
+
+ Fido2HmacCredential *fido2_hmac_credential;
+ size_t n_fido2_hmac_credential;
+ Fido2HmacSalt *fido2_hmac_salt;
+ size_t n_fido2_hmac_salt;
+ int fido2_user_presence_permitted;
+
+ char **recovery_key_type;
+ RecoveryKey *recovery_key;
+ size_t n_recovery_key;
+
+ JsonVariant *json;
+} UserRecord;
+
+UserRecord* user_record_new(void);
+UserRecord* user_record_ref(UserRecord *h);
+UserRecord* user_record_unref(UserRecord *h);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(UserRecord*, user_record_unref);
+
+int user_record_load(UserRecord *h, JsonVariant *v, UserRecordLoadFlags flags);
+int user_record_build(UserRecord **ret, ...);
+
+const char *user_record_user_name_and_realm(UserRecord *h);
+UserStorage user_record_storage(UserRecord *h);
+const char *user_record_file_system_type(UserRecord *h);
+const char *user_record_skeleton_directory(UserRecord *h);
+mode_t user_record_access_mode(UserRecord *h);
+const char *user_record_home_directory(UserRecord *h);
+const char *user_record_image_path(UserRecord *h);
+unsigned long user_record_mount_flags(UserRecord *h);
+const char *user_record_cifs_user_name(UserRecord *h);
+const char *user_record_shell(UserRecord *h);
+const char *user_record_real_name(UserRecord *h);
+bool user_record_luks_discard(UserRecord *h);
+bool user_record_luks_offline_discard(UserRecord *h);
+const char *user_record_luks_cipher(UserRecord *h);
+const char *user_record_luks_cipher_mode(UserRecord *h);
+uint64_t user_record_luks_volume_key_size(UserRecord *h);
+const char* user_record_luks_pbkdf_type(UserRecord *h);
+usec_t user_record_luks_pbkdf_time_cost_usec(UserRecord *h);
+uint64_t user_record_luks_pbkdf_memory_cost(UserRecord *h);
+uint64_t user_record_luks_pbkdf_parallel_threads(UserRecord *h);
+const char *user_record_luks_pbkdf_hash_algorithm(UserRecord *h);
+gid_t user_record_gid(UserRecord *h);
+UserDisposition user_record_disposition(UserRecord *h);
+int user_record_removable(UserRecord *h);
+usec_t user_record_ratelimit_interval_usec(UserRecord *h);
+uint64_t user_record_ratelimit_burst(UserRecord *h);
+bool user_record_can_authenticate(UserRecord *h);
+
+int user_record_build_image_path(UserStorage storage, const char *user_name_and_realm, char **ret);
+
+bool user_record_equal(UserRecord *a, UserRecord *b);
+bool user_record_compatible(UserRecord *a, UserRecord *b);
+int user_record_compare_last_change(UserRecord *a, UserRecord *b);
+
+usec_t user_record_ratelimit_next_try(UserRecord *h);
+
+int user_record_clone(UserRecord *h, UserRecordLoadFlags flags, UserRecord **ret);
+int user_record_masked_equal(UserRecord *a, UserRecord *b, UserRecordMask mask);
+
+int user_record_test_blocked(UserRecord *h);
+int user_record_test_password_change_required(UserRecord *h);
+
+/* The following six are user by group-record.c, that's why we export them here */
+int json_dispatch_realm(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_gecos(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_user_group_list(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+int json_dispatch_user_disposition(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata);
+
+int per_machine_id_match(JsonVariant *ids, JsonDispatchFlags flags);
+int per_machine_hostname_match(JsonVariant *hns, JsonDispatchFlags flags);
+int user_group_record_mangle(JsonVariant *v, UserRecordLoadFlags load_flags, JsonVariant **ret_variant, UserRecordMask *ret_mask);
+
+const char* user_storage_to_string(UserStorage t) _const_;
+UserStorage user_storage_from_string(const char *s) _pure_;
+
+const char* user_disposition_to_string(UserDisposition t) _const_;
+UserDisposition user_disposition_from_string(const char *s) _pure_;
diff --git a/src/shared/userdb.c b/src/shared/userdb.c
new file mode 100644
index 0000000..2d48028
--- /dev/null
+++ b/src/shared/userdb.c
@@ -0,0 +1,1249 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/auxv.h>
+
+#include "dirent-util.h"
+#include "dlfcn-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "missing_syscall.h"
+#include "parse-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "strv.h"
+#include "user-record-nss.h"
+#include "user-util.h"
+#include "userdb.h"
+#include "varlink.h"
+
+DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(link_hash_ops, void, trivial_hash_func, trivial_compare_func, Varlink, varlink_unref);
+
+typedef enum LookupWhat {
+ LOOKUP_USER,
+ LOOKUP_GROUP,
+ LOOKUP_MEMBERSHIP,
+ _LOOKUP_WHAT_MAX,
+} LookupWhat;
+
+struct UserDBIterator {
+ LookupWhat what;
+ Set *links;
+ bool nss_covered:1;
+ bool nss_iterating:1;
+ bool synthesize_root:1;
+ bool synthesize_nobody:1;
+ bool nss_systemd_blocked:1;
+ int error;
+ unsigned n_found;
+ sd_event *event;
+ UserRecord *found_user; /* when .what == LOOKUP_USER */
+ GroupRecord *found_group; /* when .what == LOOKUP_GROUP */
+
+ char *found_user_name, *found_group_name; /* when .what == LOOKUP_MEMBERSHIP */
+ char **members_of_group;
+ size_t index_members_of_group;
+ char *filter_user_name;
+};
+
+UserDBIterator* userdb_iterator_free(UserDBIterator *iterator) {
+ if (!iterator)
+ return NULL;
+
+ set_free(iterator->links);
+
+ switch (iterator->what) {
+
+ case LOOKUP_USER:
+ user_record_unref(iterator->found_user);
+
+ if (iterator->nss_iterating)
+ endpwent();
+
+ break;
+
+ case LOOKUP_GROUP:
+ group_record_unref(iterator->found_group);
+
+ if (iterator->nss_iterating)
+ endgrent();
+
+ break;
+
+ case LOOKUP_MEMBERSHIP:
+ free(iterator->found_user_name);
+ free(iterator->found_group_name);
+ strv_free(iterator->members_of_group);
+ free(iterator->filter_user_name);
+
+ if (iterator->nss_iterating)
+ endgrent();
+
+ break;
+
+ default:
+ assert_not_reached("Unexpected state?");
+ }
+
+ sd_event_unref(iterator->event);
+
+ if (iterator->nss_systemd_blocked)
+ assert_se(userdb_block_nss_systemd(false) >= 0);
+
+ return mfree(iterator);
+}
+
+static UserDBIterator* userdb_iterator_new(LookupWhat what) {
+ UserDBIterator *i;
+
+ assert(what >= 0);
+ assert(what < _LOOKUP_WHAT_MAX);
+
+ i = new(UserDBIterator, 1);
+ if (!i)
+ return NULL;
+
+ *i = (UserDBIterator) {
+ .what = what,
+ };
+
+ return i;
+}
+
+static int userdb_iterator_block_nss_systemd(UserDBIterator *iterator) {
+ int r;
+
+ assert(iterator);
+
+ if (iterator->nss_systemd_blocked)
+ return 0;
+
+ r = userdb_block_nss_systemd(true);
+ if (r < 0)
+ return r;
+
+ iterator->nss_systemd_blocked = true;
+ return 1;
+}
+
+struct user_group_data {
+ JsonVariant *record;
+ bool incomplete;
+};
+
+static void user_group_data_release(struct user_group_data *d) {
+ json_variant_unref(d->record);
+}
+
+static int userdb_on_query_reply(
+ Varlink *link,
+ JsonVariant *parameters,
+ const char *error_id,
+ VarlinkReplyFlags flags,
+ void *userdata) {
+
+ UserDBIterator *iterator = userdata;
+ int r;
+
+ assert(iterator);
+
+ if (error_id) {
+ log_debug("Got lookup error: %s", error_id);
+
+ if (STR_IN_SET(error_id,
+ "io.systemd.UserDatabase.NoRecordFound",
+ "io.systemd.UserDatabase.ConflictingRecordFound"))
+ r = -ESRCH;
+ else if (streq(error_id, "io.systemd.UserDatabase.ServiceNotAvailable"))
+ r = -EHOSTDOWN;
+ else if (streq(error_id, "io.systemd.UserDatabase.EnumerationNotSupported"))
+ r = -EOPNOTSUPP;
+ else if (streq(error_id, VARLINK_ERROR_TIMEOUT))
+ r = -ETIMEDOUT;
+ else
+ r = -EIO;
+
+ goto finish;
+ }
+
+ switch (iterator->what) {
+
+ case LOOKUP_USER: {
+ _cleanup_(user_group_data_release) struct user_group_data user_data = {};
+
+ static const JsonDispatch dispatch_table[] = {
+ { "record", _JSON_VARIANT_TYPE_INVALID, json_dispatch_variant, offsetof(struct user_group_data, record), 0 },
+ { "incomplete", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(struct user_group_data, incomplete), 0 },
+ {}
+ };
+ _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
+
+ assert_se(!iterator->found_user);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &user_data);
+ if (r < 0)
+ goto finish;
+
+ if (!user_data.record) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EIO), "Reply is missing record key");
+ goto finish;
+ }
+
+ hr = user_record_new();
+ if (!hr) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = user_record_load(hr, user_data.record, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_PERMISSIVE);
+ if (r < 0)
+ goto finish;
+
+ if (!hr->service) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "User record does not carry service information, refusing.");
+ goto finish;
+ }
+
+ hr->incomplete = user_data.incomplete;
+
+ /* We match the root user by the name since the name is our primary key. We match the nobody
+ * use by UID though, since the name might differ on OSes */
+ if (streq_ptr(hr->user_name, "root"))
+ iterator->synthesize_root = false;
+ if (hr->uid == UID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ iterator->found_user = TAKE_PTR(hr);
+ iterator->n_found++;
+
+ /* More stuff coming? then let's just exit cleanly here */
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ return 0;
+
+ /* Otherwise, let's remove this link and exit cleanly then */
+ r = 0;
+ goto finish;
+ }
+
+ case LOOKUP_GROUP: {
+ _cleanup_(user_group_data_release) struct user_group_data group_data = {};
+
+ static const JsonDispatch dispatch_table[] = {
+ { "record", _JSON_VARIANT_TYPE_INVALID, json_dispatch_variant, offsetof(struct user_group_data, record), 0 },
+ { "incomplete", JSON_VARIANT_BOOLEAN, json_dispatch_boolean, offsetof(struct user_group_data, incomplete), 0 },
+ {}
+ };
+ _cleanup_(group_record_unrefp) GroupRecord *g = NULL;
+
+ assert_se(!iterator->found_group);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &group_data);
+ if (r < 0)
+ goto finish;
+
+ if (!group_data.record) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EIO), "Reply is missing record key");
+ goto finish;
+ }
+
+ g = group_record_new();
+ if (!g) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ r = group_record_load(g, group_data.record, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_PERMISSIVE);
+ if (r < 0)
+ goto finish;
+
+ if (!g->service) {
+ r = log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Group record does not carry service information, refusing.");
+ goto finish;
+ }
+
+ g->incomplete = group_data.incomplete;
+
+ if (streq_ptr(g->group_name, "root"))
+ iterator->synthesize_root = false;
+ if (g->gid == GID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ iterator->found_group = TAKE_PTR(g);
+ iterator->n_found++;
+
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ return 0;
+
+ r = 0;
+ goto finish;
+ }
+
+ case LOOKUP_MEMBERSHIP: {
+ struct membership_data {
+ const char *user_name;
+ const char *group_name;
+ } membership_data = {};
+
+ static const JsonDispatch dispatch_table[] = {
+ { "userName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(struct membership_data, user_name), JSON_SAFE },
+ { "groupName", JSON_VARIANT_STRING, json_dispatch_const_string, offsetof(struct membership_data, group_name), JSON_SAFE },
+ {}
+ };
+
+ assert(!iterator->found_user_name);
+ assert(!iterator->found_group_name);
+
+ r = json_dispatch(parameters, dispatch_table, NULL, 0, &membership_data);
+ if (r < 0)
+ goto finish;
+
+ iterator->found_user_name = mfree(iterator->found_user_name);
+ iterator->found_group_name = mfree(iterator->found_group_name);
+
+ iterator->found_user_name = strdup(membership_data.user_name);
+ if (!iterator->found_user_name) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ iterator->found_group_name = strdup(membership_data.group_name);
+ if (!iterator->found_group_name) {
+ r = -ENOMEM;
+ goto finish;
+ }
+
+ iterator->n_found++;
+
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ return 0;
+
+ r = 0;
+ goto finish;
+ }
+
+ default:
+ assert_not_reached("unexpected lookup");
+ }
+
+finish:
+ /* If we got one ESRCH, let that win. This way when we do a wild dump we won't be tripped up by bad
+ * errors if at least one connection ended cleanly */
+ if (r == -ESRCH || iterator->error == 0)
+ iterator->error = -r;
+
+ assert_se(set_remove(iterator->links, link) == link);
+ link = varlink_unref(link);
+ return 0;
+}
+
+static int userdb_connect(
+ UserDBIterator *iterator,
+ const char *path,
+ const char *method,
+ bool more,
+ JsonVariant *query) {
+
+ _cleanup_(varlink_unrefp) Varlink *vl = NULL;
+ int r;
+
+ assert(iterator);
+ assert(path);
+ assert(method);
+
+ r = varlink_connect_address(&vl, path);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to connect to %s: %m", path);
+
+ varlink_set_userdata(vl, iterator);
+
+ if (!iterator->event) {
+ r = sd_event_new(&iterator->event);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to allocate event loop: %m");
+ }
+
+ r = varlink_attach_event(vl, iterator->event, SD_EVENT_PRIORITY_NORMAL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to attach varlink connection to event loop: %m");
+
+ (void) varlink_set_description(vl, path);
+
+ r = varlink_bind_reply(vl, userdb_on_query_reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to bind reply callback: %m");
+
+ if (more)
+ r = varlink_observe(vl, method, query);
+ else
+ r = varlink_invoke(vl, method, query);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to invoke varlink method: %m");
+
+ r = set_ensure_consume(&iterator->links, &link_hash_ops, TAKE_PTR(vl));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to add varlink connection to set: %m");
+ return r;
+}
+
+static int userdb_start_query(
+ UserDBIterator *iterator,
+ const char *method,
+ bool more,
+ JsonVariant *query,
+ UserDBFlags flags) {
+
+ _cleanup_(strv_freep) char **except = NULL, **only = NULL;
+ _cleanup_(closedirp) DIR *d = NULL;
+ struct dirent *de;
+ const char *e;
+ int r, ret = 0;
+
+ assert(iterator);
+ assert(method);
+
+ e = getenv("SYSTEMD_BYPASS_USERDB");
+ if (e) {
+ r = parse_boolean(e);
+ if (r > 0)
+ return -ENOLINK;
+ if (r < 0) {
+ except = strv_split(e, ":");
+ if (!except)
+ return -ENOMEM;
+ }
+ }
+
+ e = getenv("SYSTEMD_ONLY_USERDB");
+ if (e) {
+ only = strv_split(e, ":");
+ if (!only)
+ return -ENOMEM;
+ }
+
+ /* First, let's talk to the multiplexer, if we can */
+ if ((flags & (USERDB_AVOID_MULTIPLEXER|USERDB_AVOID_DYNAMIC_USER|USERDB_AVOID_NSS|USERDB_DONT_SYNTHESIZE)) == 0 &&
+ !strv_contains(except, "io.systemd.Multiplexer") &&
+ (!only || strv_contains(only, "io.systemd.Multiplexer"))) {
+ _cleanup_(json_variant_unrefp) JsonVariant *patched_query = json_variant_ref(query);
+
+ r = json_variant_set_field_string(&patched_query, "service", "io.systemd.Multiplexer");
+ if (r < 0)
+ return log_debug_errno(r, "Unable to set service JSON field: %m");
+
+ r = userdb_connect(iterator, "/run/systemd/userdb/io.systemd.Multiplexer", method, more, patched_query);
+ if (r >= 0) {
+ iterator->nss_covered = true; /* The multiplexer does NSS */
+ return 0;
+ }
+ }
+
+ d = opendir("/run/systemd/userdb/");
+ if (!d) {
+ if (errno == ENOENT)
+ return -ESRCH;
+
+ return -errno;
+ }
+
+ FOREACH_DIRENT(de, d, return -errno) {
+ _cleanup_(json_variant_unrefp) JsonVariant *patched_query = NULL;
+ _cleanup_free_ char *p = NULL;
+ bool is_nss;
+
+ if (streq(de->d_name, "io.systemd.Multiplexer")) /* We already tried this above, don't try this again */
+ continue;
+
+ if (FLAGS_SET(flags, USERDB_AVOID_DYNAMIC_USER) &&
+ streq(de->d_name, "io.systemd.DynamicUser"))
+ continue;
+
+ /* Avoid NSS is this is requested. Note that we also skip NSS when we were asked to skip the
+ * multiplexer, since in that case it's safer to do NSS in the client side emulation below
+ * (and when we run as part of systemd-userdbd.service we don't want to talk to ourselves
+ * anyway). */
+ is_nss = streq(de->d_name, "io.systemd.NameServiceSwitch");
+ if ((flags & (USERDB_AVOID_NSS|USERDB_AVOID_MULTIPLEXER)) && is_nss)
+ continue;
+
+ if (strv_contains(except, de->d_name))
+ continue;
+
+ if (only && !strv_contains(only, de->d_name))
+ continue;
+
+ p = path_join("/run/systemd/userdb/", de->d_name);
+ if (!p)
+ return -ENOMEM;
+
+ patched_query = json_variant_ref(query);
+ r = json_variant_set_field_string(&patched_query, "service", de->d_name);
+ if (r < 0)
+ return log_debug_errno(r, "Unable to set service JSON field: %m");
+
+ r = userdb_connect(iterator, p, method, more, patched_query);
+ if (is_nss && r >= 0) /* Turn off fallback NSS if we found the NSS service and could connect
+ * to it */
+ iterator->nss_covered = true;
+
+ if (ret == 0 && r < 0)
+ ret = r;
+ }
+
+ if (set_isempty(iterator->links))
+ return ret; /* propagate last error we saw if we couldn't connect to anything. */
+
+ /* We connected to some services, in this case, ignore the ones we failed on */
+ return 0;
+}
+
+static int userdb_process(
+ UserDBIterator *iterator,
+ UserRecord **ret_user_record,
+ GroupRecord **ret_group_record,
+ char **ret_user_name,
+ char **ret_group_name) {
+
+ int r;
+
+ assert(iterator);
+
+ for (;;) {
+ if (iterator->what == LOOKUP_USER && iterator->found_user) {
+ if (ret_user_record)
+ *ret_user_record = TAKE_PTR(iterator->found_user);
+ else
+ iterator->found_user = user_record_unref(iterator->found_user);
+
+ if (ret_group_record)
+ *ret_group_record = NULL;
+ if (ret_user_name)
+ *ret_user_name = NULL;
+ if (ret_group_name)
+ *ret_group_name = NULL;
+
+ return 0;
+ }
+
+ if (iterator->what == LOOKUP_GROUP && iterator->found_group) {
+ if (ret_group_record)
+ *ret_group_record = TAKE_PTR(iterator->found_group);
+ else
+ iterator->found_group = group_record_unref(iterator->found_group);
+
+ if (ret_user_record)
+ *ret_user_record = NULL;
+ if (ret_user_name)
+ *ret_user_name = NULL;
+ if (ret_group_name)
+ *ret_group_name = NULL;
+
+ return 0;
+ }
+
+ if (iterator->what == LOOKUP_MEMBERSHIP && iterator->found_user_name && iterator->found_group_name) {
+ if (ret_user_name)
+ *ret_user_name = TAKE_PTR(iterator->found_user_name);
+ else
+ iterator->found_user_name = mfree(iterator->found_user_name);
+
+ if (ret_group_name)
+ *ret_group_name = TAKE_PTR(iterator->found_group_name);
+ else
+ iterator->found_group_name = mfree(iterator->found_group_name);
+
+ if (ret_user_record)
+ *ret_user_record = NULL;
+ if (ret_group_record)
+ *ret_group_record = NULL;
+
+ return 0;
+ }
+
+ if (set_isempty(iterator->links)) {
+ if (iterator->error == 0)
+ return -ESRCH;
+
+ return -abs(iterator->error);
+ }
+
+ if (!iterator->event)
+ return -ESRCH;
+
+ r = sd_event_run(iterator->event, UINT64_MAX);
+ if (r < 0)
+ return r;
+ }
+}
+
+static int synthetic_root_user_build(UserRecord **ret) {
+ return user_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING("root")),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(0)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(0)),
+ JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_STRING("/root")),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+static int synthetic_nobody_user_build(UserRecord **ret) {
+ return user_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(NOBODY_USER_NAME)),
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(UID_NOBODY)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(GID_NOBODY)),
+ JSON_BUILD_PAIR("shell", JSON_BUILD_STRING(NOLOGIN)),
+ JSON_BUILD_PAIR("locked", JSON_BUILD_BOOLEAN(true)),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_USER);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, ret, NULL, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !iterator->nss_covered) {
+ /* Make sure the NSS lookup doesn't recurse back to us. */
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ /* Client-side NSS fallback */
+ r = nss_user_record_by_name(name, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (streq(name, "root"))
+ return synthetic_root_user_build(ret);
+
+ if (streq(name, NOBODY_USER_NAME) && synthesize_nobody())
+ return synthetic_nobody_user_build(ret);
+ }
+
+ return r;
+}
+
+int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!uid_is_valid(uid))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("uid", JSON_BUILD_UNSIGNED(uid))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_USER);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, ret, NULL, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !iterator->nss_covered) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ /* Client-side NSS fallback */
+ r = nss_user_record_by_uid(uid, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (uid == 0)
+ return synthetic_root_user_build(ret);
+
+ if (uid == UID_NOBODY && synthesize_nobody())
+ return synthetic_nobody_user_build(ret);
+ }
+
+ return r;
+}
+
+int userdb_all(UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ int r;
+
+ assert(ret);
+
+ iterator = userdb_iterator_new(LOOKUP_USER);
+ if (!iterator)
+ return -ENOMEM;
+
+ iterator->synthesize_root = iterator->synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE);
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetUserRecord", true, NULL, flags);
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && (r < 0 || !iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ setpwent();
+ iterator->nss_iterating = true;
+ } else if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(iterator);
+ return 0;
+}
+
+int userdb_iterator_get(UserDBIterator *iterator, UserRecord **ret) {
+ int r;
+
+ assert(iterator);
+ assert(iterator->what == LOOKUP_USER);
+
+ if (iterator->nss_iterating) {
+ struct passwd *pw;
+
+ /* If NSS isn't covered elsewhere, let's iterate through it first, since it probably contains
+ * the more traditional sources, which are probably good to show first. */
+
+ pw = getpwent();
+ if (pw) {
+ _cleanup_free_ char *buffer = NULL;
+ bool incomplete = false;
+ struct spwd spwd;
+
+ if (streq_ptr(pw->pw_name, "root"))
+ iterator->synthesize_root = false;
+ if (pw->pw_uid == UID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ r = nss_spwd_for_passwd(pw, &spwd, &buffer);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to acquire shadow entry for user %s, ignoring: %m", pw->pw_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ }
+
+ r = nss_passwd_to_user_record(pw, r >= 0 ? &spwd : NULL, ret);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->incomplete = incomplete;
+ return r;
+ }
+
+ if (errno != 0)
+ log_debug_errno(errno, "Failure to iterate NSS user database, ignoring: %m");
+
+ iterator->nss_iterating = false;
+ endpwent();
+ }
+
+ r = userdb_process(iterator, ret, NULL, NULL, NULL);
+
+ if (r < 0) {
+ if (iterator->synthesize_root) {
+ iterator->synthesize_root = false;
+ iterator->n_found++;
+ return synthetic_root_user_build(ret);
+ }
+
+ if (iterator->synthesize_nobody) {
+ iterator->synthesize_nobody = false;
+ iterator->n_found++;
+ return synthetic_nobody_user_build(ret);
+ }
+ }
+
+ /* if we found at least one entry, then ignore errors and indicate that we reached the end */
+ if (r < 0 && iterator->n_found > 0)
+ return -ESRCH;
+
+ return r;
+}
+
+static int synthetic_root_group_build(GroupRecord **ret) {
+ return group_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING("root")),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(0)),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+static int synthetic_nobody_group_build(GroupRecord **ret) {
+ return group_record_build(
+ ret,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(NOBODY_GROUP_NAME)),
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(GID_NOBODY)),
+ JSON_BUILD_PAIR("disposition", JSON_BUILD_STRING("intrinsic"))));
+}
+
+int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_GROUP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, NULL, ret, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !(iterator && iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ r = nss_group_record_by_name(name, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (streq(name, "root"))
+ return synthetic_root_group_build(ret);
+
+ if (streq(name, NOBODY_GROUP_NAME) && synthesize_nobody())
+ return synthetic_nobody_group_build(ret);
+ }
+
+ return r;
+}
+
+int groupdb_by_gid(gid_t gid, UserDBFlags flags, GroupRecord **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ if (!gid_is_valid(gid))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(gid))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_GROUP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", false, query, flags);
+ if (r >= 0) {
+ r = userdb_process(iterator, NULL, ret, NULL, NULL);
+ if (r >= 0)
+ return r;
+ }
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && !(iterator && iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r >= 0) {
+ r = nss_group_record_by_gid(gid, !FLAGS_SET(flags, USERDB_AVOID_SHADOW), ret);
+ if (r >= 0)
+ return r;
+ }
+ }
+
+ if (!FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE)) {
+ if (gid == 0)
+ return synthetic_root_group_build(ret);
+
+ if (gid == GID_NOBODY && synthesize_nobody())
+ return synthetic_nobody_group_build(ret);
+ }
+
+ return r;
+}
+
+int groupdb_all(UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ int r;
+
+ assert(ret);
+
+ iterator = userdb_iterator_new(LOOKUP_GROUP);
+ if (!iterator)
+ return -ENOMEM;
+
+ iterator->synthesize_root = iterator->synthesize_nobody = !FLAGS_SET(flags, USERDB_DONT_SYNTHESIZE);
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetGroupRecord", true, NULL, flags);
+
+ if (!FLAGS_SET(flags, USERDB_AVOID_NSS) && (r < 0 || !iterator->nss_covered)) {
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ setgrent();
+ iterator->nss_iterating = true;
+ } if (r < 0)
+ return r;
+
+ *ret = TAKE_PTR(iterator);
+ return 0;
+}
+
+int groupdb_iterator_get(UserDBIterator *iterator, GroupRecord **ret) {
+ int r;
+
+ assert(iterator);
+ assert(iterator->what == LOOKUP_GROUP);
+
+ if (iterator->nss_iterating) {
+ struct group *gr;
+
+ errno = 0;
+ gr = getgrent();
+ if (gr) {
+ _cleanup_free_ char *buffer = NULL;
+ bool incomplete = false;
+ struct sgrp sgrp;
+
+ if (streq_ptr(gr->gr_name, "root"))
+ iterator->synthesize_root = false;
+ if (gr->gr_gid == GID_NOBODY)
+ iterator->synthesize_nobody = false;
+
+ r = nss_sgrp_for_group(gr, &sgrp, &buffer);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to acquire shadow entry for group %s, ignoring: %m", gr->gr_name);
+ incomplete = ERRNO_IS_PRIVILEGE(r);
+ }
+
+ r = nss_group_to_group_record(gr, r >= 0 ? &sgrp : NULL, ret);
+ if (r < 0)
+ return r;
+
+ if (ret)
+ (*ret)->incomplete = incomplete;
+ return r;
+ }
+
+ if (errno != 0)
+ log_debug_errno(errno, "Failure to iterate NSS group database, ignoring: %m");
+
+ iterator->nss_iterating = false;
+ endgrent();
+ }
+
+ r = userdb_process(iterator, NULL, ret, NULL, NULL);
+ if (r < 0) {
+ if (iterator->synthesize_root) {
+ iterator->synthesize_root = false;
+ iterator->n_found++;
+ return synthetic_root_group_build(ret);
+ }
+
+ if (iterator->synthesize_nobody) {
+ iterator->synthesize_nobody = false;
+ iterator->n_found++;
+ return synthetic_nobody_group_build(ret);
+ }
+ }
+
+ /* if we found at least one entry, then ignore errors and indicate that we reached the end */
+ if (r < 0 && iterator->n_found > 0)
+ return -ESRCH;
+
+ return r;
+}
+
+int membershipdb_by_user(const char *name, UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ int r;
+
+ assert(ret);
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("userName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, query, flags);
+ if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS))
+ goto finish;
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ iterator->filter_user_name = strdup(name);
+ if (!iterator->filter_user_name)
+ return -ENOMEM;
+
+ setgrent();
+ iterator->nss_iterating = true;
+
+ r = 0;
+
+finish:
+ if (r >= 0)
+ *ret = TAKE_PTR(iterator);
+ return r;
+}
+
+int membershipdb_by_group(const char *name, UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_(json_variant_unrefp) JsonVariant *query = NULL;
+ _cleanup_(group_record_unrefp) GroupRecord *gr = NULL;
+ int r;
+
+ assert(ret);
+
+ if (!valid_user_group_name(name, VALID_USER_RELAX))
+ return -EINVAL;
+
+ r = json_build(&query, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("groupName", JSON_BUILD_STRING(name))));
+ if (r < 0)
+ return r;
+
+ iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, query, flags);
+ if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS))
+ goto finish;
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ /* We ignore all errors here, since the group might be defined by a userdb native service, and we queried them already above. */
+ (void) nss_group_record_by_name(name, false, &gr);
+ if (gr) {
+ iterator->members_of_group = strv_copy(gr->members);
+ if (!iterator->members_of_group)
+ return -ENOMEM;
+
+ iterator->index_members_of_group = 0;
+
+ iterator->found_group_name = strdup(name);
+ if (!iterator->found_group_name)
+ return -ENOMEM;
+ }
+
+ r = 0;
+
+finish:
+ if (r >= 0)
+ *ret = TAKE_PTR(iterator);
+
+ return r;
+}
+
+int membershipdb_all(UserDBFlags flags, UserDBIterator **ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ int r;
+
+ assert(ret);
+
+ iterator = userdb_iterator_new(LOOKUP_MEMBERSHIP);
+ if (!iterator)
+ return -ENOMEM;
+
+ r = userdb_start_query(iterator, "io.systemd.UserDatabase.GetMemberships", true, NULL, flags);
+ if ((r >= 0 && iterator->nss_covered) || FLAGS_SET(flags, USERDB_AVOID_NSS))
+ goto finish;
+
+ r = userdb_iterator_block_nss_systemd(iterator);
+ if (r < 0)
+ return r;
+
+ setgrent();
+ iterator->nss_iterating = true;
+
+ r = 0;
+
+finish:
+ if (r >= 0)
+ *ret = TAKE_PTR(iterator);
+
+ return r;
+}
+
+int membershipdb_iterator_get(
+ UserDBIterator *iterator,
+ char **ret_user,
+ char **ret_group) {
+
+ int r;
+
+ assert(iterator);
+
+ for (;;) {
+ /* If we are iteratring through NSS acquire a new group entry if we haven't acquired one yet. */
+ if (!iterator->members_of_group) {
+ struct group *g;
+
+ if (!iterator->nss_iterating)
+ break;
+
+ assert(!iterator->found_user_name);
+ do {
+ errno = 0;
+ g = getgrent();
+ if (!g) {
+ if (errno != 0)
+ log_debug_errno(errno, "Failure during NSS group iteration, ignoring: %m");
+ break;
+ }
+
+ } while (iterator->filter_user_name ? !strv_contains(g->gr_mem, iterator->filter_user_name) :
+ strv_isempty(g->gr_mem));
+
+ if (g) {
+ r = free_and_strdup(&iterator->found_group_name, g->gr_name);
+ if (r < 0)
+ return r;
+
+ if (iterator->filter_user_name)
+ iterator->members_of_group = strv_new(iterator->filter_user_name);
+ else
+ iterator->members_of_group = strv_copy(g->gr_mem);
+ if (!iterator->members_of_group)
+ return -ENOMEM;
+
+ iterator->index_members_of_group = 0;
+ } else {
+ iterator->nss_iterating = false;
+ endgrent();
+ break;
+ }
+ }
+
+ assert(iterator->found_group_name);
+ assert(iterator->members_of_group);
+ assert(!iterator->found_user_name);
+
+ if (iterator->members_of_group[iterator->index_members_of_group]) {
+ _cleanup_free_ char *cu = NULL, *cg = NULL;
+
+ if (ret_user) {
+ cu = strdup(iterator->members_of_group[iterator->index_members_of_group]);
+ if (!cu)
+ return -ENOMEM;
+ }
+
+ if (ret_group) {
+ cg = strdup(iterator->found_group_name);
+ if (!cg)
+ return -ENOMEM;
+ }
+
+ if (ret_user)
+ *ret_user = TAKE_PTR(cu);
+
+ if (ret_group)
+ *ret_group = TAKE_PTR(cg);
+
+ iterator->index_members_of_group++;
+ return 0;
+ }
+
+ iterator->members_of_group = strv_free(iterator->members_of_group);
+ iterator->found_group_name = mfree(iterator->found_group_name);
+ }
+
+ r = userdb_process(iterator, NULL, NULL, ret_user, ret_group);
+ if (r < 0 && iterator->n_found > 0)
+ return -ESRCH;
+
+ return r;
+}
+
+int membershipdb_by_group_strv(const char *name, UserDBFlags flags, char ***ret) {
+ _cleanup_(userdb_iterator_freep) UserDBIterator *iterator = NULL;
+ _cleanup_strv_free_ char **members = NULL;
+ int r;
+
+ assert(name);
+ assert(ret);
+
+ r = membershipdb_by_group(name, flags, &iterator);
+ if (r < 0)
+ return r;
+
+ for (;;) {
+ _cleanup_free_ char *user_name = NULL;
+
+ r = membershipdb_iterator_get(iterator, &user_name, NULL);
+ if (r == -ESRCH)
+ break;
+ if (r < 0)
+ return r;
+
+ r = strv_consume(&members, TAKE_PTR(user_name));
+ if (r < 0)
+ return r;
+ }
+
+ strv_sort(members);
+ strv_uniq(members);
+
+ *ret = TAKE_PTR(members);
+ return 0;
+}
+
+int userdb_block_nss_systemd(int b) {
+ _cleanup_(dlclosep) void *dl = NULL;
+ int (*call)(bool b);
+
+ /* Note that we might be called from libnss_systemd.so.2 itself, but that should be fine, really. */
+
+ dl = dlopen(ROOTLIBDIR "/libnss_systemd.so.2", RTLD_LAZY|RTLD_NODELETE);
+ if (!dl) {
+ /* If the file isn't installed, don't complain loudly */
+ log_debug("Failed to dlopen(libnss_systemd.so.2), ignoring: %s", dlerror());
+ return 0;
+ }
+
+ call = (int (*)(bool b)) dlsym(dl, "_nss_systemd_block");
+ if (!call)
+ /* If the file is is installed but lacks the symbol we expect, things are weird, let's complain */
+ return log_debug_errno(SYNTHETIC_ERRNO(ELIBBAD),
+ "Unable to find symbol _nss_systemd_block in libnss_systemd.so.2: %s", dlerror());
+
+ return call(b);
+}
diff --git a/src/shared/userdb.h b/src/shared/userdb.h
new file mode 100644
index 0000000..ee207b5
--- /dev/null
+++ b/src/shared/userdb.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "group-record.h"
+#include "user-record.h"
+
+/* Inquire local services for user/group records */
+
+typedef struct UserDBIterator UserDBIterator;
+
+UserDBIterator *userdb_iterator_free(UserDBIterator *iterator);
+DEFINE_TRIVIAL_CLEANUP_FUNC(UserDBIterator*, userdb_iterator_free);
+
+typedef enum UserDBFlags {
+ USERDB_AVOID_NSS = 1 << 0, /* don't do client-side nor server-side NSS */
+ USERDB_AVOID_SHADOW = 1 << 1, /* don't do client-side shadow calls (server side might happen though) */
+ USERDB_AVOID_DYNAMIC_USER = 1 << 2, /* exclude looking up in io.systemd.DynamicUser */
+ USERDB_AVOID_MULTIPLEXER = 1 << 3, /* exclude looking up via io.systemd.Multiplexer */
+ USERDB_DONT_SYNTHESIZE = 1 << 4, /* don't synthesize root/nobody */
+} UserDBFlags;
+
+int userdb_by_name(const char *name, UserDBFlags flags, UserRecord **ret);
+int userdb_by_uid(uid_t uid, UserDBFlags flags, UserRecord **ret);
+int userdb_all(UserDBFlags flags, UserDBIterator **ret);
+int userdb_iterator_get(UserDBIterator *iterator, UserRecord **ret);
+
+int groupdb_by_name(const char *name, UserDBFlags flags, GroupRecord **ret);
+int groupdb_by_gid(gid_t gid, UserDBFlags flags, GroupRecord **ret);
+int groupdb_all(UserDBFlags flags, UserDBIterator **ret);
+int groupdb_iterator_get(UserDBIterator *iterator, GroupRecord **ret);
+
+int membershipdb_by_user(const char *name, UserDBFlags flags, UserDBIterator **ret);
+int membershipdb_by_group(const char *name, UserDBFlags flags, UserDBIterator **ret);
+int membershipdb_all(UserDBFlags flags, UserDBIterator **ret);
+int membershipdb_iterator_get(UserDBIterator *iterator, char **user, char **group);
+int membershipdb_by_group_strv(const char *name, UserDBFlags flags, char ***ret);
+
+int userdb_block_nss_systemd(int b);
diff --git a/src/shared/utmp-wtmp.c b/src/shared/utmp-wtmp.c
new file mode 100644
index 0000000..b36bc20
--- /dev/null
+++ b/src/shared/utmp-wtmp.c
@@ -0,0 +1,409 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <utmpx.h>
+
+#include "alloc-util.h"
+#include "fd-util.h"
+#include "hostname-util.h"
+#include "io-util.h"
+#include "macro.h"
+#include "memory-util.h"
+#include "path-util.h"
+#include "string-util.h"
+#include "terminal-util.h"
+#include "time-util.h"
+#include "user-util.h"
+#include "utmp-wtmp.h"
+
+int utmp_get_runlevel(int *runlevel, int *previous) {
+ _cleanup_(utxent_cleanup) bool utmpx = false;
+ struct utmpx *found, lookup = { .ut_type = RUN_LVL };
+ const char *e;
+
+ assert(runlevel);
+
+ /* If these values are set in the environment this takes
+ * precedence. Presumably, sysvinit does this to work around a
+ * race condition that would otherwise exist where we'd always
+ * go to disk and hence might read runlevel data that might be
+ * very new and not apply to the current script being executed. */
+
+ e = getenv("RUNLEVEL");
+ if (e && e[0] > 0) {
+ *runlevel = e[0];
+
+ if (previous) {
+ /* $PREVLEVEL seems to be an Upstart thing */
+
+ e = getenv("PREVLEVEL");
+ if (e && e[0] > 0)
+ *previous = e[0];
+ else
+ *previous = 0;
+ }
+
+ return 0;
+ }
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return -errno;
+
+ utmpx = utxent_start();
+
+ found = getutxid(&lookup);
+ if (!found)
+ return -errno;
+
+ *runlevel = found->ut_pid & 0xFF;
+ if (previous)
+ *previous = (found->ut_pid >> 8) & 0xFF;
+
+ return 0;
+}
+
+static void init_timestamp(struct utmpx *store, usec_t t) {
+ assert(store);
+
+ if (t <= 0)
+ t = now(CLOCK_REALTIME);
+
+ store->ut_tv.tv_sec = t / USEC_PER_SEC;
+ store->ut_tv.tv_usec = t % USEC_PER_SEC;
+}
+
+static void init_entry(struct utmpx *store, usec_t t) {
+ struct utsname uts = {};
+
+ assert(store);
+
+ init_timestamp(store, t);
+
+ if (uname(&uts) >= 0)
+ strncpy(store->ut_host, uts.release, sizeof(store->ut_host));
+
+ strncpy(store->ut_line, "~", sizeof(store->ut_line)); /* or ~~ ? */
+ strncpy(store->ut_id, "~~", sizeof(store->ut_id));
+}
+
+static int write_entry_utmp(const struct utmpx *store) {
+ _cleanup_(utxent_cleanup) bool utmpx = false;
+
+ assert(store);
+
+ /* utmp is similar to wtmp, but there is only one entry for
+ * each entry type resp. user; i.e. basically a key/value
+ * table. */
+
+ if (utmpxname(_PATH_UTMPX) < 0)
+ return -errno;
+
+ utmpx = utxent_start();
+
+ if (pututxline(store))
+ return 0;
+ if (errno == ENOENT) {
+ /* If utmp/wtmp have been disabled, that's a good thing, hence ignore the error. */
+ log_debug_errno(errno, "Not writing utmp: %m");
+ return 0;
+ }
+ return -errno;
+}
+
+static int write_entry_wtmp(const struct utmpx *store) {
+ assert(store);
+
+ /* wtmp is a simple append-only file where each entry is
+ * simply appended to the end; i.e. basically a log. */
+
+ errno = 0;
+ updwtmpx(_PATH_WTMPX, store);
+ if (errno == ENOENT) {
+ /* If utmp/wtmp have been disabled, that's a good thing, hence ignore the error. */
+ log_debug_errno(errno, "Not writing wtmp: %m");
+ return 0;
+ }
+ if (errno == EROFS) {
+ log_warning_errno(errno, "Failed to write wtmp record, ignoring: %m");
+ return 0;
+ }
+ return -errno;
+}
+
+static int write_utmp_wtmp(const struct utmpx *store_utmp, const struct utmpx *store_wtmp) {
+ int r, s;
+
+ r = write_entry_utmp(store_utmp);
+ s = write_entry_wtmp(store_wtmp);
+ return r < 0 ? r : s;
+}
+
+static int write_entry_both(const struct utmpx *store) {
+ return write_utmp_wtmp(store, store);
+}
+
+int utmp_put_shutdown(void) {
+ struct utmpx store = {};
+
+ init_entry(&store, 0);
+
+ store.ut_type = RUN_LVL;
+ strncpy(store.ut_user, "shutdown", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+int utmp_put_reboot(usec_t t) {
+ struct utmpx store = {};
+
+ init_entry(&store, t);
+
+ store.ut_type = BOOT_TIME;
+ strncpy(store.ut_user, "reboot", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+static void copy_suffix(char *buf, size_t buf_size, const char *src) {
+ size_t l;
+
+ l = strlen(src);
+ if (l < buf_size)
+ strncpy(buf, src, buf_size);
+ else
+ memcpy(buf, src + l - buf_size, buf_size);
+}
+
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ struct utmpx store = {
+ .ut_type = INIT_PROCESS,
+ .ut_pid = pid,
+ .ut_session = sid,
+ };
+ int r;
+
+ assert(id);
+
+ init_timestamp(&store, 0);
+
+ /* Copy the whole string if it fits, or just the suffix without the terminating NUL. */
+ copy_suffix(store.ut_id, sizeof(store.ut_id), id);
+
+ if (line)
+ strncpy_exact(store.ut_line, line, sizeof(store.ut_line));
+
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(ut_type, LOGIN_PROCESS, USER_PROCESS)) {
+ store.ut_type = LOGIN_PROCESS;
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ if (ut_type == USER_PROCESS) {
+ store.ut_type = USER_PROCESS;
+ strncpy(store.ut_user, user, sizeof(store.ut_user)-1);
+ r = write_entry_both(&store);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+ struct utmpx lookup = {
+ .ut_type = INIT_PROCESS /* looks for DEAD_PROCESS, LOGIN_PROCESS, USER_PROCESS, too */
+ }, store, store_wtmp, *found;
+
+ assert(id);
+
+ setutxent();
+
+ /* Copy the whole string if it fits, or just the suffix without the terminating NUL. */
+ copy_suffix(store.ut_id, sizeof(store.ut_id), id);
+
+ found = getutxid(&lookup);
+ if (!found)
+ return 0;
+
+ if (found->ut_pid != pid)
+ return 0;
+
+ memcpy(&store, found, sizeof(store));
+ store.ut_type = DEAD_PROCESS;
+ store.ut_exit.e_termination = code;
+ store.ut_exit.e_exit = status;
+
+ zero(store.ut_user);
+ zero(store.ut_host);
+ zero(store.ut_tv);
+
+ memcpy(&store_wtmp, &store, sizeof(store_wtmp));
+ /* wtmp wants the current time */
+ init_timestamp(&store_wtmp, 0);
+
+ return write_utmp_wtmp(&store, &store_wtmp);
+}
+
+int utmp_put_runlevel(int runlevel, int previous) {
+ struct utmpx store = {};
+ int r;
+
+ assert(runlevel > 0);
+
+ if (previous <= 0) {
+ /* Find the old runlevel automatically */
+
+ r = utmp_get_runlevel(&previous, NULL);
+ if (r < 0) {
+ if (r != -ESRCH)
+ return r;
+
+ previous = 0;
+ }
+ }
+
+ if (previous == runlevel)
+ return 0;
+
+ init_entry(&store, 0);
+
+ store.ut_type = RUN_LVL;
+ store.ut_pid = (runlevel & 0xFF) | ((previous & 0xFF) << 8);
+ strncpy(store.ut_user, "runlevel", sizeof(store.ut_user));
+
+ return write_entry_both(&store);
+}
+
+#define TIMEOUT_USEC (50 * USEC_PER_MSEC)
+
+static int write_to_terminal(const char *tty, const char *message) {
+ _cleanup_close_ int fd = -1;
+ const char *p;
+ size_t left;
+ usec_t end;
+
+ assert(tty);
+ assert(message);
+
+ fd = open(tty, O_WRONLY|O_NONBLOCK|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0 || !isatty(fd))
+ return -errno;
+
+ p = message;
+ left = strlen(message);
+
+ end = now(CLOCK_MONOTONIC) + TIMEOUT_USEC;
+
+ while (left > 0) {
+ ssize_t n;
+ usec_t t;
+ int k;
+
+ t = now(CLOCK_MONOTONIC);
+
+ if (t >= end)
+ return -ETIME;
+
+ k = fd_wait_for_event(fd, POLLOUT, end - t);
+ if (k < 0)
+ return k;
+ if (k == 0)
+ return -ETIME;
+
+ n = write(fd, p, left);
+ if (n < 0) {
+ if (errno == EAGAIN)
+ continue;
+
+ return -errno;
+ }
+
+ assert((size_t) n <= left);
+
+ p += n;
+ left -= n;
+ }
+
+ return 0;
+}
+
+int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata) {
+
+ _cleanup_free_ char *text = NULL, *hn = NULL, *un = NULL, *stdin_tty = NULL;
+ char date[FORMAT_TIMESTAMP_MAX];
+ struct utmpx *u;
+ int r;
+
+ hn = gethostname_malloc();
+ if (!hn)
+ return -ENOMEM;
+ if (!username) {
+ un = getlogname_malloc();
+ if (!un)
+ return -ENOMEM;
+ }
+
+ if (!origin_tty) {
+ getttyname_harder(STDIN_FILENO, &stdin_tty);
+ origin_tty = stdin_tty;
+ }
+
+ if (asprintf(&text,
+ "\a\r\n"
+ "Broadcast message from %s@%s%s%s (%s):\r\n\r\n"
+ "%s\r\n\r\n",
+ un ?: username, hn,
+ origin_tty ? " on " : "", strempty(origin_tty),
+ format_timestamp(date, sizeof(date), now(CLOCK_REALTIME)),
+ message) < 0)
+ return -ENOMEM;
+
+ setutxent();
+
+ r = 0;
+
+ while ((u = getutxent())) {
+ _cleanup_free_ char *buf = NULL;
+ const char *path;
+ int q;
+
+ if (u->ut_type != USER_PROCESS || u->ut_user[0] == 0)
+ continue;
+
+ /* this access is fine, because STRLEN("/dev/") << 32 (UT_LINESIZE) */
+ if (path_startswith(u->ut_line, "/dev/"))
+ path = u->ut_line;
+ else {
+ if (asprintf(&buf, "/dev/%.*s", (int) sizeof(u->ut_line), u->ut_line) < 0)
+ return -ENOMEM;
+
+ path = buf;
+ }
+
+ if (!match_tty || match_tty(path, userdata)) {
+ q = write_to_terminal(path, text);
+ if (q < 0)
+ r = q;
+ }
+ }
+
+ return r;
+}
diff --git a/src/shared/utmp-wtmp.h b/src/shared/utmp-wtmp.h
new file mode 100644
index 0000000..3e71f76
--- /dev/null
+++ b/src/shared/utmp-wtmp.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "time-util.h"
+#include "util.h"
+
+#if ENABLE_UTMP
+#include <utmpx.h>
+
+int utmp_get_runlevel(int *runlevel, int *previous);
+
+int utmp_put_shutdown(void);
+int utmp_put_reboot(usec_t timestamp);
+int utmp_put_runlevel(int runlevel, int previous);
+
+int utmp_put_dead_process(const char *id, pid_t pid, int code, int status);
+int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user);
+
+int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata);
+
+static inline bool utxent_start(void) {
+ setutxent();
+ return true;
+}
+static inline void utxent_cleanup(bool *initialized) {
+ if (initialized)
+ endutxent();
+}
+
+#else /* ENABLE_UTMP */
+
+static inline int utmp_get_runlevel(int *runlevel, int *previous) {
+ return -ESRCH;
+}
+static inline int utmp_put_shutdown(void) {
+ return 0;
+}
+static inline int utmp_put_reboot(usec_t timestamp) {
+ return 0;
+}
+static inline int utmp_put_runlevel(int runlevel, int previous) {
+ return 0;
+}
+static inline int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) {
+ return 0;
+}
+static inline int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) {
+ return 0;
+}
+static inline int utmp_wall(
+ const char *message,
+ const char *username,
+ const char *origin_tty,
+ bool (*match_tty)(const char *tty, void *userdata),
+ void *userdata) {
+ return 0;
+}
+
+#endif /* ENABLE_UTMP */
diff --git a/src/shared/varlink.c b/src/shared/varlink.c
new file mode 100644
index 0000000..e7be33c
--- /dev/null
+++ b/src/shared/varlink.c
@@ -0,0 +1,2502 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/poll.h>
+
+#include "alloc-util.h"
+#include "errno-util.h"
+#include "fd-util.h"
+#include "hashmap.h"
+#include "io-util.h"
+#include "list.h"
+#include "process-util.h"
+#include "selinux-util.h"
+#include "set.h"
+#include "socket-util.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "strv.h"
+#include "time-util.h"
+#include "umask-util.h"
+#include "user-util.h"
+#include "varlink.h"
+
+#define VARLINK_DEFAULT_CONNECTIONS_MAX 4096U
+#define VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX 1024U
+
+#define VARLINK_DEFAULT_TIMEOUT_USEC (45U*USEC_PER_SEC)
+#define VARLINK_BUFFER_MAX (16U*1024U*1024U)
+#define VARLINK_READ_SIZE (64U*1024U)
+
+typedef enum VarlinkState {
+ /* Client side states */
+ VARLINK_IDLE_CLIENT,
+ VARLINK_AWAITING_REPLY,
+ VARLINK_AWAITING_REPLY_MORE,
+ VARLINK_CALLING,
+ VARLINK_CALLED,
+ VARLINK_PROCESSING_REPLY,
+
+ /* Server side states */
+ VARLINK_IDLE_SERVER,
+ VARLINK_PROCESSING_METHOD,
+ VARLINK_PROCESSING_METHOD_MORE,
+ VARLINK_PROCESSING_METHOD_ONEWAY,
+ VARLINK_PROCESSED_METHOD,
+ VARLINK_PENDING_METHOD,
+ VARLINK_PENDING_METHOD_MORE,
+
+ /* Common states (only during shutdown) */
+ VARLINK_PENDING_DISCONNECT,
+ VARLINK_PENDING_TIMEOUT,
+ VARLINK_PROCESSING_DISCONNECT,
+ VARLINK_PROCESSING_TIMEOUT,
+ VARLINK_PROCESSING_FAILURE,
+ VARLINK_DISCONNECTED,
+
+ _VARLINK_STATE_MAX,
+ _VARLINK_STATE_INVALID = -1
+} VarlinkState;
+
+/* Tests whether we are not yet disconnected. Note that this is true during all states where the connection
+ * is still good for something, and false only when it's dead for good. This means: when we are
+ * asynchronously connecting to a peer and the connect() is still pending, then this will return 'true', as
+ * the connection is still good, and we are likely to be able to properly operate on it soon. */
+#define VARLINK_STATE_IS_ALIVE(state) \
+ IN_SET(state, \
+ VARLINK_IDLE_CLIENT, \
+ VARLINK_AWAITING_REPLY, \
+ VARLINK_AWAITING_REPLY_MORE, \
+ VARLINK_CALLING, \
+ VARLINK_CALLED, \
+ VARLINK_PROCESSING_REPLY, \
+ VARLINK_IDLE_SERVER, \
+ VARLINK_PROCESSING_METHOD, \
+ VARLINK_PROCESSING_METHOD_MORE, \
+ VARLINK_PROCESSING_METHOD_ONEWAY, \
+ VARLINK_PROCESSED_METHOD, \
+ VARLINK_PENDING_METHOD, \
+ VARLINK_PENDING_METHOD_MORE)
+
+struct Varlink {
+ unsigned n_ref;
+
+ VarlinkServer *server;
+
+ VarlinkState state;
+ bool connecting; /* This boolean indicates whether the socket fd we are operating on is currently
+ * processing an asynchronous connect(). In that state we watch the socket for
+ * EPOLLOUT, but we refrain from calling read() or write() on the socket as that
+ * will trigger ENOTCONN. Note that this boolean is kept separate from the
+ * VarlinkState above on purpose: while the connect() is still not complete we
+ * already want to allow queuing of messages and similar. Thus it's nice to keep
+ * these two state concepts separate: the VarlinkState encodes what our own view of
+ * the connection is, i.e. whether we think it's a server, a client, and has
+ * something queued already, while 'connecting' tells us a detail about the
+ * transport used below, that should have no effect on how we otherwise accept and
+ * process operations from the user.
+ *
+ * Or to say this differently: VARLINK_STATE_IS_ALIVE(state) tells you whether the
+ * connection is good to use, even if it might not be fully connected
+ * yet. connecting=true then informs you that actually we are still connecting, and
+ * the connection is actually not established yet and thus any requests you enqueue
+ * now will still work fine but will be queued only, not sent yet, but that
+ * shouldn't stop you from using the connection, since eventually whatever you queue
+ * *will* be sent.
+ *
+ * Or to say this even differently: 'state' is a high-level ("application layer"
+ * high, if you so will) state, while 'conecting' is a low-level ("transport layer"
+ * low, if you so will) state, and while they are not entirely unrelated and
+ * sometimes propagate effects to each other they are only asynchronously connected
+ * at most. */
+ unsigned n_pending;
+
+ int fd;
+
+ char *input_buffer; /* valid data starts at input_buffer_index, ends at input_buffer_index+input_buffer_size */
+ size_t input_buffer_allocated;
+ size_t input_buffer_index;
+ size_t input_buffer_size;
+ size_t input_buffer_unscanned;
+
+ char *output_buffer; /* valid data starts at output_buffer_index, ends at output_buffer_index+output_buffer_size */
+ size_t output_buffer_allocated;
+ size_t output_buffer_index;
+ size_t output_buffer_size;
+
+ VarlinkReply reply_callback;
+
+ JsonVariant *current;
+ JsonVariant *reply;
+
+ struct ucred ucred;
+ bool ucred_acquired:1;
+
+ bool write_disconnected:1;
+ bool read_disconnected:1;
+ bool prefer_read_write:1;
+ bool got_pollhup:1;
+
+ usec_t timestamp;
+ usec_t timeout;
+
+ void *userdata;
+ char *description;
+
+ sd_event *event;
+ sd_event_source *io_event_source;
+ sd_event_source *time_event_source;
+ sd_event_source *quit_event_source;
+ sd_event_source *defer_event_source;
+};
+
+typedef struct VarlinkServerSocket VarlinkServerSocket;
+
+struct VarlinkServerSocket {
+ VarlinkServer *server;
+
+ int fd;
+ char *address;
+
+ sd_event_source *event_source;
+
+ LIST_FIELDS(VarlinkServerSocket, sockets);
+};
+
+struct VarlinkServer {
+ unsigned n_ref;
+ VarlinkServerFlags flags;
+
+ LIST_HEAD(VarlinkServerSocket, sockets);
+
+ Hashmap *methods;
+ VarlinkConnect connect_callback;
+ VarlinkDisconnect disconnect_callback;
+
+ sd_event *event;
+ int64_t event_priority;
+
+ unsigned n_connections;
+ Hashmap *by_uid;
+
+ void *userdata;
+ char *description;
+
+ unsigned connections_max;
+ unsigned connections_per_uid_max;
+};
+
+static const char* const varlink_state_table[_VARLINK_STATE_MAX] = {
+ [VARLINK_IDLE_CLIENT] = "idle-client",
+ [VARLINK_AWAITING_REPLY] = "awaiting-reply",
+ [VARLINK_AWAITING_REPLY_MORE] = "awaiting-reply-more",
+ [VARLINK_CALLING] = "calling",
+ [VARLINK_CALLED] = "called",
+ [VARLINK_PROCESSING_REPLY] = "processing-reply",
+ [VARLINK_IDLE_SERVER] = "idle-server",
+ [VARLINK_PROCESSING_METHOD] = "processing-method",
+ [VARLINK_PROCESSING_METHOD_MORE] = "processing-method-more",
+ [VARLINK_PROCESSING_METHOD_ONEWAY] = "processing-method-oneway",
+ [VARLINK_PROCESSED_METHOD] = "processed-method",
+ [VARLINK_PENDING_METHOD] = "pending-method",
+ [VARLINK_PENDING_METHOD_MORE] = "pending-method-more",
+ [VARLINK_PENDING_DISCONNECT] = "pending-disconnect",
+ [VARLINK_PENDING_TIMEOUT] = "pending-timeout",
+ [VARLINK_PROCESSING_DISCONNECT] = "processing-disconnect",
+ [VARLINK_PROCESSING_TIMEOUT] = "processing-timeout",
+ [VARLINK_PROCESSING_FAILURE] = "processing-failure",
+ [VARLINK_DISCONNECTED] = "disconnected",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(varlink_state, VarlinkState);
+
+#define varlink_log_errno(v, error, fmt, ...) \
+ log_debug_errno(error, "%s: " fmt, varlink_description(v), ##__VA_ARGS__)
+
+#define varlink_log(v, fmt, ...) \
+ log_debug("%s: " fmt, varlink_description(v), ##__VA_ARGS__)
+
+#define varlink_server_log_errno(s, error, fmt, ...) \
+ log_debug_errno(error, "%s: " fmt, varlink_server_description(s), ##__VA_ARGS__)
+
+#define varlink_server_log(s, fmt, ...) \
+ log_debug("%s: " fmt, varlink_server_description(s), ##__VA_ARGS__)
+
+static inline const char *varlink_description(Varlink *v) {
+ return strna(v ? v->description : NULL);
+}
+
+static inline const char *varlink_server_description(VarlinkServer *s) {
+ return strna(s ? s->description : NULL);
+}
+
+static void varlink_set_state(Varlink *v, VarlinkState state) {
+ assert(v);
+ assert(state >= 0 && state < _VARLINK_STATE_MAX);
+
+ if (v->state < 0)
+ varlink_log(v, "varlink: setting state %s",
+ varlink_state_to_string(state));
+ else
+ varlink_log(v, "varlink: changing state %s → %s",
+ varlink_state_to_string(v->state),
+ varlink_state_to_string(state));
+
+ v->state = state;
+}
+
+static int varlink_new(Varlink **ret) {
+ Varlink *v;
+
+ assert(ret);
+
+ v = new(Varlink, 1);
+ if (!v)
+ return -ENOMEM;
+
+ *v = (Varlink) {
+ .n_ref = 1,
+ .fd = -1,
+
+ .state = _VARLINK_STATE_INVALID,
+
+ .ucred.uid = UID_INVALID,
+ .ucred.gid = GID_INVALID,
+
+ .timestamp = USEC_INFINITY,
+ .timeout = VARLINK_DEFAULT_TIMEOUT_USEC
+ };
+
+ *ret = v;
+ return 0;
+}
+
+int varlink_connect_address(Varlink **ret, const char *address) {
+ _cleanup_(varlink_unrefp) Varlink *v = NULL;
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_len;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(address, -EINVAL);
+
+ r = sockaddr_un_set_path(&sockaddr.un, address);
+ if (r < 0)
+ return r;
+ sockaddr_len = r;
+
+ r = varlink_new(&v);
+ if (r < 0)
+ return r;
+
+ v->fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (v->fd < 0)
+ return -errno;
+
+ v->fd = fd_move_above_stdio(v->fd);
+
+ if (connect(v->fd, &sockaddr.sa, sockaddr_len) < 0) {
+ if (!IN_SET(errno, EAGAIN, EINPROGRESS))
+ return -errno;
+
+ v->connecting = true; /* We are asynchronously connecting, i.e. the connect() is being
+ * processed in the background. As long as that's the case the socket
+ * is in a special state: it's there, we can poll it for EPOLLOUT, but
+ * if we attempt to write() to it before we see EPOLLOUT we'll get
+ * ENOTCONN (and not EAGAIN, like we would for a normal connected
+ * socket that isn't writable at the moment). Since ENOTCONN on write()
+ * hence can mean two different things (i.e. connection not complete
+ * yet vs. already disconnected again), we store as a boolean whether
+ * we are still in connect(). */
+ }
+
+ varlink_set_state(v, VARLINK_IDLE_CLIENT);
+
+ *ret = TAKE_PTR(v);
+ return r;
+}
+
+int varlink_connect_fd(Varlink **ret, int fd) {
+ Varlink *v;
+ int r;
+
+ assert_return(ret, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ r = varlink_new(&v);
+ if (r < 0)
+ return r;
+
+ v->fd = fd;
+ varlink_set_state(v, VARLINK_IDLE_CLIENT);
+
+ /* Note that if this function is called we assume the passed socket (if it is one) is already
+ * properly connected, i.e. any asynchronous connect() done on it already completed. Because of that
+ * we'll not set the 'connecting' boolean here, i.e. we don't need to avoid write()ing to the socket
+ * until the connection is fully set up. Behaviour here is hence a bit different from
+ * varlink_connect_address() above, as there we do handle asynchronous connections ourselves and
+ * avoid doing write() on it before we saw EPOLLOUT for the first time. */
+
+ *ret = v;
+ return 0;
+}
+
+static void varlink_detach_event_sources(Varlink *v) {
+ assert(v);
+
+ v->io_event_source = sd_event_source_disable_unref(v->io_event_source);
+ v->time_event_source = sd_event_source_disable_unref(v->time_event_source);
+ v->quit_event_source = sd_event_source_disable_unref(v->quit_event_source);
+ v->defer_event_source = sd_event_source_disable_unref(v->defer_event_source);
+}
+
+static void varlink_clear(Varlink *v) {
+ assert(v);
+
+ varlink_detach_event_sources(v);
+
+ v->fd = safe_close(v->fd);
+
+ v->input_buffer = mfree(v->input_buffer);
+ v->output_buffer = mfree(v->output_buffer);
+
+ v->current = json_variant_unref(v->current);
+ v->reply = json_variant_unref(v->reply);
+
+ v->event = sd_event_unref(v->event);
+}
+
+static Varlink* varlink_destroy(Varlink *v) {
+ if (!v)
+ return NULL;
+
+ /* If this is called the server object must already been unreffed here. Why that? because when we
+ * linked up the varlink connection with the server object we took one ref in each direction */
+ assert(!v->server);
+
+ varlink_clear(v);
+
+ free(v->description);
+ return mfree(v);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(Varlink, varlink, varlink_destroy);
+
+static int varlink_test_disconnect(Varlink *v) {
+ assert(v);
+
+ /* Tests whether we the connection has been terminated. We are careful to not stop processing it
+ * prematurely, since we want to handle half-open connections as well as possible and want to flush
+ * out and read data before we close down if we can. */
+
+ /* Already disconnected? */
+ if (!VARLINK_STATE_IS_ALIVE(v->state))
+ return 0;
+
+ /* Wait until connection setup is complete, i.e. until asynchronous connect() completes */
+ if (v->connecting)
+ return 0;
+
+ /* Still something to write and we can write? Stay around */
+ if (v->output_buffer_size > 0 && !v->write_disconnected)
+ return 0;
+
+ /* Both sides gone already? Then there's no need to stick around */
+ if (v->read_disconnected && v->write_disconnected)
+ goto disconnect;
+
+ /* If we are waiting for incoming data but the read side is shut down, disconnect. */
+ if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER) && v->read_disconnected)
+ goto disconnect;
+
+ /* Similar, if are a client that hasn't written anything yet but the write side is dead, also
+ * disconnect. We also explicitly check for POLLHUP here since we likely won't notice the write side
+ * being down if we never wrote anything. */
+ if (IN_SET(v->state, VARLINK_IDLE_CLIENT) && (v->write_disconnected || v->got_pollhup))
+ goto disconnect;
+
+ /* The server is still expecting to write more, but its write end is disconnected and it got a POLLHUP
+ * (i.e. from a disconnected client), so disconnect. */
+ if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE) && v->write_disconnected && v->got_pollhup)
+ goto disconnect;
+
+ return 0;
+
+disconnect:
+ varlink_set_state(v, VARLINK_PENDING_DISCONNECT);
+ return 1;
+}
+
+static int varlink_write(Varlink *v) {
+ ssize_t n;
+
+ assert(v);
+
+ if (!VARLINK_STATE_IS_ALIVE(v->state))
+ return 0;
+ if (v->connecting) /* Writing while we are still wait for a non-blocking connect() to complete will
+ * result in ENOTCONN, hence exit early here */
+ return 0;
+ if (v->output_buffer_size == 0)
+ return 0;
+ if (v->write_disconnected)
+ return 0;
+
+ assert(v->fd >= 0);
+
+ /* We generally prefer recv()/send() (mostly because of MSG_NOSIGNAL) but also want to be compatible
+ * with non-socket IO, hence fall back automatically */
+ if (!v->prefer_read_write) {
+ n = send(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size, MSG_DONTWAIT|MSG_NOSIGNAL);
+ if (n < 0 && errno == ENOTSOCK)
+ v->prefer_read_write = true;
+ }
+ if (v->prefer_read_write)
+ n = write(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size);
+ if (n < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ if (ERRNO_IS_DISCONNECT(errno)) {
+ /* If we get informed about a disconnect on write, then let's remember that, but not
+ * act on it just yet. Let's wait for read() to report the issue first. */
+ v->write_disconnected = true;
+ return 1;
+ }
+
+ return -errno;
+ }
+
+ v->output_buffer_size -= n;
+
+ if (v->output_buffer_size == 0)
+ v->output_buffer_index = 0;
+ else
+ v->output_buffer_index += n;
+
+ v->timestamp = now(CLOCK_MONOTONIC);
+ return 1;
+}
+
+static int varlink_read(Varlink *v) {
+ size_t rs;
+ ssize_t n;
+
+ assert(v);
+
+ if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER))
+ return 0;
+ if (v->connecting) /* read() on a socket while we are in connect() will fail with EINVAL, hence exit early here */
+ return 0;
+ if (v->current)
+ return 0;
+ if (v->input_buffer_unscanned > 0)
+ return 0;
+ if (v->read_disconnected)
+ return 0;
+
+ if (v->input_buffer_size >= VARLINK_BUFFER_MAX)
+ return -ENOBUFS;
+
+ assert(v->fd >= 0);
+
+ if (v->input_buffer_allocated <= v->input_buffer_index + v->input_buffer_size) {
+ size_t add;
+
+ add = MIN(VARLINK_BUFFER_MAX - v->input_buffer_size, VARLINK_READ_SIZE);
+
+ if (v->input_buffer_index == 0) {
+
+ if (!GREEDY_REALLOC(v->input_buffer, v->input_buffer_allocated, v->input_buffer_size + add))
+ return -ENOMEM;
+
+ } else {
+ char *b;
+
+ b = new(char, v->input_buffer_size + add);
+ if (!b)
+ return -ENOMEM;
+
+ memcpy(b, v->input_buffer + v->input_buffer_index, v->input_buffer_size);
+
+ free_and_replace(v->input_buffer, b);
+
+ v->input_buffer_allocated = v->input_buffer_size + add;
+ v->input_buffer_index = 0;
+ }
+ }
+
+ rs = v->input_buffer_allocated - (v->input_buffer_index + v->input_buffer_size);
+
+ if (!v->prefer_read_write) {
+ n = recv(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs, MSG_DONTWAIT);
+ if (n < 0 && errno == ENOTSOCK)
+ v->prefer_read_write = true;
+ }
+ if (v->prefer_read_write)
+ n = read(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs);
+ if (n < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ if (ERRNO_IS_DISCONNECT(errno)) {
+ v->read_disconnected = true;
+ return 1;
+ }
+
+ return -errno;
+ }
+ if (n == 0) { /* EOF */
+ v->read_disconnected = true;
+ return 1;
+ }
+
+ v->input_buffer_size += n;
+ v->input_buffer_unscanned += n;
+
+ return 1;
+}
+
+static int varlink_parse_message(Varlink *v) {
+ const char *e, *begin;
+ size_t sz;
+ int r;
+
+ assert(v);
+
+ if (v->current)
+ return 0;
+ if (v->input_buffer_unscanned <= 0)
+ return 0;
+
+ assert(v->input_buffer_unscanned <= v->input_buffer_size);
+ assert(v->input_buffer_index + v->input_buffer_size <= v->input_buffer_allocated);
+
+ begin = v->input_buffer + v->input_buffer_index;
+
+ e = memchr(begin + v->input_buffer_size - v->input_buffer_unscanned, 0, v->input_buffer_unscanned);
+ if (!e) {
+ v->input_buffer_unscanned = 0;
+ return 0;
+ }
+
+ sz = e - begin + 1;
+
+ varlink_log(v, "New incoming message: %s", begin); /* FIXME: should we output the whole message here before validation?
+ * This may produce a non-printable journal entry if the message
+ * is invalid. We may also expose privileged information. */
+
+ r = json_parse(begin, 0, &v->current, NULL, NULL);
+ if (r < 0) {
+ /* If we encounter a parse failure flush all data. We cannot possibly recover from this,
+ * hence drop all buffered data now. */
+ v->input_buffer_index = v->input_buffer_size = v->input_buffer_unscanned = 0;
+ return varlink_log_errno(v, r, "Failed to parse JSON: %m");
+ }
+
+ v->input_buffer_size -= sz;
+
+ if (v->input_buffer_size == 0)
+ v->input_buffer_index = 0;
+ else
+ v->input_buffer_index += sz;
+
+ v->input_buffer_unscanned = v->input_buffer_size;
+ return 1;
+}
+
+static int varlink_test_timeout(Varlink *v) {
+ assert(v);
+
+ if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING))
+ return 0;
+ if (v->timeout == USEC_INFINITY)
+ return 0;
+
+ if (now(CLOCK_MONOTONIC) < usec_add(v->timestamp, v->timeout))
+ return 0;
+
+ varlink_set_state(v, VARLINK_PENDING_TIMEOUT);
+
+ return 1;
+}
+
+static int varlink_dispatch_local_error(Varlink *v, const char *error) {
+ int r;
+
+ assert(v);
+ assert(error);
+
+ if (!v->reply_callback)
+ return 0;
+
+ r = v->reply_callback(v, NULL, error, VARLINK_REPLY_ERROR|VARLINK_REPLY_LOCAL, v->userdata);
+ if (r < 0)
+ log_debug_errno(r, "Reply callback returned error, ignoring: %m");
+
+ return 1;
+}
+
+static int varlink_dispatch_timeout(Varlink *v) {
+ assert(v);
+
+ if (v->state != VARLINK_PENDING_TIMEOUT)
+ return 0;
+
+ varlink_set_state(v, VARLINK_PROCESSING_TIMEOUT);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_TIMEOUT);
+ varlink_close(v);
+
+ return 1;
+}
+
+static int varlink_dispatch_disconnect(Varlink *v) {
+ assert(v);
+
+ if (v->state != VARLINK_PENDING_DISCONNECT)
+ return 0;
+
+ varlink_set_state(v, VARLINK_PROCESSING_DISCONNECT);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_DISCONNECTED);
+ varlink_close(v);
+
+ return 1;
+}
+
+static int varlink_sanitize_parameters(JsonVariant **v) {
+ assert(v);
+
+ /* Varlink always wants a parameters list, hence make one if the caller doesn't want any */
+ if (!*v)
+ return json_variant_new_object(v, NULL, 0);
+ else if (!json_variant_is_object(*v))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int varlink_dispatch_reply(Varlink *v) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ VarlinkReplyFlags flags = 0;
+ const char *error = NULL;
+ JsonVariant *e;
+ const char *k;
+ int r;
+
+ assert(v);
+
+ if (!IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING))
+ return 0;
+ if (!v->current)
+ return 0;
+
+ assert(v->n_pending > 0);
+
+ if (!json_variant_is_object(v->current))
+ goto invalid;
+
+ JSON_VARIANT_OBJECT_FOREACH(k, e, v->current) {
+
+ if (streq(k, "error")) {
+ if (error)
+ goto invalid;
+ if (!json_variant_is_string(e))
+ goto invalid;
+
+ error = json_variant_string(e);
+ flags |= VARLINK_REPLY_ERROR;
+
+ } else if (streq(k, "parameters")) {
+ if (parameters)
+ goto invalid;
+ if (!json_variant_is_object(e))
+ goto invalid;
+
+ parameters = json_variant_ref(e);
+
+ } else if (streq(k, "continues")) {
+ if (FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ goto invalid;
+
+ if (!json_variant_is_boolean(e))
+ goto invalid;
+
+ if (json_variant_boolean(e))
+ flags |= VARLINK_REPLY_CONTINUES;
+ } else
+ goto invalid;
+ }
+
+ /* Replies with 'continue' set are only OK if we set 'more' when the method call was initiated */
+ if (v->state != VARLINK_AWAITING_REPLY_MORE && FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ goto invalid;
+
+ /* An error is final */
+ if (error && FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ goto invalid;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ goto invalid;
+
+ if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE)) {
+ varlink_set_state(v, VARLINK_PROCESSING_REPLY);
+
+ if (v->reply_callback) {
+ r = v->reply_callback(v, parameters, error, flags, v->userdata);
+ if (r < 0)
+ log_debug_errno(r, "Reply callback returned error, ignoring: %m");
+ }
+
+ v->current = json_variant_unref(v->current);
+
+ if (v->state == VARLINK_PROCESSING_REPLY) {
+
+ assert(v->n_pending > 0);
+
+ if (!FLAGS_SET(flags, VARLINK_REPLY_CONTINUES))
+ v->n_pending--;
+
+ varlink_set_state(v,
+ FLAGS_SET(flags, VARLINK_REPLY_CONTINUES) ? VARLINK_AWAITING_REPLY_MORE :
+ v->n_pending == 0 ? VARLINK_IDLE_CLIENT : VARLINK_AWAITING_REPLY);
+ }
+ } else {
+ assert(v->state == VARLINK_CALLING);
+ varlink_set_state(v, VARLINK_CALLED);
+ }
+
+ return 1;
+
+invalid:
+ varlink_set_state(v, VARLINK_PROCESSING_FAILURE);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_PROTOCOL);
+ varlink_close(v);
+
+ return 1;
+}
+
+static int varlink_dispatch_method(Varlink *v) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ VarlinkMethodFlags flags = 0;
+ const char *method = NULL, *error;
+ JsonVariant *e;
+ VarlinkMethod callback;
+ const char *k;
+ int r;
+
+ assert(v);
+
+ if (v->state != VARLINK_IDLE_SERVER)
+ return 0;
+ if (!v->current)
+ return 0;
+
+ if (!json_variant_is_object(v->current))
+ goto invalid;
+
+ JSON_VARIANT_OBJECT_FOREACH(k, e, v->current) {
+
+ if (streq(k, "method")) {
+ if (method)
+ goto invalid;
+ if (!json_variant_is_string(e))
+ goto invalid;
+
+ method = json_variant_string(e);
+
+ } else if (streq(k, "parameters")) {
+ if (parameters)
+ goto invalid;
+ if (!json_variant_is_object(e))
+ goto invalid;
+
+ parameters = json_variant_ref(e);
+
+ } else if (streq(k, "oneway")) {
+
+ if ((flags & (VARLINK_METHOD_ONEWAY|VARLINK_METHOD_MORE)) != 0)
+ goto invalid;
+
+ if (!json_variant_is_boolean(e))
+ goto invalid;
+
+ if (json_variant_boolean(e))
+ flags |= VARLINK_METHOD_ONEWAY;
+
+ } else if (streq(k, "more")) {
+
+ if ((flags & (VARLINK_METHOD_ONEWAY|VARLINK_METHOD_MORE)) != 0)
+ goto invalid;
+
+ if (!json_variant_is_boolean(e))
+ goto invalid;
+
+ if (json_variant_boolean(e))
+ flags |= VARLINK_METHOD_MORE;
+
+ } else
+ goto invalid;
+ }
+
+ if (!method)
+ goto invalid;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ goto fail;
+
+ varlink_set_state(v, (flags & VARLINK_METHOD_MORE) ? VARLINK_PROCESSING_METHOD_MORE :
+ (flags & VARLINK_METHOD_ONEWAY) ? VARLINK_PROCESSING_METHOD_ONEWAY :
+ VARLINK_PROCESSING_METHOD);
+
+ assert(v->server);
+
+ if (STR_IN_SET(method, "org.varlink.service.GetInfo", "org.varlink.service.GetInterface")) {
+ /* For now, we don't implement a single of varlink's own methods */
+ callback = NULL;
+ error = VARLINK_ERROR_METHOD_NOT_IMPLEMENTED;
+ } else if (startswith(method, "org.varlink.service.")) {
+ callback = NULL;
+ error = VARLINK_ERROR_METHOD_NOT_FOUND;
+ } else {
+ callback = hashmap_get(v->server->methods, method);
+ error = VARLINK_ERROR_METHOD_NOT_FOUND;
+ }
+
+ if (callback) {
+ r = callback(v, parameters, flags, v->userdata);
+ if (r < 0) {
+ log_debug_errno(r, "Callback for %s returned error: %m", method);
+
+ /* We got an error back from the callback. Propagate it to the client if the method call remains unanswered. */
+ if (!FLAGS_SET(flags, VARLINK_METHOD_ONEWAY)) {
+ r = varlink_error_errno(v, r);
+ if (r < 0)
+ return r;
+ }
+ }
+ } else if (!FLAGS_SET(flags, VARLINK_METHOD_ONEWAY)) {
+ assert(error);
+
+ r = varlink_errorb(v, error, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method))));
+ if (r < 0)
+ return r;
+ }
+
+ switch (v->state) {
+
+ case VARLINK_PROCESSED_METHOD: /* Method call is fully processed */
+ case VARLINK_PROCESSING_METHOD_ONEWAY: /* ditto */
+ v->current = json_variant_unref(v->current);
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+ break;
+
+ case VARLINK_PROCESSING_METHOD: /* Method call wasn't replied to, will be replied to later */
+ varlink_set_state(v, VARLINK_PENDING_METHOD);
+ break;
+
+ case VARLINK_PROCESSING_METHOD_MORE: /* No reply for a "more" message was sent, more to come */
+ varlink_set_state(v, VARLINK_PENDING_METHOD_MORE);
+ break;
+
+ default:
+ assert_not_reached("Unexpected state");
+
+ }
+
+ return r;
+
+invalid:
+ r = -EINVAL;
+
+fail:
+ varlink_set_state(v, VARLINK_PROCESSING_FAILURE);
+ varlink_dispatch_local_error(v, VARLINK_ERROR_PROTOCOL);
+ varlink_close(v);
+
+ return r;
+}
+
+int varlink_process(Varlink *v) {
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ varlink_ref(v);
+
+ r = varlink_write(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_reply(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_method(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_parse_message(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_read(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_test_disconnect(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_disconnect(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_test_timeout(v);
+ if (r != 0)
+ goto finish;
+
+ r = varlink_dispatch_timeout(v);
+ if (r != 0)
+ goto finish;
+
+finish:
+ if (r >= 0 && v->defer_event_source) {
+ int q;
+
+ /* If we did some processing, make sure we are called again soon */
+ q = sd_event_source_set_enabled(v->defer_event_source, r > 0 ? SD_EVENT_ON : SD_EVENT_OFF);
+ if (q < 0)
+ r = q;
+ }
+
+ if (r < 0) {
+ if (VARLINK_STATE_IS_ALIVE(v->state))
+ /* Initiate disconnection */
+ varlink_set_state(v, VARLINK_PENDING_DISCONNECT);
+ else
+ /* We failed while disconnecting, in that case close right away */
+ varlink_close(v);
+ }
+
+ varlink_unref(v);
+ return r;
+}
+
+static void handle_revents(Varlink *v, int revents) {
+ assert(v);
+
+ if (v->connecting) {
+ /* If we have seen POLLOUT or POLLHUP on a socket we are asynchronously waiting a connect()
+ * to complete on, we know we are ready. We don't read the connection error here though,
+ * we'll get the error on the next read() or write(). */
+ if ((revents & (POLLOUT|POLLHUP)) == 0)
+ return;
+
+ varlink_log(v, "Anynchronous connection completed.");
+ v->connecting = false;
+ } else {
+ /* Note that we don't care much about POLLIN/POLLOUT here, we'll just try reading and writing
+ * what we can. However, we do care about POLLHUP to detect connection termination even if we
+ * momentarily don't want to read nor write anything. */
+
+ if (!FLAGS_SET(revents, POLLHUP))
+ return;
+
+ varlink_log(v, "Got POLLHUP from socket.");
+ v->got_pollhup = true;
+ }
+}
+
+int varlink_wait(Varlink *v, usec_t timeout) {
+ int r, fd, events;
+ usec_t t;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ r = varlink_get_timeout(v, &t);
+ if (r < 0)
+ return r;
+ if (t != USEC_INFINITY) {
+ usec_t n;
+
+ n = now(CLOCK_MONOTONIC);
+ if (t < n)
+ t = 0;
+ else
+ t = usec_sub_unsigned(t, n);
+ }
+
+ if (timeout != USEC_INFINITY &&
+ (t == USEC_INFINITY || timeout < t))
+ t = timeout;
+
+ fd = varlink_get_fd(v);
+ if (fd < 0)
+ return fd;
+
+ events = varlink_get_events(v);
+ if (events < 0)
+ return events;
+
+ r = fd_wait_for_event(fd, events, t);
+ if (r <= 0)
+ return r;
+
+ handle_revents(v, r);
+ return 1;
+}
+
+int varlink_get_fd(Varlink *v) {
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (v->fd < 0)
+ return -EBADF;
+
+ return v->fd;
+}
+
+int varlink_get_events(Varlink *v) {
+ int ret = 0;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ if (v->connecting) /* When processing an asynchronous connect(), we only wait for EPOLLOUT, which
+ * tells us that the connection is now complete. Before that we should neither
+ * write() or read() from the fd. */
+ return EPOLLOUT;
+
+ if (!v->read_disconnected &&
+ IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING, VARLINK_IDLE_SERVER) &&
+ !v->current &&
+ v->input_buffer_unscanned <= 0)
+ ret |= EPOLLIN;
+
+ if (!v->write_disconnected &&
+ v->output_buffer_size > 0)
+ ret |= EPOLLOUT;
+
+ return ret;
+}
+
+int varlink_get_timeout(Varlink *v, usec_t *ret) {
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ if (IN_SET(v->state, VARLINK_AWAITING_REPLY, VARLINK_AWAITING_REPLY_MORE, VARLINK_CALLING) &&
+ v->timeout != USEC_INFINITY) {
+ if (ret)
+ *ret = usec_add(v->timestamp, v->timeout);
+ return 1;
+ } else {
+ if (ret)
+ *ret = USEC_INFINITY;
+ return 0;
+ }
+}
+
+int varlink_flush(Varlink *v) {
+ int ret = 0, r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ for (;;) {
+ if (v->output_buffer_size == 0)
+ break;
+ if (v->write_disconnected)
+ return -ECONNRESET;
+
+ r = varlink_write(v);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ ret = 1;
+ continue;
+ }
+
+ r = fd_wait_for_event(v->fd, POLLOUT, USEC_INFINITY);
+ if (r < 0)
+ return r;
+
+ assert(r != 0);
+
+ handle_revents(v, r);
+ }
+
+ return ret;
+}
+
+static void varlink_detach_server(Varlink *v) {
+ VarlinkServer *saved_server;
+ assert(v);
+
+ if (!v->server)
+ return;
+
+ if (v->server->by_uid &&
+ v->ucred_acquired &&
+ uid_is_valid(v->ucred.uid)) {
+ unsigned c;
+
+ c = PTR_TO_UINT(hashmap_get(v->server->by_uid, UID_TO_PTR(v->ucred.uid)));
+ assert(c > 0);
+
+ if (c == 1)
+ (void) hashmap_remove(v->server->by_uid, UID_TO_PTR(v->ucred.uid));
+ else
+ (void) hashmap_replace(v->server->by_uid, UID_TO_PTR(v->ucred.uid), UINT_TO_PTR(c - 1));
+ }
+
+ assert(v->server->n_connections > 0);
+ v->server->n_connections--;
+
+ /* If this is a connection associated to a server, then let's disconnect the server and the
+ * connection from each other. This drops the dangling reference that connect_callback() set up. But
+ * before we release the references, let's call the disconnection callback if it is defined. */
+
+ saved_server = TAKE_PTR(v->server);
+
+ if (saved_server->disconnect_callback)
+ saved_server->disconnect_callback(saved_server, v, saved_server->userdata);
+
+ varlink_server_unref(saved_server);
+ varlink_unref(v);
+}
+
+int varlink_close(Varlink *v) {
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return 0;
+
+ varlink_set_state(v, VARLINK_DISCONNECTED);
+
+ /* Let's take a reference first, since varlink_detach_server() might drop the final (dangling) ref
+ * which would destroy us before we can call varlink_clear() */
+ varlink_ref(v);
+ varlink_detach_server(v);
+ varlink_clear(v);
+ varlink_unref(v);
+
+ return 1;
+}
+
+Varlink* varlink_close_unref(Varlink *v) {
+
+ if (!v)
+ return NULL;
+
+ (void) varlink_close(v);
+ return varlink_unref(v);
+}
+
+Varlink* varlink_flush_close_unref(Varlink *v) {
+
+ if (!v)
+ return NULL;
+
+ (void) varlink_flush(v);
+ (void) varlink_close(v);
+ return varlink_unref(v);
+}
+
+static int varlink_enqueue_json(Varlink *v, JsonVariant *m) {
+ _cleanup_free_ char *text = NULL;
+ int r;
+
+ assert(v);
+ assert(m);
+
+ r = json_variant_format(m, 0, &text);
+ if (r < 0)
+ return r;
+ assert(text[r] == '\0');
+
+ if (v->output_buffer_size + r + 1 > VARLINK_BUFFER_MAX)
+ return -ENOBUFS;
+
+ varlink_log(v, "Sending message: %s", text);
+
+ if (v->output_buffer_size == 0) {
+
+ free_and_replace(v->output_buffer, text);
+
+ v->output_buffer_size = v->output_buffer_allocated = r + 1;
+ v->output_buffer_index = 0;
+
+ } else if (v->output_buffer_index == 0) {
+
+ if (!GREEDY_REALLOC(v->output_buffer, v->output_buffer_allocated, v->output_buffer_size + r + 1))
+ return -ENOMEM;
+
+ memcpy(v->output_buffer + v->output_buffer_size, text, r + 1);
+ v->output_buffer_size += r + 1;
+
+ } else {
+ char *n;
+ const size_t new_size = v->output_buffer_size + r + 1;
+
+ n = new(char, new_size);
+ if (!n)
+ return -ENOMEM;
+
+ memcpy(mempcpy(n, v->output_buffer + v->output_buffer_index, v->output_buffer_size), text, r + 1);
+
+ free_and_replace(v->output_buffer, n);
+ v->output_buffer_allocated = v->output_buffer_size = new_size;
+ v->output_buffer_index = 0;
+ }
+
+ return 0;
+}
+
+int varlink_send(Varlink *v, const char *method, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ /* We allow enqueuing multiple method calls at once! */
+ if (!IN_SET(v->state, VARLINK_IDLE_CLIENT, VARLINK_AWAITING_REPLY))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)),
+ JSON_BUILD_PAIR("oneway", JSON_BUILD_BOOLEAN(true))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ /* No state change here, this is one-way only after all */
+ v->timestamp = now(CLOCK_MONOTONIC);
+ return 0;
+}
+
+int varlink_sendb(Varlink *v, const char *method, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, method);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_send(v, method, parameters);
+}
+
+int varlink_invoke(Varlink *v, const char *method, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+
+ /* We allow enqueuing multiple method calls at once! */
+ if (!IN_SET(v->state, VARLINK_IDLE_CLIENT, VARLINK_AWAITING_REPLY))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ varlink_set_state(v, VARLINK_AWAITING_REPLY);
+ v->n_pending++;
+ v->timestamp = now(CLOCK_MONOTONIC);
+
+ return 0;
+}
+
+int varlink_invokeb(Varlink *v, const char *method, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, method);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_invoke(v, method, parameters);
+}
+
+int varlink_observe(Varlink *v, const char *method, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ /* Note that we don't allow enqueuing multiple method calls when we are in more/continues mode! We
+ * thus insist on an idle client here. */
+ if (v->state != VARLINK_IDLE_CLIENT)
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)),
+ JSON_BUILD_PAIR("more", JSON_BUILD_BOOLEAN(true))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+
+ varlink_set_state(v, VARLINK_AWAITING_REPLY_MORE);
+ v->n_pending++;
+ v->timestamp = now(CLOCK_MONOTONIC);
+
+ return 0;
+}
+
+int varlink_observeb(Varlink *v, const char *method, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, method);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_observe(v, method, parameters);
+}
+
+int varlink_call(
+ Varlink *v,
+ const char *method,
+ JsonVariant *parameters,
+ JsonVariant **ret_parameters,
+ const char **ret_error_id,
+ VarlinkReplyFlags *ret_flags) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(method, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state, VARLINK_IDLE_CLIENT))
+ return -EBUSY;
+
+ assert(v->n_pending == 0); /* n_pending can't be > 0 if we are in VARLINK_IDLE_CLIENT state */
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("method", JSON_BUILD_STRING(method)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ varlink_set_state(v, VARLINK_CALLING);
+ v->n_pending++;
+ v->timestamp = now(CLOCK_MONOTONIC);
+
+ while (v->state == VARLINK_CALLING) {
+
+ r = varlink_process(v);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ r = varlink_wait(v, USEC_INFINITY);
+ if (r < 0)
+ return r;
+ }
+
+ switch (v->state) {
+
+ case VARLINK_CALLED:
+ assert(v->current);
+
+ json_variant_unref(v->reply);
+ v->reply = TAKE_PTR(v->current);
+
+ varlink_set_state(v, VARLINK_IDLE_CLIENT);
+ assert(v->n_pending == 1);
+ v->n_pending--;
+
+ if (ret_parameters)
+ *ret_parameters = json_variant_by_key(v->reply, "parameters");
+ if (ret_error_id)
+ *ret_error_id = json_variant_string(json_variant_by_key(v->reply, "error"));
+ if (ret_flags)
+ *ret_flags = 0;
+
+ return 1;
+
+ case VARLINK_PENDING_DISCONNECT:
+ case VARLINK_DISCONNECTED:
+ return -ECONNRESET;
+
+ case VARLINK_PENDING_TIMEOUT:
+ return -ETIME;
+
+ default:
+ assert_not_reached("Unexpected state after method call.");
+ }
+}
+
+int varlink_callb(
+ Varlink *v,
+ const char *method,
+ JsonVariant **ret_parameters,
+ const char **ret_error_id,
+ VarlinkReplyFlags *ret_flags, ...) {
+
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, ret_flags);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_call(v, method, parameters, ret_parameters, ret_error_id, ret_flags);
+}
+
+int varlink_reply(Varlink *v, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state,
+ VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE,
+ VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) {
+ /* We just replied to a method call that was let hanging for a while (i.e. we were outside of
+ * the varlink_dispatch_method() stack frame), which means with this reply we are ready to
+ * process further messages. */
+ v->current = json_variant_unref(v->current);
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+ } else
+ /* We replied to a method call from within the varlink_dispatch_method() stack frame), which
+ * means we should it handle the rest of the state engine. */
+ varlink_set_state(v, VARLINK_PROCESSED_METHOD);
+
+ return 1;
+}
+
+int varlink_replyb(Varlink *v, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, v);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_reply(v, parameters);
+}
+
+int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(error_id, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state,
+ VARLINK_PROCESSING_METHOD, VARLINK_PROCESSING_METHOD_MORE,
+ VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("error", JSON_BUILD_STRING(error_id)),
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ if (IN_SET(v->state, VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) {
+ v->current = json_variant_unref(v->current);
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+ } else
+ varlink_set_state(v, VARLINK_PROCESSED_METHOD);
+
+ return 1;
+}
+
+int varlink_errorb(Varlink *v, const char *error_id, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(error_id, -EINVAL);
+
+ va_start(ap, error_id);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_error(v, error_id, parameters);
+}
+
+int varlink_error_invalid_parameter(Varlink *v, JsonVariant *parameters) {
+
+ assert_return(v, -EINVAL);
+ assert_return(parameters, -EINVAL);
+
+ /* We expect to be called in one of two ways: the 'parameters' argument is a string variant in which
+ * case it is the parameter key name that is invalid. Or the 'parameters' argument is an object
+ * variant in which case we'll pull out the first key. The latter mode is useful in functions that
+ * don't expect any arguments. */
+
+ if (json_variant_is_string(parameters))
+ return varlink_error(v, VARLINK_ERROR_INVALID_PARAMETER, parameters);
+
+ if (json_variant_is_object(parameters) &&
+ json_variant_elements(parameters) > 0)
+ return varlink_error(v, VARLINK_ERROR_INVALID_PARAMETER,
+ json_variant_by_index(parameters, 0));
+
+ return -EINVAL;
+}
+
+int varlink_error_errno(Varlink *v, int error) {
+ return varlink_errorb(
+ v,
+ VARLINK_ERROR_SYSTEM,
+ JSON_BUILD_OBJECT(JSON_BUILD_PAIR("errno", JSON_BUILD_INTEGER(abs(error)))));
+}
+
+int varlink_notify(Varlink *v, JsonVariant *parameters) {
+ _cleanup_(json_variant_unrefp) JsonVariant *m = NULL;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ if (v->state == VARLINK_DISCONNECTED)
+ return -ENOTCONN;
+ if (!IN_SET(v->state, VARLINK_PROCESSING_METHOD_MORE, VARLINK_PENDING_METHOD_MORE))
+ return -EBUSY;
+
+ r = varlink_sanitize_parameters(&parameters);
+ if (r < 0)
+ return r;
+
+ r = json_build(&m, JSON_BUILD_OBJECT(
+ JSON_BUILD_PAIR("parameters", JSON_BUILD_VARIANT(parameters)),
+ JSON_BUILD_PAIR("continues", JSON_BUILD_BOOLEAN(true))));
+ if (r < 0)
+ return r;
+
+ r = varlink_enqueue_json(v, m);
+ if (r < 0)
+ return r;
+
+ /* No state change, as more is coming */
+ return 1;
+}
+
+int varlink_notifyb(Varlink *v, ...) {
+ _cleanup_(json_variant_unrefp) JsonVariant *parameters = NULL;
+ va_list ap;
+ int r;
+
+ assert_return(v, -EINVAL);
+
+ va_start(ap, v);
+ r = json_buildv(&parameters, ap);
+ va_end(ap);
+
+ if (r < 0)
+ return r;
+
+ return varlink_notify(v, parameters);
+}
+
+int varlink_bind_reply(Varlink *v, VarlinkReply callback) {
+ assert_return(v, -EINVAL);
+
+ if (callback && v->reply_callback && callback != v->reply_callback)
+ return -EBUSY;
+
+ v->reply_callback = callback;
+
+ return 0;
+}
+
+void* varlink_set_userdata(Varlink *v, void *userdata) {
+ void *old;
+
+ assert_return(v, NULL);
+
+ old = v->userdata;
+ v->userdata = userdata;
+
+ return old;
+}
+
+void* varlink_get_userdata(Varlink *v) {
+ assert_return(v, NULL);
+
+ return v->userdata;
+}
+
+static int varlink_acquire_ucred(Varlink *v) {
+ int r;
+
+ assert(v);
+
+ if (v->ucred_acquired)
+ return 0;
+
+ r = getpeercred(v->fd, &v->ucred);
+ if (r < 0)
+ return r;
+
+ v->ucred_acquired = true;
+ return 0;
+}
+
+int varlink_get_peer_uid(Varlink *v, uid_t *ret) {
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = varlink_acquire_ucred(v);
+ if (r < 0)
+ return r;
+
+ if (!uid_is_valid(v->ucred.uid))
+ return -ENODATA;
+
+ *ret = v->ucred.uid;
+ return 0;
+}
+
+int varlink_get_peer_pid(Varlink *v, pid_t *ret) {
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(ret, -EINVAL);
+
+ r = varlink_acquire_ucred(v);
+ if (r < 0)
+ return r;
+
+ if (!pid_is_valid(v->ucred.pid))
+ return -ENODATA;
+
+ *ret = v->ucred.pid;
+ return 0;
+}
+
+int varlink_set_relative_timeout(Varlink *v, usec_t timeout) {
+ assert_return(v, -EINVAL);
+ assert_return(timeout > 0, -EINVAL);
+
+ v->timeout = timeout;
+ return 0;
+}
+
+VarlinkServer *varlink_get_server(Varlink *v) {
+ assert_return(v, NULL);
+
+ return v->server;
+}
+
+int varlink_set_description(Varlink *v, const char *description) {
+ assert_return(v, -EINVAL);
+
+ return free_and_strdup(&v->description, description);
+}
+
+static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(s);
+ assert(v);
+
+ handle_revents(v, revents);
+ (void) varlink_process(v);
+
+ return 1;
+}
+
+static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(s);
+ assert(v);
+
+ (void) varlink_process(v);
+ return 1;
+}
+
+static int defer_callback(sd_event_source *s, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(s);
+ assert(v);
+
+ (void) varlink_process(v);
+ return 1;
+}
+
+static int prepare_callback(sd_event_source *s, void *userdata) {
+ Varlink *v = userdata;
+ int r, e;
+ usec_t until;
+ bool have_timeout;
+
+ assert(s);
+ assert(v);
+
+ e = varlink_get_events(v);
+ if (e < 0)
+ return e;
+
+ r = sd_event_source_set_io_events(v->io_event_source, e);
+ if (r < 0)
+ return r;
+
+ r = varlink_get_timeout(v, &until);
+ if (r < 0)
+ return r;
+ have_timeout = r > 0;
+
+ if (have_timeout) {
+ r = sd_event_source_set_time(v->time_event_source, until);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_source_set_enabled(v->time_event_source, have_timeout ? SD_EVENT_ON : SD_EVENT_OFF);
+ if (r < 0)
+ return r;
+
+ return 1;
+}
+
+static int quit_callback(sd_event_source *event, void *userdata) {
+ Varlink *v = userdata;
+
+ assert(event);
+ assert(v);
+
+ varlink_flush(v);
+ varlink_close(v);
+
+ return 1;
+}
+
+int varlink_attach_event(Varlink *v, sd_event *e, int64_t priority) {
+ int r;
+
+ assert_return(v, -EINVAL);
+ assert_return(!v->event, -EBUSY);
+
+ if (e)
+ v->event = sd_event_ref(e);
+ else {
+ r = sd_event_default(&v->event);
+ if (r < 0)
+ return r;
+ }
+
+ r = sd_event_add_time(v->event, &v->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->time_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->time_event_source, "varlink-time");
+
+ r = sd_event_add_exit(v->event, &v->quit_event_source, quit_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->quit_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->quit_event_source, "varlink-quit");
+
+ r = sd_event_add_io(v->event, &v->io_event_source, v->fd, 0, io_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_prepare(v->io_event_source, prepare_callback);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->io_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->io_event_source, "varlink-io");
+
+ r = sd_event_add_defer(v->event, &v->defer_event_source, defer_callback, v);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(v->defer_event_source, priority);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(v->defer_event_source, "varlink-defer");
+
+ return 0;
+
+fail:
+ varlink_detach_event(v);
+ return r;
+}
+
+void varlink_detach_event(Varlink *v) {
+ if (!v)
+ return;
+
+ varlink_detach_event_sources(v);
+
+ v->event = sd_event_unref(v->event);
+}
+
+sd_event *varlink_get_event(Varlink *v) {
+ assert_return(v, NULL);
+
+ return v->event;
+}
+
+int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags) {
+ VarlinkServer *s;
+
+ assert_return(ret, -EINVAL);
+ assert_return((flags & ~_VARLINK_SERVER_FLAGS_ALL) == 0, -EINVAL);
+
+ s = new(VarlinkServer, 1);
+ if (!s)
+ return -ENOMEM;
+
+ *s = (VarlinkServer) {
+ .n_ref = 1,
+ .flags = flags,
+ .connections_max = varlink_server_connections_max(NULL),
+ .connections_per_uid_max = varlink_server_connections_per_uid_max(NULL),
+ };
+
+ *ret = s;
+ return 0;
+}
+
+static VarlinkServer* varlink_server_destroy(VarlinkServer *s) {
+ char *m;
+
+ if (!s)
+ return NULL;
+
+ varlink_server_shutdown(s);
+
+ while ((m = hashmap_steal_first_key(s->methods)))
+ free(m);
+
+ hashmap_free(s->methods);
+ hashmap_free(s->by_uid);
+
+ sd_event_unref(s->event);
+
+ free(s->description);
+
+ return mfree(s);
+}
+
+DEFINE_TRIVIAL_REF_UNREF_FUNC(VarlinkServer, varlink_server, varlink_server_destroy);
+
+static int validate_connection(VarlinkServer *server, const struct ucred *ucred) {
+ int allowed = -1;
+
+ assert(server);
+ assert(ucred);
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_ROOT_ONLY))
+ allowed = ucred->uid == 0;
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_MYSELF_ONLY))
+ allowed = allowed > 0 || ucred->uid == getuid();
+
+ if (allowed == 0) { /* Allow access when it is explicitly allowed or when neither
+ * VARLINK_SERVER_ROOT_ONLY nor VARLINK_SERVER_MYSELF_ONLY are specified. */
+ varlink_server_log(server, "Unprivileged client attempted connection, refusing.");
+ return 0;
+ }
+
+ if (server->n_connections >= server->connections_max) {
+ varlink_server_log(server, "Connection limit of %u reached, refusing.", server->connections_max);
+ return 0;
+ }
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_ACCOUNT_UID)) {
+ unsigned c;
+
+ if (!uid_is_valid(ucred->uid)) {
+ varlink_server_log(server, "Client with invalid UID attempted connection, refusing.");
+ return 0;
+ }
+
+ c = PTR_TO_UINT(hashmap_get(server->by_uid, UID_TO_PTR(ucred->uid)));
+ if (c >= server->connections_per_uid_max) {
+ varlink_server_log(server, "Per-UID connection limit of %u reached, refusing.",
+ server->connections_per_uid_max);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int count_connection(VarlinkServer *server, struct ucred *ucred) {
+ unsigned c;
+ int r;
+
+ assert(server);
+ assert(ucred);
+
+ server->n_connections++;
+
+ if (FLAGS_SET(server->flags, VARLINK_SERVER_ACCOUNT_UID)) {
+ r = hashmap_ensure_allocated(&server->by_uid, NULL);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to allocate UID hash table: %m");
+
+ c = PTR_TO_UINT(hashmap_get(server->by_uid, UID_TO_PTR(ucred->uid)));
+
+ varlink_server_log(server, "Connections of user " UID_FMT ": %u (of %u max)",
+ ucred->uid, c, server->connections_per_uid_max);
+
+ r = hashmap_replace(server->by_uid, UID_TO_PTR(ucred->uid), UINT_TO_PTR(c + 1));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to increment counter in UID hash table: %m");
+ }
+
+ return 0;
+}
+
+int varlink_server_add_connection(VarlinkServer *server, int fd, Varlink **ret) {
+ _cleanup_(varlink_unrefp) Varlink *v = NULL;
+ bool ucred_acquired;
+ struct ucred ucred;
+ int r;
+
+ assert_return(server, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ if ((server->flags & (VARLINK_SERVER_ROOT_ONLY|VARLINK_SERVER_ACCOUNT_UID)) != 0) {
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return varlink_server_log_errno(server, r, "Failed to acquire peer credentials of incoming socket, refusing: %m");
+
+ ucred_acquired = true;
+
+ r = validate_connection(server, &ucred);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EPERM;
+ } else
+ ucred_acquired = false;
+
+ r = varlink_new(&v);
+ if (r < 0)
+ return varlink_server_log_errno(server, r, "Failed to allocate connection object: %m");
+
+ r = count_connection(server, &ucred);
+ if (r < 0)
+ return r;
+
+ v->fd = fd;
+ v->userdata = server->userdata;
+ if (ucred_acquired) {
+ v->ucred = ucred;
+ v->ucred_acquired = true;
+ }
+
+ (void) asprintf(&v->description, "%s-%i", server->description ?: "varlink", v->fd);
+
+ /* Link up the server and the connection, and take reference in both directions. Note that the
+ * reference on the connection is left dangling. It will be dropped when the connection is closed,
+ * which happens in varlink_close(), including in the event loop quit callback. */
+ v->server = varlink_server_ref(server);
+ varlink_ref(v);
+
+ varlink_set_state(v, VARLINK_IDLE_SERVER);
+
+ if (server->event) {
+ r = varlink_attach_event(v, server->event, server->event_priority);
+ if (r < 0) {
+ varlink_log_errno(v, r, "Failed to attach new connection: %m");
+ v->fd = -1; /* take the fd out of the connection again */
+ varlink_close(v);
+ return r;
+ }
+ }
+
+ if (ret)
+ *ret = v;
+
+ return 0;
+}
+
+static int connect_callback(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ VarlinkServerSocket *ss = userdata;
+ _cleanup_close_ int cfd = -1;
+ Varlink *v = NULL;
+ int r;
+
+ assert(source);
+ assert(ss);
+
+ varlink_server_log(ss->server, "New incoming connection.");
+
+ cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
+ if (cfd < 0) {
+ if (ERRNO_IS_ACCEPT_AGAIN(errno))
+ return 0;
+
+ return varlink_server_log_errno(ss->server, errno, "Failed to accept incoming socket: %m");
+ }
+
+ r = varlink_server_add_connection(ss->server, cfd, &v);
+ if (r < 0)
+ return 0;
+
+ TAKE_FD(cfd);
+
+ if (ss->server->connect_callback) {
+ r = ss->server->connect_callback(ss->server, v, ss->server->userdata);
+ if (r < 0) {
+ varlink_log_errno(v, r, "Connection callback returned error, disconnecting client: %m");
+ varlink_close(v);
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+int varlink_server_listen_fd(VarlinkServer *s, int fd) {
+ _cleanup_free_ VarlinkServerSocket *ss = NULL;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(fd >= 0, -EBADF);
+
+ r = fd_nonblock(fd, true);
+ if (r < 0)
+ return r;
+
+ ss = new(VarlinkServerSocket, 1);
+ if (!ss)
+ return -ENOMEM;
+
+ *ss = (VarlinkServerSocket) {
+ .server = s,
+ .fd = fd,
+ };
+
+ if (s->event) {
+ r = sd_event_add_io(s->event, &ss->event_source, fd, EPOLLIN, connect_callback, ss);
+ if (r < 0)
+ return r;
+
+ r = sd_event_source_set_priority(ss->event_source, s->event_priority);
+ if (r < 0)
+ return r;
+ }
+
+ LIST_PREPEND(sockets, s->sockets, TAKE_PTR(ss));
+ return 0;
+}
+
+int varlink_server_listen_address(VarlinkServer *s, const char *address, mode_t m) {
+ union sockaddr_union sockaddr;
+ socklen_t sockaddr_len;
+ _cleanup_close_ int fd = -1;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(address, -EINVAL);
+ assert_return((m & ~0777) == 0, -EINVAL);
+
+ r = sockaddr_un_set_path(&sockaddr.un, address);
+ if (r < 0)
+ return r;
+ sockaddr_len = r;
+
+ fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return -errno;
+
+ fd = fd_move_above_stdio(fd);
+
+ (void) sockaddr_un_unlink(&sockaddr.un);
+
+ RUN_WITH_UMASK(~m & 0777) {
+ r = mac_selinux_bind(fd, &sockaddr.sa, sockaddr_len);
+ if (r < 0)
+ return r;
+ }
+
+ if (listen(fd, SOMAXCONN) < 0)
+ return -errno;
+
+ r = varlink_server_listen_fd(s, fd);
+ if (r < 0)
+ return r;
+
+ TAKE_FD(fd);
+ return 0;
+}
+
+void* varlink_server_set_userdata(VarlinkServer *s, void *userdata) {
+ void *ret;
+
+ assert_return(s, NULL);
+
+ ret = s->userdata;
+ s->userdata = userdata;
+
+ return ret;
+}
+
+void* varlink_server_get_userdata(VarlinkServer *s) {
+ assert_return(s, NULL);
+
+ return s->userdata;
+}
+
+static VarlinkServerSocket* varlink_server_socket_destroy(VarlinkServerSocket *ss) {
+ if (!ss)
+ return NULL;
+
+ if (ss->server)
+ LIST_REMOVE(sockets, ss->server->sockets, ss);
+
+ sd_event_source_disable_unref(ss->event_source);
+
+ free(ss->address);
+ safe_close(ss->fd);
+
+ return mfree(ss);
+}
+
+int varlink_server_shutdown(VarlinkServer *s) {
+ assert_return(s, -EINVAL);
+
+ while (s->sockets)
+ varlink_server_socket_destroy(s->sockets);
+
+ return 0;
+}
+
+int varlink_server_attach_event(VarlinkServer *s, sd_event *e, int64_t priority) {
+ VarlinkServerSocket *ss;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(!s->event, -EBUSY);
+
+ if (e)
+ s->event = sd_event_ref(e);
+ else {
+ r = sd_event_default(&s->event);
+ if (r < 0)
+ return r;
+ }
+
+ LIST_FOREACH(sockets, ss, s->sockets) {
+ assert(!ss->event_source);
+
+ r = sd_event_add_io(s->event, &ss->event_source, ss->fd, EPOLLIN, connect_callback, ss);
+ if (r < 0)
+ goto fail;
+
+ r = sd_event_source_set_priority(ss->event_source, priority);
+ if (r < 0)
+ goto fail;
+ }
+
+ s->event_priority = priority;
+ return 0;
+
+fail:
+ varlink_server_detach_event(s);
+ return r;
+}
+
+int varlink_server_detach_event(VarlinkServer *s) {
+ VarlinkServerSocket *ss;
+
+ assert_return(s, -EINVAL);
+
+ LIST_FOREACH(sockets, ss, s->sockets) {
+
+ if (!ss->event_source)
+ continue;
+
+ (void) sd_event_source_set_enabled(ss->event_source, SD_EVENT_OFF);
+ ss->event_source = sd_event_source_unref(ss->event_source);
+ }
+
+ sd_event_unref(s->event);
+ return 0;
+}
+
+sd_event *varlink_server_get_event(VarlinkServer *s) {
+ assert_return(s, NULL);
+
+ return s->event;
+}
+
+int varlink_server_bind_method(VarlinkServer *s, const char *method, VarlinkMethod callback) {
+ char *m;
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(method, -EINVAL);
+ assert_return(callback, -EINVAL);
+
+ if (startswith(method, "org.varlink.service."))
+ return -EEXIST;
+
+ r = hashmap_ensure_allocated(&s->methods, &string_hash_ops);
+ if (r < 0)
+ return r;
+
+ m = strdup(method);
+ if (!m)
+ return -ENOMEM;
+
+ r = hashmap_put(s->methods, m, callback);
+ if (r < 0) {
+ free(m);
+ return r;
+ }
+
+ return 0;
+}
+
+int varlink_server_bind_method_many_internal(VarlinkServer *s, ...) {
+ va_list ap;
+ int r = 0;
+
+ assert_return(s, -EINVAL);
+
+ va_start(ap, s);
+ for (;;) {
+ VarlinkMethod callback;
+ const char *method;
+
+ method = va_arg(ap, const char *);
+ if (!method)
+ break;
+
+ callback = va_arg(ap, VarlinkMethod);
+
+ r = varlink_server_bind_method(s, method, callback);
+ if (r < 0)
+ break;
+ }
+ va_end(ap);
+
+ return r;
+}
+
+int varlink_server_bind_connect(VarlinkServer *s, VarlinkConnect callback) {
+ assert_return(s, -EINVAL);
+
+ if (callback && s->connect_callback && callback != s->connect_callback)
+ return -EBUSY;
+
+ s->connect_callback = callback;
+ return 0;
+}
+
+int varlink_server_bind_disconnect(VarlinkServer *s, VarlinkDisconnect callback) {
+ assert_return(s, -EINVAL);
+
+ if (callback && s->disconnect_callback && callback != s->disconnect_callback)
+ return -EBUSY;
+
+ s->disconnect_callback = callback;
+ return 0;
+}
+
+unsigned varlink_server_connections_max(VarlinkServer *s) {
+ int dts;
+
+ /* If a server is specified, return the setting for that server, otherwise the default value */
+ if (s)
+ return s->connections_max;
+
+ dts = getdtablesize();
+ assert_se(dts > 0);
+
+ /* Make sure we never use up more than ¾th of RLIMIT_NOFILE for IPC */
+ if (VARLINK_DEFAULT_CONNECTIONS_MAX > (unsigned) dts / 4 * 3)
+ return dts / 4 * 3;
+
+ return VARLINK_DEFAULT_CONNECTIONS_MAX;
+}
+
+unsigned varlink_server_connections_per_uid_max(VarlinkServer *s) {
+ unsigned m;
+
+ if (s)
+ return s->connections_per_uid_max;
+
+ /* Make sure to never use up more than ¾th of available connections for a single user */
+ m = varlink_server_connections_max(NULL);
+ if (VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX > m)
+ return m / 4 * 3;
+
+ return VARLINK_DEFAULT_CONNECTIONS_PER_UID_MAX;
+}
+
+int varlink_server_set_connections_per_uid_max(VarlinkServer *s, unsigned m) {
+ assert_return(s, -EINVAL);
+ assert_return(m > 0, -EINVAL);
+
+ s->connections_per_uid_max = m;
+ return 0;
+}
+
+int varlink_server_set_connections_max(VarlinkServer *s, unsigned m) {
+ assert_return(s, -EINVAL);
+ assert_return(m > 0, -EINVAL);
+
+ s->connections_max = m;
+ return 0;
+}
+
+unsigned varlink_server_current_connections(VarlinkServer *s) {
+ assert_return(s, UINT_MAX);
+
+ return s->n_connections;
+}
+
+int varlink_server_set_description(VarlinkServer *s, const char *description) {
+ assert_return(s, -EINVAL);
+
+ return free_and_strdup(&s->description, description);
+}
diff --git a/src/shared/varlink.h b/src/shared/varlink.h
new file mode 100644
index 0000000..7ea1f91
--- /dev/null
+++ b/src/shared/varlink.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "sd-event.h"
+
+#include "json.h"
+#include "time-util.h"
+
+/* A minimal Varlink implementation. We only implement the minimal, obvious bits here though. No validation,
+ * no introspection, no name service, just the stuff actually needed.
+ *
+ * You might wonder why we aren't using libvarlink here? Varlink is a very simple protocol, which allows us
+ * to write our own implementation relatively easily. However, the main reasons are these:
+ *
+ * • We want to use our own JSON subsystem, with all the benefits that brings (i.e. accurate unsigned+signed
+ * 64bit integers, full fuzzing, logging during parsing and so on). If we'd want to use that with
+ * libvarlink we'd have to serialize and deserialize all the time from its own representation which is
+ * inefficient and nasty.
+ *
+ * • We want integration into sd-event, but also synchronous event-loop-less operation
+ *
+ * • We need proper per-UID accounting and access control, since we want to allow communication between
+ * unprivileged clients and privileged servers.
+ *
+ * • And of course, we don't want the name service and introspection stuff for now (though that might
+ * change).
+ */
+
+typedef struct Varlink Varlink;
+typedef struct VarlinkServer VarlinkServer;
+
+typedef enum VarlinkReplyFlags {
+ VARLINK_REPLY_ERROR = 1 << 0,
+ VARLINK_REPLY_CONTINUES = 1 << 1,
+ VARLINK_REPLY_LOCAL = 1 << 2,
+} VarlinkReplyFlags;
+
+typedef enum VarlinkMethodFlags {
+ VARLINK_METHOD_ONEWAY = 1 << 0,
+ VARLINK_METHOD_MORE = 2 << 1,
+} VarlinkMethodFlags;
+
+typedef enum VarlinkServerFlags {
+ VARLINK_SERVER_ROOT_ONLY = 1 << 0, /* Only accessible by root */
+ VARLINK_SERVER_MYSELF_ONLY = 1 << 1, /* Only accessible by our own UID */
+ VARLINK_SERVER_ACCOUNT_UID = 1 << 2, /* Do per user accounting */
+
+ _VARLINK_SERVER_FLAGS_ALL = (1 << 3) - 1,
+} VarlinkServerFlags;
+
+typedef int (*VarlinkMethod)(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata);
+typedef int (*VarlinkReply)(Varlink *link, JsonVariant *parameters, const char *error_id, VarlinkReplyFlags flags, void *userdata);
+typedef int (*VarlinkConnect)(VarlinkServer *server, Varlink *link, void *userdata);
+typedef void (*VarlinkDisconnect)(VarlinkServer *server, Varlink *link, void *userdata);
+
+int varlink_connect_address(Varlink **ret, const char *address);
+int varlink_connect_fd(Varlink **ret, int fd);
+
+Varlink* varlink_ref(Varlink *link);
+Varlink* varlink_unref(Varlink *v);
+
+int varlink_get_fd(Varlink *v);
+int varlink_get_events(Varlink *v);
+int varlink_get_timeout(Varlink *v, usec_t *ret);
+
+int varlink_attach_event(Varlink *v, sd_event *e, int64_t priority);
+void varlink_detach_event(Varlink *v);
+sd_event *varlink_get_event(Varlink *v);
+
+int varlink_process(Varlink *v);
+int varlink_wait(Varlink *v, usec_t timeout);
+
+int varlink_flush(Varlink *v);
+int varlink_close(Varlink *v);
+
+Varlink* varlink_flush_close_unref(Varlink *v);
+Varlink* varlink_close_unref(Varlink *v);
+
+/* Enqueue method call, not expecting a reply */
+int varlink_send(Varlink *v, const char *method, JsonVariant *parameters);
+int varlink_sendb(Varlink *v, const char *method, ...);
+
+/* Send method call and wait for reply */
+int varlink_call(Varlink *v, const char *method, JsonVariant *parameters, JsonVariant **ret_parameters, const char **ret_error_id, VarlinkReplyFlags *ret_flags);
+int varlink_callb(Varlink *v, const char *method, JsonVariant **ret_parameters, const char **ret_error_id, VarlinkReplyFlags *ret_flags, ...);
+
+/* Enqueue method call, expect a reply, which is eventually delivered to the reply callback */
+int varlink_invoke(Varlink *v, const char *method, JsonVariant *parameters);
+int varlink_invokeb(Varlink *v, const char *method, ...);
+
+/* Enqueue method call, expect a reply now, and possibly more later, which are all delivered to the reply callback */
+int varlink_observe(Varlink *v, const char *method, JsonVariant *parameters);
+int varlink_observeb(Varlink *v, const char *method, ...);
+
+/* Enqueue a final reply */
+int varlink_reply(Varlink *v, JsonVariant *parameters);
+int varlink_replyb(Varlink *v, ...);
+
+/* Enqueue a (final) error */
+int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters);
+int varlink_errorb(Varlink *v, const char *error_id, ...);
+int varlink_error_invalid_parameter(Varlink *v, JsonVariant *parameters);
+int varlink_error_errno(Varlink *v, int error);
+
+/* Enqueue a "more" reply */
+int varlink_notify(Varlink *v, JsonVariant *parameters);
+int varlink_notifyb(Varlink *v, ...);
+
+/* Bind a disconnect, reply or timeout callback */
+int varlink_bind_reply(Varlink *v, VarlinkReply reply);
+
+void* varlink_set_userdata(Varlink *v, void *userdata);
+void* varlink_get_userdata(Varlink *v);
+
+int varlink_get_peer_uid(Varlink *v, uid_t *ret);
+int varlink_get_peer_pid(Varlink *v, pid_t *ret);
+
+int varlink_set_relative_timeout(Varlink *v, usec_t usec);
+
+VarlinkServer* varlink_get_server(Varlink *v);
+
+int varlink_set_description(Varlink *v, const char *d);
+
+/* Create a varlink server */
+int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags);
+VarlinkServer *varlink_server_ref(VarlinkServer *s);
+VarlinkServer *varlink_server_unref(VarlinkServer *s);
+
+/* Add addresses or fds to listen on */
+int varlink_server_listen_address(VarlinkServer *s, const char *address, mode_t mode);
+int varlink_server_listen_fd(VarlinkServer *s, int fd);
+int varlink_server_add_connection(VarlinkServer *s, int fd, Varlink **ret);
+
+/* Bind callbacks */
+int varlink_server_bind_method(VarlinkServer *s, const char *method, VarlinkMethod callback);
+int varlink_server_bind_method_many_internal(VarlinkServer *s, ...);
+#define varlink_server_bind_method_many(s, ...) varlink_server_bind_method_many_internal(s, __VA_ARGS__, NULL)
+int varlink_server_bind_connect(VarlinkServer *s, VarlinkConnect connect);
+int varlink_server_bind_disconnect(VarlinkServer *s, VarlinkDisconnect disconnect);
+
+void* varlink_server_set_userdata(VarlinkServer *s, void *userdata);
+void* varlink_server_get_userdata(VarlinkServer *s);
+
+int varlink_server_attach_event(VarlinkServer *v, sd_event *e, int64_t priority);
+int varlink_server_detach_event(VarlinkServer *v);
+sd_event *varlink_server_get_event(VarlinkServer *v);
+
+int varlink_server_shutdown(VarlinkServer *server);
+
+unsigned varlink_server_connections_max(VarlinkServer *s);
+unsigned varlink_server_connections_per_uid_max(VarlinkServer *s);
+
+int varlink_server_set_connections_per_uid_max(VarlinkServer *s, unsigned m);
+int varlink_server_set_connections_max(VarlinkServer *s, unsigned m);
+
+unsigned varlink_server_current_connections(VarlinkServer *s);
+
+int varlink_server_set_description(VarlinkServer *s, const char *description);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_close_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(Varlink *, varlink_flush_close_unref);
+DEFINE_TRIVIAL_CLEANUP_FUNC(VarlinkServer *, varlink_server_unref);
+
+#define VARLINK_ERROR_DISCONNECTED "io.systemd.Disconnected"
+#define VARLINK_ERROR_TIMEOUT "io.systemd.TimedOut"
+#define VARLINK_ERROR_PROTOCOL "io.systemd.Protocol"
+#define VARLINK_ERROR_SYSTEM "io.systemd.System"
+
+#define VARLINK_ERROR_INTERFACE_NOT_FOUND "org.varlink.service.InterfaceNotFound"
+#define VARLINK_ERROR_METHOD_NOT_FOUND "org.varlink.service.MethodNotFound"
+#define VARLINK_ERROR_METHOD_NOT_IMPLEMENTED "org.varlink.service.MethodNotImplemented"
+#define VARLINK_ERROR_INVALID_PARAMETER "org.varlink.service.InvalidParameter"
+#define VARLINK_ERROR_SUBSCRIPTION_TAKEN "org.varlink.service.SubscriptionTaken"
diff --git a/src/shared/verbs.c b/src/shared/verbs.c
new file mode 100644
index 0000000..2d19172
--- /dev/null
+++ b/src/shared/verbs.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "env-util.h"
+#include "log.h"
+#include "macro.h"
+#include "process-util.h"
+#include "string-util.h"
+#include "verbs.h"
+#include "virt.h"
+
+/* Wraps running_in_chroot() which is used in various places, but also adds an environment variable check so external
+ * processes can reliably force this on.
+ */
+bool running_in_chroot_or_offline(void) {
+ int r;
+
+ /* Added to support use cases like rpm-ostree, where from %post scripts we only want to execute "preset", but
+ * not "start"/"restart" for example.
+ *
+ * See docs/ENVIRONMENT.md for docs.
+ */
+ r = getenv_bool("SYSTEMD_OFFLINE");
+ if (r < 0 && r != -ENXIO)
+ log_debug_errno(r, "Failed to parse $SYSTEMD_OFFLINE: %m");
+ else if (r >= 0)
+ return r > 0;
+
+ /* We've had this condition check for a long time which basically checks for legacy chroot case like Fedora's
+ * "mock", which is used for package builds. We don't want to try to start systemd services there, since
+ * without --new-chroot we don't even have systemd running, and even if we did, adding a concept of background
+ * daemons to builds would be an enormous change, requiring considering things like how the journal output is
+ * handled, etc. And there's really not a use case today for a build talking to a service.
+ *
+ * Note this call itself also looks for a different variable SYSTEMD_IGNORE_CHROOT=1.
+ */
+ r = running_in_chroot();
+ if (r < 0)
+ log_debug_errno(r, "running_in_chroot(): %m");
+
+ return r > 0;
+}
+
+const Verb* verbs_find_verb(const char *name, const Verb verbs[]) {
+ for (size_t i = 0; verbs[i].dispatch; i++)
+ if (streq_ptr(name, verbs[i].verb) ||
+ (!name && FLAGS_SET(verbs[i].flags, VERB_DEFAULT)))
+ return &verbs[i];
+
+ /* At the end of the list? */
+ return NULL;
+}
+
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata) {
+ const Verb *verb;
+ const char *name;
+ int left;
+
+ assert(verbs);
+ assert(verbs[0].dispatch);
+ assert(argc >= 0);
+ assert(argv);
+ assert(argc >= optind);
+
+ left = argc - optind;
+ argv += optind;
+ optind = 0;
+ name = argv[0];
+
+ verb = verbs_find_verb(name, verbs);
+ if (!verb) {
+ if (name)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Unknown command verb %s.", name);
+ else
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Command verb required.");
+ }
+
+ if (!name)
+ left = 1;
+
+ if (verb->min_args != VERB_ANY &&
+ (unsigned) left < verb->min_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too few arguments.");
+
+ if (verb->max_args != VERB_ANY &&
+ (unsigned) left > verb->max_args)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Too many arguments.");
+
+ if ((verb->flags & VERB_ONLINE_ONLY) && running_in_chroot_or_offline()) {
+ log_info("Running in chroot, ignoring command '%s'", name ?: verb->verb);
+ return 0;
+ }
+
+ if (name)
+ return verb->dispatch(left, argv, userdata);
+ else {
+ char* fake[2] = {
+ (char*) verb->verb,
+ NULL
+ };
+
+ return verb->dispatch(1, fake, userdata);
+ }
+}
diff --git a/src/shared/verbs.h b/src/shared/verbs.h
new file mode 100644
index 0000000..245bb37
--- /dev/null
+++ b/src/shared/verbs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#define VERB_ANY ((unsigned) -1)
+
+typedef enum VerbFlags {
+ VERB_DEFAULT = 1 << 0, /* The verb to run if no verb is specified */
+ VERB_ONLINE_ONLY = 1 << 1, /* Just do nothing when running in chroot or offline */
+} VerbFlags;
+
+typedef struct {
+ const char *verb;
+ unsigned min_args, max_args;
+ VerbFlags flags;
+ int (* const dispatch)(int argc, char *argv[], void *userdata);
+} Verb;
+
+bool running_in_chroot_or_offline(void);
+
+const Verb* verbs_find_verb(const char *name, const Verb verbs[]);
+int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata);
diff --git a/src/shared/vlan-util.c b/src/shared/vlan-util.c
new file mode 100644
index 0000000..cb43d50
--- /dev/null
+++ b/src/shared/vlan-util.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "conf-parser.h"
+#include "parse-util.h"
+#include "string-util.h"
+#include "vlan-util.h"
+
+int parse_vlanid(const char *p, uint16_t *ret) {
+ uint16_t id;
+ int r;
+
+ assert(p);
+ assert(ret);
+
+ r = safe_atou16(p, &id);
+ if (r < 0)
+ return r;
+ if (!vlanid_is_valid(id))
+ return -ERANGE;
+
+ *ret = id;
+ return 0;
+}
+
+int parse_vid_range(const char *p, uint16_t *vid, uint16_t *vid_end) {
+ unsigned lower, upper;
+ int r;
+
+ r = parse_range(p, &lower, &upper);
+ if (r < 0)
+ return r;
+
+ if (lower > VLANID_MAX || upper > VLANID_MAX || lower > upper)
+ return -EINVAL;
+
+ *vid = lower;
+ *vid_end = upper;
+ return 0;
+}
+
+int config_parse_default_port_vlanid(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+ uint16_t *id = data;
+
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ if (streq(rvalue, "none")) {
+ *id = 0;
+ return 0;
+ }
+
+ return config_parse_vlanid(unit, filename, line, section, section_line,
+ lvalue, ltype, rvalue, data, userdata);
+}
+
+int config_parse_vlanid(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ uint16_t *id = data;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(data);
+
+ r = parse_vlanid(rvalue, id);
+ if (r == -ERANGE) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "VLAN identifier outside of valid range 0…4094, ignoring: %s", rvalue);
+ return 0;
+ }
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r,
+ "Failed to parse VLAN identifier value, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/src/shared/vlan-util.h b/src/shared/vlan-util.h
new file mode 100644
index 0000000..0336908
--- /dev/null
+++ b/src/shared/vlan-util.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include "conf-parser.h"
+
+#define VLANID_MAX 4094
+#define VLANID_INVALID UINT16_MAX
+
+/* Note that we permit VLAN Id 0 here, as that is apparently OK by the Linux kernel */
+static inline bool vlanid_is_valid(uint16_t id) {
+ return id <= VLANID_MAX;
+}
+
+int parse_vlanid(const char *p, uint16_t *ret);
+int parse_vid_range(const char *p, uint16_t *vid, uint16_t *vid_end);
+
+CONFIG_PARSER_PROTOTYPE(config_parse_default_port_vlanid);
+CONFIG_PARSER_PROTOTYPE(config_parse_vlanid);
diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c
new file mode 100644
index 0000000..3323897
--- /dev/null
+++ b/src/shared/volatile-util.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+
+#include "alloc-util.h"
+#include "macro.h"
+#include "parse-util.h"
+#include "proc-cmdline.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "volatile-util.h"
+
+int query_volatile_mode(VolatileMode *ret) {
+ _cleanup_free_ char *mode = NULL;
+ int r;
+
+ r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ *ret = VOLATILE_NO;
+ return 0;
+ }
+
+ if (mode) {
+ VolatileMode m;
+
+ m = volatile_mode_from_string(mode);
+ if (m < 0)
+ return -EINVAL;
+
+ *ret = m;
+ } else
+ *ret = VOLATILE_YES;
+
+ return 1;
+}
+
+static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = {
+ [VOLATILE_NO] = "no",
+ [VOLATILE_YES] = "yes",
+ [VOLATILE_STATE] = "state",
+ [VOLATILE_OVERLAY] = "overlay",
+};
+
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES);
diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h
new file mode 100644
index 0000000..9a1bb38
--- /dev/null
+++ b/src/shared/volatile-util.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+typedef enum VolatileMode {
+ VOLATILE_NO,
+ VOLATILE_YES,
+ VOLATILE_STATE,
+ VOLATILE_OVERLAY,
+ _VOLATILE_MODE_MAX,
+ _VOLATILE_MODE_INVALID = -1
+} VolatileMode;
+
+VolatileMode volatile_mode_from_string(const char *s);
+const char* volatile_mode_to_string(VolatileMode m);
+
+int query_volatile_mode(VolatileMode *ret);
diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c
new file mode 100644
index 0000000..d33acaf
--- /dev/null
+++ b/src/shared/watchdog.c
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <linux/watchdog.h>
+
+#include "errno-util.h"
+#include "fd-util.h"
+#include "log.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "watchdog.h"
+
+static int watchdog_fd = -1;
+static char *watchdog_device = NULL;
+static usec_t watchdog_timeout = USEC_INFINITY;
+static usec_t watchdog_last_ping = USEC_INFINITY;
+
+static int update_timeout(void) {
+ if (watchdog_fd < 0)
+ return 0;
+ if (watchdog_timeout == USEC_INFINITY)
+ return 0;
+
+ if (watchdog_timeout == 0) {
+ int flags;
+
+ flags = WDIOS_DISABLECARD;
+ if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
+ return log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+ } else {
+ char buf[FORMAT_TIMESPAN_MAX];
+ int sec, flags;
+ usec_t t;
+
+ t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
+ sec = (int) t >= INT_MAX ? INT_MAX : t; /* Saturate */
+ if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0)
+ return log_warning_errno(errno, "Failed to set timeout to %is: %m", sec);
+
+ watchdog_timeout = (usec_t) sec * USEC_PER_SEC;
+ log_info("Set hardware watchdog to %s.", format_timespan(buf, sizeof(buf), watchdog_timeout, 0));
+
+ flags = WDIOS_ENABLECARD;
+ if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) {
+ /* ENOTTY means the watchdog is always enabled so we're fine */
+ log_full(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING,
+ "Failed to enable hardware watchdog: %m");
+ if (!ERRNO_IS_NOT_SUPPORTED(errno))
+ return -errno;
+ }
+
+ if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
+ return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+
+ watchdog_last_ping = now(clock_boottime_or_monotonic());
+ }
+
+ return 0;
+}
+
+static int open_watchdog(void) {
+ struct watchdog_info ident;
+ const char *fn;
+
+ if (watchdog_fd >= 0)
+ return 0;
+
+ fn = watchdog_device ?: "/dev/watchdog";
+ watchdog_fd = open(fn, O_WRONLY|O_CLOEXEC);
+ if (watchdog_fd < 0)
+ return log_debug_errno(errno, "Failed to open watchdog device %s: %m", fn);
+
+ if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident) < 0)
+ log_debug_errno(errno, "Hardware watchdog %s does not support WDIOC_GETSUPPORT ioctl: %m", fn);
+ else
+ log_info("Using hardware watchdog '%s', version %x, device %s",
+ ident.identity,
+ ident.firmware_version,
+ fn);
+
+ return update_timeout();
+}
+
+int watchdog_set_device(char *path) {
+ int r;
+
+ r = free_and_strdup(&watchdog_device, path);
+ if (r < 0)
+ return r;
+
+ if (r > 0) /* watchdog_device changed */
+ watchdog_fd = safe_close(watchdog_fd);
+
+ return r;
+}
+
+int watchdog_set_timeout(usec_t *usec) {
+ int r;
+
+ watchdog_timeout = *usec;
+
+ /* If we didn't open the watchdog yet and didn't get any explicit timeout value set, don't do
+ * anything */
+ if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY)
+ return 0;
+
+ if (watchdog_fd < 0)
+ r = open_watchdog();
+ else
+ r = update_timeout();
+
+ *usec = watchdog_timeout;
+ return r;
+}
+
+usec_t watchdog_runtime_wait(void) {
+ usec_t rtwait, ntime;
+
+ if (!timestamp_is_set(watchdog_timeout))
+ return USEC_INFINITY;
+
+ /* Sleep half the watchdog timeout since the last successful ping at most */
+ if (timestamp_is_set(watchdog_last_ping)) {
+ ntime = now(clock_boottime_or_monotonic());
+ assert(ntime >= watchdog_last_ping);
+ rtwait = usec_sub_unsigned(watchdog_last_ping + (watchdog_timeout / 2), ntime);
+ } else
+ rtwait = watchdog_timeout / 2;
+
+ return rtwait;
+}
+
+int watchdog_ping(void) {
+ usec_t ntime;
+ int r;
+
+ ntime = now(clock_boottime_or_monotonic());
+
+ /* Never ping earlier than watchdog_timeout/4 and try to ping
+ * by watchdog_timeout/2 plus scheduling latencies the latest */
+ if (timestamp_is_set(watchdog_last_ping)) {
+ assert(ntime >= watchdog_last_ping);
+ if ((ntime - watchdog_last_ping) < (watchdog_timeout / 4))
+ return 0;
+ }
+
+ if (watchdog_fd < 0) {
+ r = open_watchdog();
+ if (r < 0)
+ return r;
+ }
+
+ if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
+ return log_warning_errno(errno, "Failed to ping hardware watchdog: %m");
+
+ watchdog_last_ping = ntime;
+ return 0;
+}
+
+void watchdog_close(bool disarm) {
+ if (watchdog_fd < 0)
+ return;
+
+ if (disarm) {
+ int flags;
+
+ /* Explicitly disarm it */
+ flags = WDIOS_DISABLECARD;
+ if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
+ log_warning_errno(errno, "Failed to disable hardware watchdog: %m");
+
+ /* To be sure, use magic close logic, too */
+ for (;;) {
+ static const char v = 'V';
+
+ if (write(watchdog_fd, &v, 1) > 0)
+ break;
+
+ if (errno != EINTR) {
+ log_error_errno(errno, "Failed to disarm watchdog timer: %m");
+ break;
+ }
+ }
+ }
+
+ watchdog_fd = safe_close(watchdog_fd);
+}
diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h
new file mode 100644
index 0000000..b7587db
--- /dev/null
+++ b/src/shared/watchdog.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "time-util.h"
+#include "util.h"
+
+int watchdog_set_device(char *path);
+int watchdog_set_timeout(usec_t *usec);
+int watchdog_ping(void);
+void watchdog_close(bool disarm);
+usec_t watchdog_runtime_wait(void);
+
+static inline void watchdog_free_device(void) {
+ (void) watchdog_set_device(NULL);
+}
diff --git a/src/shared/web-util.c b/src/shared/web-util.c
new file mode 100644
index 0000000..82cd5fb
--- /dev/null
+++ b/src/shared/web-util.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+
+#include "string-util.h"
+#include "strv.h"
+#include "utf8.h"
+#include "web-util.h"
+
+bool http_etag_is_valid(const char *etag) {
+ if (isempty(etag))
+ return false;
+
+ if (!endswith(etag, "\""))
+ return false;
+
+ if (!STARTSWITH_SET(etag, "\"", "W/\""))
+ return false;
+
+ return true;
+}
+
+bool http_url_is_valid(const char *url) {
+ const char *p;
+
+ if (isempty(url))
+ return false;
+
+ p = STARTSWITH_SET(url, "http://", "https://");
+ if (!p)
+ return false;
+
+ if (isempty(p))
+ return false;
+
+ return ascii_is_valid(p);
+}
+
+bool documentation_url_is_valid(const char *url) {
+ const char *p;
+
+ if (isempty(url))
+ return false;
+
+ if (http_url_is_valid(url))
+ return true;
+
+ p = STARTSWITH_SET(url, "file:/", "info:", "man:");
+ if (isempty(p))
+ return false;
+
+ return ascii_is_valid(p);
+}
diff --git a/src/shared/web-util.h b/src/shared/web-util.h
new file mode 100644
index 0000000..ec54669
--- /dev/null
+++ b/src/shared/web-util.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+
+#include "macro.h"
+
+bool http_url_is_valid(const char *url) _pure_;
+
+bool documentation_url_is_valid(const char *url) _pure_;
+
+bool http_etag_is_valid(const char *etag);
diff --git a/src/shared/wifi-util.c b/src/shared/wifi-util.c
new file mode 100644
index 0000000..2ac8846
--- /dev/null
+++ b/src/shared/wifi-util.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "log.h"
+#include "wifi-util.h"
+
+int wifi_get_interface(sd_netlink *genl, int ifindex, enum nl80211_iftype *iftype, char **ssid) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ sd_genl_family family;
+ int r;
+
+ assert(genl);
+ assert(ifindex > 0);
+
+ r = sd_genl_message_new(genl, SD_GENL_NL80211, NL80211_CMD_GET_INTERFACE, &m);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create generic netlink message: %m");
+
+ r = sd_netlink_message_append_u32(m, NL80211_ATTR_IFINDEX, ifindex);
+ if (r < 0)
+ return log_debug_errno(r, "Could not append NL80211_ATTR_IFINDEX attribute: %m");
+
+ r = sd_netlink_call(genl, m, 0, &reply);
+ if (r == -ENODEV) {
+ /* For obsolete WEXT driver. */
+ log_debug_errno(r, "Failed to request information about wifi interface %d. "
+ "The device doesn't seem to have nl80211 interface. Ignoring.",
+ ifindex);
+ goto nodata;
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request information about wifi interface %d: %m", ifindex);
+ if (!reply) {
+ log_debug_errno(r, "No reply received to request for information about wifi interface %d, ignoring.", ifindex);
+ goto nodata;
+ }
+
+ r = sd_netlink_message_get_errno(reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get information about wifi interface %d: %m", ifindex);
+
+ r = sd_genl_message_get_family(genl, reply, &family);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine genl family: %m");
+ if (family != SD_GENL_NL80211) {
+ log_debug("Received message of unexpected genl family %u, ignoring.", family);
+ goto nodata;
+ }
+
+ if (iftype) {
+ uint32_t t;
+
+ r = sd_netlink_message_read_u32(reply, NL80211_ATTR_IFTYPE, &t);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get NL80211_ATTR_IFTYPE attribute: %m");
+ *iftype = t;
+ }
+
+ if (ssid) {
+ r = sd_netlink_message_read_string_strdup(reply, NL80211_ATTR_SSID, ssid);
+ if (r == -ENODATA)
+ *ssid = NULL;
+ else if (r < 0)
+ return log_debug_errno(r, "Failed to get NL80211_ATTR_SSID attribute: %m");
+ }
+
+ return 1;
+
+nodata:
+ if (iftype)
+ *iftype = 0;
+ if (ssid)
+ *ssid = NULL;
+ return 0;
+}
+
+int wifi_get_station(sd_netlink *genl, int ifindex, struct ether_addr *bssid) {
+ _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL, *reply = NULL;
+ sd_genl_family family;
+ int r;
+
+ assert(genl);
+ assert(ifindex > 0);
+ assert(bssid);
+
+ r = sd_genl_message_new(genl, SD_GENL_NL80211, NL80211_CMD_GET_STATION, &m);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create generic netlink message: %m");
+
+ r = sd_netlink_message_set_flags(m, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to set dump flag: %m");
+
+ r = sd_netlink_message_append_u32(m, NL80211_ATTR_IFINDEX, ifindex);
+ if (r < 0)
+ return log_debug_errno(r, "Could not append NL80211_ATTR_IFINDEX attribute: %m");
+
+ r = sd_netlink_call(genl, m, 0, &reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to request information about wifi station: %m");
+ if (!reply) {
+ log_debug_errno(r, "No reply received to request for information about wifi station, ignoring.");
+ goto nodata;
+ }
+
+ r = sd_netlink_message_get_errno(reply);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get information about wifi station: %m");
+
+ r = sd_genl_message_get_family(genl, reply, &family);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine genl family: %m");
+ if (family != SD_GENL_NL80211) {
+ log_debug("Received message of unexpected genl family %u, ignoring.", family);
+ goto nodata;
+ }
+
+ r = sd_netlink_message_read_ether_addr(reply, NL80211_ATTR_MAC, bssid);
+ if (r == -ENODATA)
+ goto nodata;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get NL80211_ATTR_MAC attribute: %m");
+
+ return 1;
+
+nodata:
+ *bssid = (struct ether_addr) {};
+ return 0;
+}
diff --git a/src/shared/wifi-util.h b/src/shared/wifi-util.h
new file mode 100644
index 0000000..0ce4137
--- /dev/null
+++ b/src/shared/wifi-util.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include <linux/nl80211.h>
+#include <net/ethernet.h>
+
+#include "sd-netlink.h"
+
+int wifi_get_interface(sd_netlink *genl, int ifindex, enum nl80211_iftype *iftype, char **ssid);
+int wifi_get_station(sd_netlink *genl, int ifindex, struct ether_addr *bssid);
diff --git a/src/shared/xml.c b/src/shared/xml.c
new file mode 100644
index 0000000..8ff3fea
--- /dev/null
+++ b/src/shared/xml.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <stddef.h>
+
+#include "macro.h"
+#include "string-util.h"
+#include "xml.h"
+
+enum {
+ STATE_NULL,
+ STATE_TEXT,
+ STATE_TAG,
+ STATE_ATTRIBUTE,
+};
+
+static void inc_lines(unsigned *line, const char *s, size_t n) {
+ const char *p = s;
+
+ if (!line)
+ return;
+
+ for (;;) {
+ const char *f;
+
+ f = memchr(p, '\n', n);
+ if (!f)
+ return;
+
+ n -= (f - p) + 1;
+ p = f + 1;
+ (*line)++;
+ }
+}
+
+/* We don't actually do real XML here. We only read a simplistic
+ * subset, that is a bit less strict that XML and lacks all the more
+ * complex features, like entities, or namespaces. However, we do
+ * support some HTML5-like simplifications */
+
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
+ const char *c, *e, *b;
+ char *ret;
+ int t;
+
+ assert(p);
+ assert(*p);
+ assert(name);
+ assert(state);
+
+ t = PTR_TO_INT(*state);
+ c = *p;
+
+ if (t == STATE_NULL) {
+ if (line)
+ *line = 1;
+ t = STATE_TEXT;
+ }
+
+ for (;;) {
+ if (*c == 0)
+ return XML_END;
+
+ switch (t) {
+
+ case STATE_TEXT: {
+ int x;
+
+ e = strchrnul(c, '<');
+ if (e > c) {
+ /* More text... */
+ ret = strndup(c, e - c);
+ if (!ret)
+ return -ENOMEM;
+
+ inc_lines(line, c, e - c);
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TEXT;
+ }
+
+ assert(*e == '<');
+ b = c + 1;
+
+ if (startswith(b, "!--")) {
+ /* A comment */
+ e = strstr(b + 3, "-->");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 3 - b);
+
+ c = e + 3;
+ continue;
+ }
+
+ if (*b == '?') {
+ /* Processing instruction */
+
+ e = strstr(b + 1, "?>");
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 2 - b);
+
+ c = e + 2;
+ continue;
+ }
+
+ if (*b == '!') {
+ /* DTD */
+
+ e = strchr(b + 1, '>');
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, b, e + 1 - b);
+
+ c = e + 1;
+ continue;
+ }
+
+ if (*b == '/') {
+ /* A closing tag */
+ x = XML_TAG_CLOSE;
+ b++;
+ } else
+ x = XML_TAG_OPEN;
+
+ e = strpbrk(b, WHITESPACE "/>");
+ if (!e)
+ return -EINVAL;
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return x;
+ }
+
+ case STATE_TAG:
+
+ b = c + strspn(c, WHITESPACE);
+ if (*b == 0)
+ return -EINVAL;
+
+ inc_lines(line, c, b - c);
+
+ e = b + strcspn(b, WHITESPACE "=/>");
+ if (e > b) {
+ /* An attribute */
+
+ ret = strndup(b, e - b);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e;
+ *state = INT_TO_PTR(STATE_ATTRIBUTE);
+
+ return XML_ATTRIBUTE_NAME;
+ }
+
+ if (startswith(b, "/>")) {
+ /* An empty tag */
+
+ *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
+ *p = b + 2;
+ *state = INT_TO_PTR(STATE_TEXT);
+
+ return XML_TAG_CLOSE_EMPTY;
+ }
+
+ if (*b != '>')
+ return -EINVAL;
+
+ c = b + 1;
+ t = STATE_TEXT;
+ continue;
+
+ case STATE_ATTRIBUTE:
+
+ if (*c == '=') {
+ c++;
+
+ if (IN_SET(*c, '\'', '"')) {
+ /* Tag with a quoted value */
+
+ e = strchr(c+1, *c);
+ if (!e)
+ return -EINVAL;
+
+ inc_lines(line, c, e - c);
+
+ ret = strndup(c+1, e - c - 1);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = e + 1;
+ *state = INT_TO_PTR(STATE_TAG);
+
+ return XML_ATTRIBUTE_VALUE;
+
+ }
+
+ /* Tag with a value without quotes */
+
+ b = strpbrk(c, WHITESPACE ">");
+ if (!b)
+ b = c;
+
+ ret = strndup(c, b - c);
+ if (!ret)
+ return -ENOMEM;
+
+ *name = ret;
+ *p = b;
+ *state = INT_TO_PTR(STATE_TAG);
+ return XML_ATTRIBUTE_VALUE;
+ }
+
+ t = STATE_TAG;
+ continue;
+ }
+
+ }
+
+ assert_not_reached("Bad state");
+}
diff --git a/src/shared/xml.h b/src/shared/xml.h
new file mode 100644
index 0000000..217b3b0
--- /dev/null
+++ b/src/shared/xml.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+enum {
+ XML_END,
+ XML_TEXT,
+ XML_TAG_OPEN,
+ XML_TAG_CLOSE,
+ XML_TAG_CLOSE_EMPTY,
+ XML_ATTRIBUTE_NAME,
+ XML_ATTRIBUTE_VALUE,
+};
+
+int xml_tokenize(const char **p, char **name, void **state, unsigned *line);